// Source: ceph.git - ceph/src/arrow/cpp/src/arrow/csv/test_common.cc (git.proxmox.com)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
#include "arrow/csv/test_common.h"

#include <cstdint>
#include <functional>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

#include "arrow/testing/gtest_util.h"
// Concatenate `lines` into one CSV text block.
//
// No separator is inserted between entries: each element of `lines` is
// expected to carry its own line terminator already.
std::string MakeCSVData(std::vector<std::string> lines) {
  std::string s;
  for (const auto& line : lines) {
    s += line;
  }
  return s;
}
33 void MakeCSVParser(std::vector
<std::string
> lines
, ParseOptions options
, int32_t num_cols
,
34 std::shared_ptr
<BlockParser
>* out
) {
35 auto csv
= MakeCSVData(lines
);
36 auto parser
= std::make_shared
<BlockParser
>(options
, num_cols
);
38 ASSERT_OK(parser
->Parse(util::string_view(csv
), &out_size
));
39 ASSERT_EQ(out_size
, csv
.size()) << "trailing CSV data not parsed";
43 void MakeCSVParser(std::vector
<std::string
> lines
, ParseOptions options
,
44 std::shared_ptr
<BlockParser
>* out
) {
45 return MakeCSVParser(lines
, options
, -1, out
);
48 void MakeCSVParser(std::vector
<std::string
> lines
, std::shared_ptr
<BlockParser
>* out
) {
49 MakeCSVParser(lines
, ParseOptions::Defaults(), out
);
52 void MakeColumnParser(std::vector
<std::string
> items
, std::shared_ptr
<BlockParser
>* out
) {
53 auto options
= ParseOptions::Defaults();
54 // Need this to test for null (empty) values
55 options
.ignore_empty_lines
= false;
56 std::vector
<std::string
> lines
;
57 for (const auto& item
: items
) {
58 lines
.push_back(item
+ '\n');
60 MakeCSVParser(lines
, options
, 1, out
);
61 ASSERT_EQ((*out
)->num_cols(), 1) << "Should have seen only 1 CSV column";
62 ASSERT_EQ((*out
)->num_rows(), items
.size());
// Sample cell values, one table per column of the generated CSV.  GetCell
// cycles through each table independently, so the tables do not need to have
// the same length.  The integer, float and decimal tables include an empty
// string and "N/A" alongside well-formed numbers.
const std::vector<std::string> int64_rows = {"123", "4", "-317005557", "", "N/A", "0"};
const std::vector<std::string> float_rows = {"0", "123.456", "-3170.55766", "", "N/A"};
const std::vector<std::string> decimal128_rows = {"0",  "123.456", "-3170.55766",
                                                  "",   "N/A",     "1233456789.123456789"};
const std::vector<std::string> iso8601_rows = {"1917-10-17", "2018-09-13",
                                               "1941-06-22 04:00", "1945-05-09 09:45:38"};
const std::vector<std::string> strptime_rows = {"10/17/1917", "9/13/2018", "9/5/1945"};
// Write the CSV header line (the five column names, comma-separated) to
// `writer`, terminated with std::endl.
static void WriteHeader(std::ostream& writer) {
  writer << "Int64,Float,Decimal128,ISO8601,Strptime" << std::endl;
}
// Return the entry of `base_rows` for `row_index`, wrapping around when the
// index exceeds the table length (index taken modulo `base_rows.size()`).
//
// `base_rows` must not be empty: the modulo would otherwise divide by zero.
static std::string GetCell(const std::vector<std::string>& base_rows, size_t row_index) {
  return base_rows[row_index % base_rows.size()];
}
83 static void WriteRow(std::ostream
& writer
, size_t row_index
) {
84 writer
<< GetCell(int64_rows
, row_index
);
86 writer
<< GetCell(float_rows
, row_index
);
88 writer
<< GetCell(decimal128_rows
, row_index
);
90 writer
<< GetCell(iso8601_rows
, row_index
);
92 writer
<< GetCell(strptime_rows
, row_index
);
// Write a deliberately malformed row to `writer`: a double quote, a line
// break, and a second double quote, followed by a final line terminator.
//
// `row_index` is accepted for signature parity with WriteRow but is not used.
static void WriteInvalidRow(std::ostream& writer, size_t row_index) {
  writer << "\"" << std::endl << "\"";
  writer << std::endl;
}
102 Result
<std::shared_ptr
<Buffer
>> MakeSampleCsvBuffer(
103 size_t num_rows
, std::function
<bool(size_t)> is_valid
) {
104 std::stringstream writer
;
107 for (size_t i
= 0; i
< num_rows
; ++i
) {
108 if (!is_valid
|| is_valid(i
)) {
111 WriteInvalidRow(writer
, i
);
115 auto table_str
= writer
.str();
116 auto table_buffer
= std::make_shared
<Buffer
>(table_str
);
117 return MemoryManager::CopyBuffer(table_buffer
, default_cpu_memory_manager());