1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
18 class TestCSVReader
< Test
::Unit::TestCase
19 include Helper
::Buildable
20 include Helper
::Omittable
22 sub_test_case("#read") do
24 buffer
= Arrow
::Buffer.new(csv
)
25 Arrow
::BufferInputStream.new(buffer
)
29 table
= Arrow
::CSVReader.new(open_input(<<-CSV))
35 "message" => build_string_array(["Start", "Shutdown"]),
36 "count" => build_int64_array([2, 9]),
38 assert_equal(build_table(columns
),
42 sub_test_case("options") do
43 def test_add_column_type
44 options
= Arrow
::CSVReadOptions.new
45 options
.add_column_type("count", Arrow
::UInt8DataType.new
)
46 options
.add_column_type("valid", Arrow
::BooleanDataType.new
)
47 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
53 "count" => build_uint8_array([2, 9]),
54 "valid" => build_boolean_array([true, false]),
56 assert_equal(build_table(columns
),
61 options
= Arrow
::CSVReadOptions.new
63 Arrow
::Field.new("count", Arrow
::UInt8DataType.new
),
64 Arrow
::Field.new("valid", Arrow
::BooleanDataType.new
),
66 schema
= Arrow
::Schema.new(fields
)
67 options
.add_schema(schema
)
68 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
74 "count" => build_uint8_array([2, 9]),
75 "valid" => build_boolean_array([true, false]),
77 assert_equal(build_table(columns
),
82 require_gi_bindings(3, 3, 1)
83 options
= Arrow
::CSVReadOptions.new
84 options
.add_column_type("count", Arrow
::UInt8DataType.new
)
85 options
.add_column_type("valid", Arrow
::BooleanDataType.new
)
87 "count" => Arrow
::UInt8DataType.new
,
88 "valid" => Arrow
::BooleanDataType.new
,
94 options
= Arrow
::CSVReadOptions.new
95 null_values
= ["2", "5"]
96 options
.null_values
= null_values
97 assert_equal(null_values
, options
.null_values
)
99 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
106 "message" => build_string_array(["Start", "Shutdown", "Restart"]),
107 "count" => build_int64_array([nil, 9, nil]),
109 assert_equal(build_table(columns
),
# Checks that CSVReadOptions#add_null_value appends to the null values
# that were assigned beforehand instead of replacing them.
def test_add_null_value
  initial_nulls = ["2", "5"]
  read_options = Arrow::CSVReadOptions.new
  read_options.null_values = initial_nulls
  read_options.add_null_value("9")
  expected_nulls = initial_nulls + ["9"]
  assert_equal(expected_nulls, read_options.null_values)
end
121 def test_boolean_values
122 options
= Arrow
::CSVReadOptions.new
123 true_values
= ["Start", "Restart"]
124 options
.true_values
= true_values
125 assert_equal(true_values
, options
.true_values
)
127 false_values
= ["Shutdown"]
128 options
.false_values
= false_values
129 assert_equal(false_values
, options
.false_values
)
131 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
138 "message" => build_boolean_array([true, false, true]),
139 "count" => build_int64_array([2, 9, 5]),
141 assert_equal(build_table(columns
),
# Checks that CSVReadOptions#add_true_value appends one more spelling to
# the true values that were assigned beforehand.
def test_add_true_value
  seeded = ["Start", "Restart"]
  csv_options = Arrow::CSVReadOptions.new
  csv_options.true_values = seeded
  csv_options.add_true_value("Shutdown")
  assert_equal([*seeded, "Shutdown"],
               csv_options.true_values)
end
# Checks that CSVReadOptions#add_false_value appends one more spelling to
# the false values that were assigned beforehand.
def test_add_false_value
  reader_options = Arrow::CSVReadOptions.new
  base_values = ["Start", "Restart"]
  reader_options.false_values = base_values
  reader_options.add_false_value("Shutdown")
  want = base_values + ["Shutdown"]
  got = reader_options.false_values
  assert_equal(want, got)
end
161 def test_allow_null_strings
162 options
= Arrow
::CSVReadOptions.new
163 options
.null_values
= ["Start", "Restart"]
164 options
.allow_null_strings
= true
165 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
172 "message" => build_string_array([nil, "Shutdown", nil]),
173 "count" => build_int64_array([2, 9, 5]),
175 assert_equal(build_table(columns
),
180 options
= Arrow
::CSVReadOptions.new
181 options
.n_skip_rows
= 1
182 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
185 "Shutdown1","Shutdown2"
189 "Start1" => build_string_array(["Shutdown1", "Reboot1"]),
190 "Start2" => build_string_array(["Shutdown2", "Reboot2"]),
192 assert_equal(build_table(columns
),
196 def test_column_names
197 options
= Arrow
::CSVReadOptions.new
198 column_names
= ["message", "count"]
199 options
.column_names
= column_names
200 assert_equal(column_names
, options
.column_names
)
202 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
208 "message" => build_string_array(["Start", "Shutdown", "Reboot"]),
209 "count" => build_int64_array([2, 9, 5]),
211 assert_equal(build_table(columns
),
# Checks that CSVReadOptions#add_column_name appends a name after the
# column names that were assigned beforehand.
def test_add_column_name
  preset_names = ["message", "count"]
  opts = Arrow::CSVReadOptions.new
  opts.column_names = preset_names
  opts.add_column_name("score")
  expected_names = preset_names + ["score"]
  assert_equal(expected_names, opts.column_names)
end
223 def test_generate_column_names
224 options
= Arrow
::CSVReadOptions.new
225 options
.generate_column_names
= true
227 table
= Arrow
::CSVReader.new(open_input(<<-CSV), options)
233 "f0" => build_string_array(["Start", "Shutdown", "Reboot"]),
234 "f1" => build_int64_array([2, 9, 5]),
236 assert_equal(build_table(columns
),