]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/c_glib/test/test-csv-reader.rb
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / c_glib / test / test-csv-reader.rb
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
16 # under the License.
17
# Exercises Arrow::CSVReader#read, both with default settings and with the
# individual Arrow::CSVReadOptions knobs that change how CSV text is parsed.
class TestCSVReader < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case("#read") do
    # Wraps +csv+ in an Arrow::Buffer and returns an
    # Arrow::BufferInputStream over that buffer.
    def open_input(csv)
      buffer = Arrow::Buffer.new(csv)
      Arrow::BufferInputStream.new(buffer)
    end

    # Without options, the first row supplies the column names and the
    # columns come back as string / int64 arrays.
    def test_default
      csv = <<~CSV
        message,count
        "Start",2
        "Shutdown",9
      CSV
      reader = Arrow::CSVReader.new(open_input(csv))
      expected = build_table("message" => build_string_array(["Start", "Shutdown"]),
                             "count" => build_int64_array([2, 9]))
      assert_equal(expected, reader.read)
    end

    sub_test_case("options") do
      # Per-column types registered one at a time override the default
      # column types.
      def test_add_column_type
        options = Arrow::CSVReadOptions.new
        options.add_column_type("count", Arrow::UInt8DataType.new)
        options.add_column_type("valid", Arrow::BooleanDataType.new)
        csv = <<~CSV
          count,valid
          2,1
          9,0
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table("count" => build_uint8_array([2, 9]),
                               "valid" => build_boolean_array([true, false]))
        assert_equal(expected, reader.read)
      end

      # Registering a whole schema yields the same result as registering
      # each column type individually.
      def test_add_schema
        options = Arrow::CSVReadOptions.new
        count_field = Arrow::Field.new("count", Arrow::UInt8DataType.new)
        valid_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
        options.add_schema(Arrow::Schema.new([count_field, valid_field]))
        csv = <<~CSV
          count,valid
          2,1
          9,0
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table("count" => build_uint8_array([2, 9]),
                               "valid" => build_boolean_array([true, false]))
        assert_equal(expected, reader.read)
      end

      # The registered column types can be read back as a name => type hash.
      def test_column_types
        # NOTE(review): presumably skips the test on GI bindings older than
        # 3.3.1 -- confirm against Helper::Omittable.
        require_gi_bindings(3, 3, 1)
        options = Arrow::CSVReadOptions.new
        options.add_column_type("count", Arrow::UInt8DataType.new)
        options.add_column_type("valid", Arrow::BooleanDataType.new)
        expected = {
          "count" => Arrow::UInt8DataType.new,
          "valid" => Arrow::BooleanDataType.new,
        }
        assert_equal(expected, options.column_types)
      end

      # Cells whose text matches one of the configured null values are read
      # as nil.
      def test_null_values
        options = Arrow::CSVReadOptions.new
        null_values = ["2", "5"]
        options.null_values = null_values
        assert_equal(null_values, options.null_values)

        csv = <<~CSV
          message,count
          "Start",2
          "Shutdown",9
          "Restart",5
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "message" => build_string_array(["Start", "Shutdown", "Restart"]),
          "count" => build_int64_array([nil, 9, nil])
        )
        assert_equal(expected, reader.read)
      end

      # add_null_value appends to the already configured null values.
      def test_add_null_value
        options = Arrow::CSVReadOptions.new
        null_values = ["2", "5"]
        options.null_values = null_values
        options.add_null_value("9")
        assert_equal([*null_values, "9"], options.null_values)
      end

      # Custom true/false spellings turn a text column into a boolean column.
      def test_boolean_values
        options = Arrow::CSVReadOptions.new
        true_values = ["Start", "Restart"]
        options.true_values = true_values
        assert_equal(true_values, options.true_values)

        false_values = ["Shutdown"]
        options.false_values = false_values
        assert_equal(false_values, options.false_values)

        csv = <<~CSV
          message,count
          "Start",2
          "Shutdown",9
          "Restart",5
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "message" => build_boolean_array([true, false, true]),
          "count" => build_int64_array([2, 9, 5])
        )
        assert_equal(expected, reader.read)
      end

      # add_true_value appends to the already configured true values.
      def test_add_true_value
        options = Arrow::CSVReadOptions.new
        true_values = ["Start", "Restart"]
        options.true_values = true_values
        options.add_true_value("Shutdown")
        assert_equal([*true_values, "Shutdown"], options.true_values)
      end

      # add_false_value appends to the already configured false values.
      def test_add_false_value
        options = Arrow::CSVReadOptions.new
        false_values = ["Start", "Restart"]
        options.false_values = false_values
        options.add_false_value("Shutdown")
        assert_equal([*false_values, "Shutdown"], options.false_values)
      end

      # With allow_null_strings enabled, string cells matching a null value
      # are read as nil as well.
      def test_allow_null_strings
        options = Arrow::CSVReadOptions.new
        options.null_values = ["Start", "Restart"]
        options.allow_null_strings = true
        csv = <<~CSV
          message,count
          "Start",2
          "Shutdown",9
          "Restart",5
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "message" => build_string_array([nil, "Shutdown", nil]),
          "count" => build_int64_array([2, 9, 5])
        )
        assert_equal(expected, reader.read)
      end

      # Skipping one row makes the second line of the input act as the
      # header row.
      def test_n_skip_rows
        options = Arrow::CSVReadOptions.new
        options.n_skip_rows = 1
        csv = <<~CSV
          message1,message2
          "Start1","Start2"
          "Shutdown1","Shutdown2"
          "Reboot1","Reboot2"
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "Start1" => build_string_array(["Shutdown1", "Reboot1"]),
          "Start2" => build_string_array(["Shutdown2", "Reboot2"])
        )
        assert_equal(expected, reader.read)
      end

      # Explicit column names are used as-is, so every input line is data.
      def test_column_names
        options = Arrow::CSVReadOptions.new
        column_names = ["message", "count"]
        options.column_names = column_names
        assert_equal(column_names, options.column_names)

        csv = <<~CSV
          "Start",2
          "Shutdown",9
          "Reboot",5
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "message" => build_string_array(["Start", "Shutdown", "Reboot"]),
          "count" => build_int64_array([2, 9, 5])
        )
        assert_equal(expected, reader.read)
      end

      # add_column_name appends to the already configured column names.
      def test_add_column_name
        options = Arrow::CSVReadOptions.new
        column_names = ["message", "count"]
        options.column_names = column_names
        options.add_column_name("score")
        assert_equal([*column_names, "score"], options.column_names)
      end

      # With generated column names the columns are called "f0", "f1", ...
      # and every input line is data.
      def test_generate_column_names
        options = Arrow::CSVReadOptions.new
        options.generate_column_names = true

        csv = <<~CSV
          "Start",2
          "Shutdown",9
          "Reboot",5
        CSV
        reader = Arrow::CSVReader.new(open_input(csv), options)
        expected = build_table(
          "f0" => build_string_array(["Start", "Shutdown", "Reboot"]),
          "f1" => build_int64_array([2, 9, 5])
        )
        assert_equal(expected, reader.read)
      end
    end
  end
end