]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/arrow/c_glib/test/test-csv-reader.rb
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / c_glib / test / test-csv-reader.rb
diff --git a/ceph/src/arrow/c_glib/test/test-csv-reader.rb b/ceph/src/arrow/c_glib/test/test-csv-reader.rb
new file mode 100644 (file)
index 0000000..018f062
--- /dev/null
@@ -0,0 +1,241 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestCSVReader < Test::Unit::TestCase
+  include Helper::Buildable
+  include Helper::Omittable
+
+  sub_test_case("#read") do
+    def open_input(csv)
+      buffer = Arrow::Buffer.new(csv)
+      Arrow::BufferInputStream.new(buffer)
+    end
+
+    def test_default
+      table = Arrow::CSVReader.new(open_input(<<-CSV))
+message,count
+"Start",2
+"Shutdown",9
+      CSV
+      columns = {
+        "message" => build_string_array(["Start", "Shutdown"]),
+        "count" => build_int64_array([2, 9]),
+      }
+      assert_equal(build_table(columns),
+                   table.read)
+    end
+
+    sub_test_case("options") do
+      def test_add_column_type
+        options = Arrow::CSVReadOptions.new
+        options.add_column_type("count", Arrow::UInt8DataType.new)
+        options.add_column_type("valid", Arrow::BooleanDataType.new)
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+count,valid
+2,1
+9,0
+        CSV
+        columns = {
+          "count" => build_uint8_array([2, 9]),
+          "valid" => build_boolean_array([true, false]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_add_schema
+        options = Arrow::CSVReadOptions.new
+        fields = [
+          Arrow::Field.new("count", Arrow::UInt8DataType.new),
+          Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+        ]
+        schema = Arrow::Schema.new(fields)
+        options.add_schema(schema)
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+count,valid
+2,1
+9,0
+        CSV
+        columns = {
+          "count" => build_uint8_array([2, 9]),
+          "valid" => build_boolean_array([true, false]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_column_types
+        require_gi_bindings(3, 3, 1)
+        options = Arrow::CSVReadOptions.new
+        options.add_column_type("count", Arrow::UInt8DataType.new)
+        options.add_column_type("valid", Arrow::BooleanDataType.new)
+        assert_equal({
+                       "count" => Arrow::UInt8DataType.new,
+                       "valid" => Arrow::BooleanDataType.new,
+                     },
+                     options.column_types)
+      end
+
+      def test_null_values
+        options = Arrow::CSVReadOptions.new
+        null_values = ["2", "5"]
+        options.null_values = null_values
+        assert_equal(null_values, options.null_values)
+
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+        CSV
+        columns = {
+          "message" => build_string_array(["Start", "Shutdown", "Restart"]),
+          "count" => build_int64_array([nil, 9, nil]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_add_null_value
+        options = Arrow::CSVReadOptions.new
+        null_values = ["2", "5"]
+        options.null_values = null_values
+        options.add_null_value("9")
+        assert_equal(null_values + ["9"], options.null_values)
+      end
+
+      def test_boolean_values
+        options = Arrow::CSVReadOptions.new
+        true_values = ["Start", "Restart"]
+        options.true_values = true_values
+        assert_equal(true_values, options.true_values)
+
+        false_values = ["Shutdown"]
+        options.false_values = false_values
+        assert_equal(false_values, options.false_values)
+
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+        CSV
+        columns = {
+          "message" => build_boolean_array([true, false, true]),
+          "count" => build_int64_array([2, 9, 5]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_add_true_value
+        options = Arrow::CSVReadOptions.new
+        true_values = ["Start", "Restart"]
+        options.true_values = true_values
+        options.add_true_value("Shutdown")
+        assert_equal(true_values + ["Shutdown"], options.true_values)
+      end
+
+      def test_add_false_value
+        options = Arrow::CSVReadOptions.new
+        false_values = ["Start", "Restart"]
+        options.false_values = false_values
+        options.add_false_value("Shutdown")
+        assert_equal(false_values + ["Shutdown"], options.false_values)
+      end
+
+      def test_allow_null_strings
+        options = Arrow::CSVReadOptions.new
+        options.null_values = ["Start", "Restart"]
+        options.allow_null_strings = true
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+        CSV
+        columns = {
+          "message" => build_string_array([nil, "Shutdown", nil]),
+          "count" => build_int64_array([2, 9, 5]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_n_skip_rows
+        options = Arrow::CSVReadOptions.new
+        options.n_skip_rows = 1
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message1,message2
+"Start1","Start2"
+"Shutdown1","Shutdown2"
+"Reboot1","Reboot2"
+        CSV
+        columns = {
+          "Start1" => build_string_array(["Shutdown1", "Reboot1"]),
+          "Start2" => build_string_array(["Shutdown2", "Reboot2"]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_column_names
+        options = Arrow::CSVReadOptions.new
+        column_names = ["message", "count"]
+        options.column_names = column_names
+        assert_equal(column_names, options.column_names)
+
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+"Start",2
+"Shutdown",9
+"Reboot",5
+        CSV
+        columns = {
+          "message" => build_string_array(["Start", "Shutdown", "Reboot"]),
+          "count" => build_int64_array([2, 9, 5]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+
+      def test_add_column_name
+        options = Arrow::CSVReadOptions.new
+        column_names = ["message", "count"]
+        options.column_names = column_names
+        options.add_column_name("score")
+        assert_equal(column_names + ["score"], options.column_names)
+      end
+
+      def test_generate_column_names
+        options = Arrow::CSVReadOptions.new
+        options.generate_column_names = true
+
+        table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+"Start",2
+"Shutdown",9
+"Reboot",5
+        CSV
+        columns = {
+          "f0" => build_string_array(["Start", "Shutdown", "Reboot"]),
+          "f1" => build_int64_array([2, 9, 5]),
+        }
+        assert_equal(build_table(columns),
+                     table.read)
+      end
+    end
+  end
+end