]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/json/parser_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / json / parser_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/json/parser.h"
19
20 #include <gmock/gmock-matchers.h>
21 #include <gtest/gtest.h>
22
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "arrow/json/options.h"
28 #include "arrow/json/test_common.h"
29 #include "arrow/status.h"
30 #include "arrow/testing/gtest_util.h"
31 #include "arrow/util/checked_cast.h"
32 #include "arrow/util/string_view.h"
33
34 namespace arrow {
35
36 using internal::checked_cast;
37
38 namespace json {
39
40 using util::string_view;
41
42 void AssertUnconvertedStructArraysEqual(const StructArray& expected,
43 const StructArray& actual);
44
45 void AssertUnconvertedArraysEqual(const Array& expected, const Array& actual) {
46 switch (actual.type_id()) {
47 case Type::BOOL:
48 case Type::NA:
49 return AssertArraysEqual(expected, actual);
50 case Type::DICTIONARY: {
51 ASSERT_EQ(expected.type_id(), Type::STRING);
52 std::shared_ptr<Array> actual_decoded;
53 ASSERT_OK(DecodeStringDictionary(checked_cast<const DictionaryArray&>(actual),
54 &actual_decoded));
55 return AssertArraysEqual(expected, *actual_decoded);
56 }
57 case Type::LIST: {
58 ASSERT_EQ(expected.type_id(), Type::LIST);
59 ASSERT_EQ(expected.null_count(), actual.null_count());
60 if (expected.null_count() != 0) {
61 AssertBufferEqual(*expected.null_bitmap(), *actual.null_bitmap());
62 }
63 const auto& expected_offsets = expected.data()->buffers[1];
64 const auto& actual_offsets = actual.data()->buffers[1];
65 AssertBufferEqual(*expected_offsets, *actual_offsets);
66 auto expected_values = checked_cast<const ListArray&>(expected).values();
67 auto actual_values = checked_cast<const ListArray&>(actual).values();
68 return AssertUnconvertedArraysEqual(*expected_values, *actual_values);
69 }
70 case Type::STRUCT:
71 ASSERT_EQ(expected.type_id(), Type::STRUCT);
72 return AssertUnconvertedStructArraysEqual(
73 checked_cast<const StructArray&>(expected),
74 checked_cast<const StructArray&>(actual));
75 default:
76 FAIL();
77 }
78 }
79
80 void AssertUnconvertedStructArraysEqual(const StructArray& expected,
81 const StructArray& actual) {
82 ASSERT_EQ(expected.num_fields(), actual.num_fields());
83 for (int i = 0; i < expected.num_fields(); ++i) {
84 auto expected_name = expected.type()->field(i)->name();
85 auto actual_name = actual.type()->field(i)->name();
86 ASSERT_EQ(expected_name, actual_name);
87 AssertUnconvertedArraysEqual(*expected.field(i), *actual.field(i));
88 }
89 }
90
91 void AssertParseColumns(ParseOptions options, string_view src_str,
92 const std::vector<std::shared_ptr<Field>>& fields,
93 const std::vector<std::string>& columns_json) {
94 std::shared_ptr<Array> parsed;
95 ASSERT_OK(ParseFromString(options, src_str, &parsed));
96 auto struct_array = std::static_pointer_cast<StructArray>(parsed);
97 for (size_t i = 0; i < fields.size(); ++i) {
98 auto column_expected = ArrayFromJSON(fields[i]->type(), columns_json[i]);
99 auto column = struct_array->GetFieldByName(fields[i]->name());
100 AssertUnconvertedArraysEqual(*column_expected, *column);
101 }
102 }
103
104 // TODO(bkietz) parameterize (at least some of) these tests over UnexpectedFieldBehavior
105
106 TEST(BlockParserWithSchema, Basics) {
107 auto options = ParseOptions::Defaults();
108 options.explicit_schema =
109 schema({field("hello", float64()), field("world", boolean()), field("yo", utf8())});
110 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
111 AssertParseColumns(
112 options, scalars_only_src(),
113 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
114 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]", "[false, null, null, true]",
115 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
116 }
117
118 TEST(BlockParserWithSchema, Empty) {
119 auto options = ParseOptions::Defaults();
120 options.explicit_schema =
121 schema({field("hello", float64()), field("world", boolean()), field("yo", utf8())});
122 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
123 AssertParseColumns(
124 options, "",
125 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
126 {"[]", "[]", "[]"});
127 }
128
129 TEST(BlockParserWithSchema, SkipFieldsOutsideSchema) {
130 auto options = ParseOptions::Defaults();
131 options.explicit_schema = schema({field("hello", float64()), field("yo", utf8())});
132 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
133 AssertParseColumns(options, scalars_only_src(),
134 {field("hello", utf8()), field("yo", utf8())},
135 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]",
136 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
137 }
138
139 class BlockParserTypeError : public ::testing::TestWithParam<UnexpectedFieldBehavior> {
140 public:
141 ParseOptions Options(std::shared_ptr<Schema> explicit_schema) {
142 auto options = ParseOptions::Defaults();
143 options.explicit_schema = std::move(explicit_schema);
144 options.unexpected_field_behavior = GetParam();
145 return options;
146 }
147 };
148
149 TEST_P(BlockParserTypeError, FailOnInconvertible) {
150 auto options = Options(schema({field("a", int32())}));
151 std::shared_ptr<Array> parsed;
152 Status error = ParseFromString(options, "{\"a\":0}\n{\"a\":true}", &parsed);
153 ASSERT_RAISES(Invalid, error);
154 EXPECT_THAT(
155 error.message(),
156 testing::StartsWith(
157 "JSON parse error: Column(/a) changed from number to boolean in row 1"));
158 }
159
160 TEST_P(BlockParserTypeError, FailOnNestedInconvertible) {
161 auto options = Options(schema({field("a", list(struct_({field("b", int32())})))}));
162 std::shared_ptr<Array> parsed;
163 Status error =
164 ParseFromString(options, "{\"a\":[{\"b\":0}]}\n{\"a\":[{\"b\":true}]}", &parsed);
165 ASSERT_RAISES(Invalid, error);
166 EXPECT_THAT(
167 error.message(),
168 testing::StartsWith(
169 "JSON parse error: Column(/a/[]/b) changed from number to boolean in row 1"));
170 }
171
172 TEST_P(BlockParserTypeError, FailOnDuplicateKeys) {
173 std::shared_ptr<Array> parsed;
174 Status error = ParseFromString(Options(schema({field("a", int32())})),
175 "{\"a\":0, \"a\":1}\n", &parsed);
176 ASSERT_RAISES(Invalid, error);
177 EXPECT_THAT(
178 error.message(),
179 testing::StartsWith("JSON parse error: Column(/a) was specified twice in row 0"));
180 }
181
182 TEST_P(BlockParserTypeError, FailOnDuplicateKeysNoSchema) {
183 std::shared_ptr<Array> parsed;
184 Status error =
185 ParseFromString(ParseOptions::Defaults(), "{\"a\":0, \"a\":1}\n", &parsed);
186
187 ASSERT_RAISES(Invalid, error);
188 EXPECT_THAT(
189 error.message(),
190 testing::StartsWith("JSON parse error: Column(/a) was specified twice in row 0"));
191 }
192
193 INSTANTIATE_TEST_SUITE_P(BlockParserTypeError, BlockParserTypeError,
194 ::testing::Values(UnexpectedFieldBehavior::Ignore,
195 UnexpectedFieldBehavior::Error,
196 UnexpectedFieldBehavior::InferType));
197
198 TEST(BlockParserWithSchema, Nested) {
199 auto options = ParseOptions::Defaults();
200 options.explicit_schema = schema({field("yo", utf8()), field("arr", list(int32())),
201 field("nuf", struct_({field("ps", int32())}))});
202 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
203 AssertParseColumns(options, nested_src(),
204 {field("yo", utf8()), field("arr", list(utf8())),
205 field("nuf", struct_({field("ps", utf8())}))},
206 {"[\"thing\", null, \"\xe5\xbf\x8d\", null]",
207 R"([["1", "2", "3"], ["2"], [], null])",
208 R"([{"ps":null}, {}, {"ps":"78"}, {"ps":"90"}])"});
209 }
210
211 TEST(BlockParserWithSchema, FailOnIncompleteJson) {
212 auto options = ParseOptions::Defaults();
213 options.explicit_schema = schema({field("a", int32())});
214 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
215 std::shared_ptr<Array> parsed;
216 ASSERT_RAISES(Invalid, ParseFromString(options, "{\"a\":0, \"b\"", &parsed));
217 }
218
219 TEST(BlockParser, Basics) {
220 auto options = ParseOptions::Defaults();
221 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
222 AssertParseColumns(
223 options, scalars_only_src(),
224 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
225 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]", "[false, null, null, true]",
226 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
227 }
228
229 TEST(BlockParser, Nested) {
230 auto options = ParseOptions::Defaults();
231 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
232 AssertParseColumns(options, nested_src(),
233 {field("yo", utf8()), field("arr", list(utf8())),
234 field("nuf", struct_({field("ps", utf8())}))},
235 {"[\"thing\", null, \"\xe5\xbf\x8d\", null]",
236 R"([["1", "2", "3"], ["2"], [], null])",
237 R"([{"ps":null}, {}, {"ps":"78"}, {"ps":"90"}])"});
238 }
239
240 TEST(BlockParser, Null) {
241 auto options = ParseOptions::Defaults();
242 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
243 AssertParseColumns(
244 options, null_src(),
245 {field("plain", null()), field("list1", list(null())), field("list2", list(null())),
246 field("struct", struct_({field("plain", null())}))},
247 {"[null, null]", "[[], []]", "[[], [null]]",
248 R"([{"plain": null}, {"plain": null}])"});
249 }
250
251 TEST(BlockParser, AdHoc) {
252 auto options = ParseOptions::Defaults();
253 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
254 AssertParseColumns(
255 options, R"({"a": [1], "b": {"c": true, "d": "1991-02-03"}}
256 {"a": [], "b": {"c": false, "d": "2019-04-01"}}
257 )",
258 {field("a", list(utf8())),
259 field("b", struct_({field("c", boolean()), field("d", utf8())}))},
260 {R"([["1"], []])",
261 R"([{"c":true, "d": "1991-02-03"}, {"c":false, "d":"2019-04-01"}])"});
262 }
263
264 } // namespace json
265 } // namespace arrow