]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/json/parser_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / json / parser_test.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/json/parser.h"
19
20#include <gmock/gmock-matchers.h>
21#include <gtest/gtest.h>
22
23#include <string>
24#include <utility>
25#include <vector>
26
27#include "arrow/json/options.h"
28#include "arrow/json/test_common.h"
29#include "arrow/status.h"
30#include "arrow/testing/gtest_util.h"
31#include "arrow/util/checked_cast.h"
32#include "arrow/util/string_view.h"
33
34namespace arrow {
35
36using internal::checked_cast;
37
38namespace json {
39
40using util::string_view;
41
42void AssertUnconvertedStructArraysEqual(const StructArray& expected,
43 const StructArray& actual);
44
45void AssertUnconvertedArraysEqual(const Array& expected, const Array& actual) {
46 switch (actual.type_id()) {
47 case Type::BOOL:
48 case Type::NA:
49 return AssertArraysEqual(expected, actual);
50 case Type::DICTIONARY: {
51 ASSERT_EQ(expected.type_id(), Type::STRING);
52 std::shared_ptr<Array> actual_decoded;
53 ASSERT_OK(DecodeStringDictionary(checked_cast<const DictionaryArray&>(actual),
54 &actual_decoded));
55 return AssertArraysEqual(expected, *actual_decoded);
56 }
57 case Type::LIST: {
58 ASSERT_EQ(expected.type_id(), Type::LIST);
59 ASSERT_EQ(expected.null_count(), actual.null_count());
60 if (expected.null_count() != 0) {
61 AssertBufferEqual(*expected.null_bitmap(), *actual.null_bitmap());
62 }
63 const auto& expected_offsets = expected.data()->buffers[1];
64 const auto& actual_offsets = actual.data()->buffers[1];
65 AssertBufferEqual(*expected_offsets, *actual_offsets);
66 auto expected_values = checked_cast<const ListArray&>(expected).values();
67 auto actual_values = checked_cast<const ListArray&>(actual).values();
68 return AssertUnconvertedArraysEqual(*expected_values, *actual_values);
69 }
70 case Type::STRUCT:
71 ASSERT_EQ(expected.type_id(), Type::STRUCT);
72 return AssertUnconvertedStructArraysEqual(
73 checked_cast<const StructArray&>(expected),
74 checked_cast<const StructArray&>(actual));
75 default:
76 FAIL();
77 }
78}
79
80void AssertUnconvertedStructArraysEqual(const StructArray& expected,
81 const StructArray& actual) {
82 ASSERT_EQ(expected.num_fields(), actual.num_fields());
83 for (int i = 0; i < expected.num_fields(); ++i) {
84 auto expected_name = expected.type()->field(i)->name();
85 auto actual_name = actual.type()->field(i)->name();
86 ASSERT_EQ(expected_name, actual_name);
87 AssertUnconvertedArraysEqual(*expected.field(i), *actual.field(i));
88 }
89}
90
91void AssertParseColumns(ParseOptions options, string_view src_str,
92 const std::vector<std::shared_ptr<Field>>& fields,
93 const std::vector<std::string>& columns_json) {
94 std::shared_ptr<Array> parsed;
95 ASSERT_OK(ParseFromString(options, src_str, &parsed));
96 auto struct_array = std::static_pointer_cast<StructArray>(parsed);
97 for (size_t i = 0; i < fields.size(); ++i) {
98 auto column_expected = ArrayFromJSON(fields[i]->type(), columns_json[i]);
99 auto column = struct_array->GetFieldByName(fields[i]->name());
100 AssertUnconvertedArraysEqual(*column_expected, *column);
101 }
102}
103
104// TODO(bkietz) parameterize (at least some of) these tests over UnexpectedFieldBehavior
105
106TEST(BlockParserWithSchema, Basics) {
107 auto options = ParseOptions::Defaults();
108 options.explicit_schema =
109 schema({field("hello", float64()), field("world", boolean()), field("yo", utf8())});
110 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
111 AssertParseColumns(
112 options, scalars_only_src(),
113 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
114 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]", "[false, null, null, true]",
115 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
116}
117
118TEST(BlockParserWithSchema, Empty) {
119 auto options = ParseOptions::Defaults();
120 options.explicit_schema =
121 schema({field("hello", float64()), field("world", boolean()), field("yo", utf8())});
122 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
123 AssertParseColumns(
124 options, "",
125 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
126 {"[]", "[]", "[]"});
127}
128
129TEST(BlockParserWithSchema, SkipFieldsOutsideSchema) {
130 auto options = ParseOptions::Defaults();
131 options.explicit_schema = schema({field("hello", float64()), field("yo", utf8())});
132 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
133 AssertParseColumns(options, scalars_only_src(),
134 {field("hello", utf8()), field("yo", utf8())},
135 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]",
136 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
137}
138
139class BlockParserTypeError : public ::testing::TestWithParam<UnexpectedFieldBehavior> {
140 public:
141 ParseOptions Options(std::shared_ptr<Schema> explicit_schema) {
142 auto options = ParseOptions::Defaults();
143 options.explicit_schema = std::move(explicit_schema);
144 options.unexpected_field_behavior = GetParam();
145 return options;
146 }
147};
148
149TEST_P(BlockParserTypeError, FailOnInconvertible) {
150 auto options = Options(schema({field("a", int32())}));
151 std::shared_ptr<Array> parsed;
152 Status error = ParseFromString(options, "{\"a\":0}\n{\"a\":true}", &parsed);
153 ASSERT_RAISES(Invalid, error);
154 EXPECT_THAT(
155 error.message(),
156 testing::StartsWith(
157 "JSON parse error: Column(/a) changed from number to boolean in row 1"));
158}
159
160TEST_P(BlockParserTypeError, FailOnNestedInconvertible) {
161 auto options = Options(schema({field("a", list(struct_({field("b", int32())})))}));
162 std::shared_ptr<Array> parsed;
163 Status error =
164 ParseFromString(options, "{\"a\":[{\"b\":0}]}\n{\"a\":[{\"b\":true}]}", &parsed);
165 ASSERT_RAISES(Invalid, error);
166 EXPECT_THAT(
167 error.message(),
168 testing::StartsWith(
169 "JSON parse error: Column(/a/[]/b) changed from number to boolean in row 1"));
170}
171
172TEST_P(BlockParserTypeError, FailOnDuplicateKeys) {
173 std::shared_ptr<Array> parsed;
174 Status error = ParseFromString(Options(schema({field("a", int32())})),
175 "{\"a\":0, \"a\":1}\n", &parsed);
176 ASSERT_RAISES(Invalid, error);
177 EXPECT_THAT(
178 error.message(),
179 testing::StartsWith("JSON parse error: Column(/a) was specified twice in row 0"));
180}
181
182TEST_P(BlockParserTypeError, FailOnDuplicateKeysNoSchema) {
183 std::shared_ptr<Array> parsed;
184 Status error =
185 ParseFromString(ParseOptions::Defaults(), "{\"a\":0, \"a\":1}\n", &parsed);
186
187 ASSERT_RAISES(Invalid, error);
188 EXPECT_THAT(
189 error.message(),
190 testing::StartsWith("JSON parse error: Column(/a) was specified twice in row 0"));
191}
192
193INSTANTIATE_TEST_SUITE_P(BlockParserTypeError, BlockParserTypeError,
194 ::testing::Values(UnexpectedFieldBehavior::Ignore,
195 UnexpectedFieldBehavior::Error,
196 UnexpectedFieldBehavior::InferType));
197
198TEST(BlockParserWithSchema, Nested) {
199 auto options = ParseOptions::Defaults();
200 options.explicit_schema = schema({field("yo", utf8()), field("arr", list(int32())),
201 field("nuf", struct_({field("ps", int32())}))});
202 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
203 AssertParseColumns(options, nested_src(),
204 {field("yo", utf8()), field("arr", list(utf8())),
205 field("nuf", struct_({field("ps", utf8())}))},
206 {"[\"thing\", null, \"\xe5\xbf\x8d\", null]",
207 R"([["1", "2", "3"], ["2"], [], null])",
208 R"([{"ps":null}, {}, {"ps":"78"}, {"ps":"90"}])"});
209}
210
211TEST(BlockParserWithSchema, FailOnIncompleteJson) {
212 auto options = ParseOptions::Defaults();
213 options.explicit_schema = schema({field("a", int32())});
214 options.unexpected_field_behavior = UnexpectedFieldBehavior::Ignore;
215 std::shared_ptr<Array> parsed;
216 ASSERT_RAISES(Invalid, ParseFromString(options, "{\"a\":0, \"b\"", &parsed));
217}
218
219TEST(BlockParser, Basics) {
220 auto options = ParseOptions::Defaults();
221 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
222 AssertParseColumns(
223 options, scalars_only_src(),
224 {field("hello", utf8()), field("world", boolean()), field("yo", utf8())},
225 {"[\"3.5\", \"3.25\", \"3.125\", \"0.0\"]", "[false, null, null, true]",
226 "[\"thing\", null, \"\xe5\xbf\x8d\", null]"});
227}
228
229TEST(BlockParser, Nested) {
230 auto options = ParseOptions::Defaults();
231 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
232 AssertParseColumns(options, nested_src(),
233 {field("yo", utf8()), field("arr", list(utf8())),
234 field("nuf", struct_({field("ps", utf8())}))},
235 {"[\"thing\", null, \"\xe5\xbf\x8d\", null]",
236 R"([["1", "2", "3"], ["2"], [], null])",
237 R"([{"ps":null}, {}, {"ps":"78"}, {"ps":"90"}])"});
238}
239
240TEST(BlockParser, Null) {
241 auto options = ParseOptions::Defaults();
242 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
243 AssertParseColumns(
244 options, null_src(),
245 {field("plain", null()), field("list1", list(null())), field("list2", list(null())),
246 field("struct", struct_({field("plain", null())}))},
247 {"[null, null]", "[[], []]", "[[], [null]]",
248 R"([{"plain": null}, {"plain": null}])"});
249}
250
251TEST(BlockParser, AdHoc) {
252 auto options = ParseOptions::Defaults();
253 options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
254 AssertParseColumns(
255 options, R"({"a": [1], "b": {"c": true, "d": "1991-02-03"}}
256{"a": [], "b": {"c": false, "d": "2019-04-01"}}
257)",
258 {field("a", list(utf8())),
259 field("b", struct_({field("c", boolean()), field("d", utf8())}))},
260 {R"([["1"], []])",
261 R"([{"c":true, "d": "1991-02-03"}, {"c":false, "d":"2019-04-01"}])"});
262}
263
264} // namespace json
265} // namespace arrow