]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <cstdint> | |
19 | #include <sstream> | |
20 | #include <type_traits> | |
21 | #include <utility> | |
22 | #include <vector> | |
23 | ||
24 | #include "arrow/array/array_dict.h" | |
25 | #include "arrow/array/builder_binary.h" | |
26 | #include "arrow/array/builder_decimal.h" | |
27 | #include "arrow/array/builder_dict.h" | |
28 | #include "arrow/array/builder_nested.h" | |
29 | #include "arrow/array/builder_primitive.h" | |
30 | #include "arrow/array/builder_time.h" | |
31 | #include "arrow/array/builder_union.h" | |
32 | #include "arrow/ipc/json_simple.h" | |
33 | #include "arrow/scalar.h" | |
34 | #include "arrow/type_traits.h" | |
35 | #include "arrow/util/checked_cast.h" | |
36 | #include "arrow/util/decimal.h" | |
37 | #include "arrow/util/logging.h" | |
38 | #include "arrow/util/string_view.h" | |
39 | #include "arrow/util/value_parsing.h" | |
40 | ||
41 | #include "arrow/json/rapidjson_defs.h" | |
42 | ||
43 | #include <rapidjson/document.h> | |
44 | #include <rapidjson/error/en.h> | |
45 | #include <rapidjson/rapidjson.h> | |
46 | #include <rapidjson/reader.h> | |
47 | #include <rapidjson/writer.h> | |
48 | ||
49 | namespace rj = arrow::rapidjson; | |
50 | ||
51 | namespace arrow { | |
52 | ||
53 | using internal::ParseValue; | |
54 | ||
55 | namespace ipc { | |
56 | namespace internal { | |
57 | namespace json { | |
58 | ||
59 | using ::arrow::internal::checked_cast; | |
60 | using ::arrow::internal::checked_pointer_cast; | |
61 | ||
62 | namespace { | |
63 | ||
64 | constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; | |
65 | ||
66 | Status JSONTypeError(const char* expected_type, rj::Type json_type) { | |
67 | return Status::Invalid("Expected ", expected_type, " or null, got JSON type ", | |
68 | json_type); | |
69 | } | |
70 | ||
71 | class Converter { | |
72 | public: | |
73 | virtual ~Converter() = default; | |
74 | ||
75 | virtual Status Init() { return Status::OK(); } | |
76 | ||
77 | virtual Status AppendValue(const rj::Value& json_obj) = 0; | |
78 | ||
79 | Status AppendNull() { return this->builder()->AppendNull(); } | |
80 | ||
81 | virtual Status AppendValues(const rj::Value& json_array) = 0; | |
82 | ||
83 | virtual std::shared_ptr<ArrayBuilder> builder() = 0; | |
84 | ||
85 | virtual Status Finish(std::shared_ptr<Array>* out) { | |
86 | auto builder = this->builder(); | |
87 | if (builder->length() == 0) { | |
88 | // Make sure the builder was initialized | |
89 | RETURN_NOT_OK(builder->Resize(1)); | |
90 | } | |
91 | return builder->Finish(out); | |
92 | } | |
93 | ||
94 | protected: | |
95 | std::shared_ptr<DataType> type_; | |
96 | }; | |
97 | ||
98 | Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out); | |
99 | ||
100 | // CRTP | |
101 | template <class Derived> | |
102 | class ConcreteConverter : public Converter { | |
103 | public: | |
104 | Status AppendValues(const rj::Value& json_array) override { | |
105 | auto self = static_cast<Derived*>(this); | |
106 | if (!json_array.IsArray()) { | |
107 | return JSONTypeError("array", json_array.GetType()); | |
108 | } | |
109 | auto size = json_array.Size(); | |
110 | for (uint32_t i = 0; i < size; ++i) { | |
111 | RETURN_NOT_OK(self->AppendValue(json_array[i])); | |
112 | } | |
113 | return Status::OK(); | |
114 | } | |
115 | ||
116 | const std::shared_ptr<DataType>& value_type() { | |
117 | if (type_->id() != Type::DICTIONARY) { | |
118 | return type_; | |
119 | } | |
120 | return checked_cast<const DictionaryType&>(*type_).value_type(); | |
121 | } | |
122 | ||
123 | template <typename BuilderType> | |
124 | Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) { | |
125 | std::unique_ptr<ArrayBuilder> builder; | |
126 | RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder)); | |
127 | *out = checked_pointer_cast<BuilderType>(std::move(builder)); | |
128 | DCHECK(*out); | |
129 | return Status::OK(); | |
130 | } | |
131 | }; | |
132 | ||
133 | // ------------------------------------------------------------------------ | |
134 | // Converter for null arrays | |
135 | ||
136 | class NullConverter final : public ConcreteConverter<NullConverter> { | |
137 | public: | |
138 | explicit NullConverter(const std::shared_ptr<DataType>& type) { | |
139 | type_ = type; | |
140 | builder_ = std::make_shared<NullBuilder>(); | |
141 | } | |
142 | ||
143 | Status AppendValue(const rj::Value& json_obj) override { | |
144 | if (json_obj.IsNull()) { | |
145 | return AppendNull(); | |
146 | } | |
147 | return JSONTypeError("null", json_obj.GetType()); | |
148 | } | |
149 | ||
150 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
151 | ||
152 | private: | |
153 | std::shared_ptr<NullBuilder> builder_; | |
154 | }; | |
155 | ||
156 | // ------------------------------------------------------------------------ | |
157 | // Converter for boolean arrays | |
158 | ||
159 | class BooleanConverter final : public ConcreteConverter<BooleanConverter> { | |
160 | public: | |
161 | explicit BooleanConverter(const std::shared_ptr<DataType>& type) { | |
162 | type_ = type; | |
163 | builder_ = std::make_shared<BooleanBuilder>(); | |
164 | } | |
165 | ||
166 | Status AppendValue(const rj::Value& json_obj) override { | |
167 | if (json_obj.IsNull()) { | |
168 | return AppendNull(); | |
169 | } | |
170 | if (json_obj.IsBool()) { | |
171 | return builder_->Append(json_obj.GetBool()); | |
172 | } | |
173 | if (json_obj.IsInt()) { | |
174 | return builder_->Append(json_obj.GetInt() != 0); | |
175 | } | |
176 | return JSONTypeError("boolean", json_obj.GetType()); | |
177 | } | |
178 | ||
179 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
180 | ||
181 | private: | |
182 | std::shared_ptr<BooleanBuilder> builder_; | |
183 | }; | |
184 | ||
185 | // ------------------------------------------------------------------------ | |
186 | // Helpers for numeric converters | |
187 | ||
188 | // Convert single signed integer value (also {Date,Time}{32,64} and Timestamp) | |
189 | template <typename T> | |
190 | enable_if_physical_signed_integer<T, Status> ConvertNumber(const rj::Value& json_obj, | |
191 | const DataType& type, | |
192 | typename T::c_type* out) { | |
193 | if (json_obj.IsInt64()) { | |
194 | int64_t v64 = json_obj.GetInt64(); | |
195 | *out = static_cast<typename T::c_type>(v64); | |
196 | if (*out == v64) { | |
197 | return Status::OK(); | |
198 | } else { | |
199 | return Status::Invalid("Value ", v64, " out of bounds for ", type); | |
200 | } | |
201 | } else { | |
202 | *out = static_cast<typename T::c_type>(0); | |
203 | return JSONTypeError("signed int", json_obj.GetType()); | |
204 | } | |
205 | } | |
206 | ||
207 | // Convert single unsigned integer value | |
208 | template <typename T> | |
209 | enable_if_physical_unsigned_integer<T, Status> ConvertNumber(const rj::Value& json_obj, | |
210 | const DataType& type, | |
211 | typename T::c_type* out) { | |
212 | if (json_obj.IsUint64()) { | |
213 | uint64_t v64 = json_obj.GetUint64(); | |
214 | *out = static_cast<typename T::c_type>(v64); | |
215 | if (*out == v64) { | |
216 | return Status::OK(); | |
217 | } else { | |
218 | return Status::Invalid("Value ", v64, " out of bounds for ", type); | |
219 | } | |
220 | } else { | |
221 | *out = static_cast<typename T::c_type>(0); | |
222 | return JSONTypeError("unsigned int", json_obj.GetType()); | |
223 | } | |
224 | } | |
225 | ||
226 | // Convert single floating point value | |
227 | template <typename T> | |
228 | enable_if_physical_floating_point<T, Status> ConvertNumber(const rj::Value& json_obj, | |
229 | const DataType& type, | |
230 | typename T::c_type* out) { | |
231 | if (json_obj.IsNumber()) { | |
232 | *out = static_cast<typename T::c_type>(json_obj.GetDouble()); | |
233 | return Status::OK(); | |
234 | } else { | |
235 | *out = static_cast<typename T::c_type>(0); | |
236 | return JSONTypeError("number", json_obj.GetType()); | |
237 | } | |
238 | } | |
239 | ||
240 | // ------------------------------------------------------------------------ | |
241 | // Converter for int arrays | |
242 | ||
243 | template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType> | |
244 | class IntegerConverter final | |
245 | : public ConcreteConverter<IntegerConverter<Type, BuilderType>> { | |
246 | using c_type = typename Type::c_type; | |
247 | ||
248 | static constexpr auto is_signed = std::is_signed<c_type>::value; | |
249 | ||
250 | public: | |
251 | explicit IntegerConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; } | |
252 | ||
253 | Status Init() override { return this->MakeConcreteBuilder(&builder_); } | |
254 | ||
255 | Status AppendValue(const rj::Value& json_obj) override { | |
256 | if (json_obj.IsNull()) { | |
257 | return this->AppendNull(); | |
258 | } | |
259 | c_type value; | |
260 | RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value)); | |
261 | return builder_->Append(value); | |
262 | } | |
263 | ||
264 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
265 | ||
266 | private: | |
267 | std::shared_ptr<BuilderType> builder_; | |
268 | }; | |
269 | ||
270 | // ------------------------------------------------------------------------ | |
271 | // Converter for float arrays | |
272 | ||
273 | template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType> | |
274 | class FloatConverter final : public ConcreteConverter<FloatConverter<Type, BuilderType>> { | |
275 | using c_type = typename Type::c_type; | |
276 | ||
277 | public: | |
278 | explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; } | |
279 | ||
280 | Status Init() override { return this->MakeConcreteBuilder(&builder_); } | |
281 | ||
282 | Status AppendValue(const rj::Value& json_obj) override { | |
283 | if (json_obj.IsNull()) { | |
284 | return this->AppendNull(); | |
285 | } | |
286 | c_type value; | |
287 | RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value)); | |
288 | return builder_->Append(value); | |
289 | } | |
290 | ||
291 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
292 | ||
293 | private: | |
294 | std::shared_ptr<BuilderType> builder_; | |
295 | }; | |
296 | ||
297 | // ------------------------------------------------------------------------ | |
298 | // Converter for decimal arrays | |
299 | ||
300 | template <typename DecimalSubtype, typename DecimalValue, typename BuilderType> | |
301 | class DecimalConverter final | |
302 | : public ConcreteConverter< | |
303 | DecimalConverter<DecimalSubtype, DecimalValue, BuilderType>> { | |
304 | public: | |
305 | explicit DecimalConverter(const std::shared_ptr<DataType>& type) { | |
306 | this->type_ = type; | |
307 | decimal_type_ = &checked_cast<const DecimalSubtype&>(*this->value_type()); | |
308 | } | |
309 | ||
310 | Status Init() override { return this->MakeConcreteBuilder(&builder_); } | |
311 | ||
312 | Status AppendValue(const rj::Value& json_obj) override { | |
313 | if (json_obj.IsNull()) { | |
314 | return this->AppendNull(); | |
315 | } | |
316 | if (json_obj.IsString()) { | |
317 | int32_t precision, scale; | |
318 | DecimalValue d; | |
319 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); | |
320 | RETURN_NOT_OK(DecimalValue::FromString(view, &d, &precision, &scale)); | |
321 | if (scale != decimal_type_->scale()) { | |
322 | return Status::Invalid("Invalid scale for decimal: expected ", | |
323 | decimal_type_->scale(), ", got ", scale); | |
324 | } | |
325 | return builder_->Append(d); | |
326 | } | |
327 | return JSONTypeError("decimal string", json_obj.GetType()); | |
328 | } | |
329 | ||
330 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
331 | ||
332 | private: | |
333 | std::shared_ptr<BuilderType> builder_; | |
334 | const DecimalSubtype* decimal_type_; | |
335 | }; | |
336 | ||
337 | template <typename BuilderType = typename TypeTraits<Decimal128Type>::BuilderType> | |
338 | using Decimal128Converter = DecimalConverter<Decimal128Type, Decimal128, BuilderType>; | |
339 | template <typename BuilderType = typename TypeTraits<Decimal256Type>::BuilderType> | |
340 | using Decimal256Converter = DecimalConverter<Decimal256Type, Decimal256, BuilderType>; | |
341 | ||
342 | // ------------------------------------------------------------------------ | |
343 | // Converter for timestamp arrays | |
344 | ||
345 | class TimestampConverter final : public ConcreteConverter<TimestampConverter> { | |
346 | public: | |
347 | explicit TimestampConverter(const std::shared_ptr<DataType>& type) | |
348 | : timestamp_type_{checked_cast<const TimestampType*>(type.get())} { | |
349 | this->type_ = type; | |
350 | builder_ = std::make_shared<TimestampBuilder>(type, default_memory_pool()); | |
351 | } | |
352 | ||
353 | Status AppendValue(const rj::Value& json_obj) override { | |
354 | if (json_obj.IsNull()) { | |
355 | return this->AppendNull(); | |
356 | } | |
357 | int64_t value; | |
358 | if (json_obj.IsNumber()) { | |
359 | RETURN_NOT_OK(ConvertNumber<Int64Type>(json_obj, *this->type_, &value)); | |
360 | } else if (json_obj.IsString()) { | |
361 | util::string_view view(json_obj.GetString(), json_obj.GetStringLength()); | |
362 | if (!ParseValue(*timestamp_type_, view.data(), view.size(), &value)) { | |
363 | return Status::Invalid("couldn't parse timestamp from ", view); | |
364 | } | |
365 | } else { | |
366 | return JSONTypeError("timestamp", json_obj.GetType()); | |
367 | } | |
368 | return builder_->Append(value); | |
369 | } | |
370 | ||
371 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
372 | ||
373 | private: | |
374 | const TimestampType* timestamp_type_; | |
375 | std::shared_ptr<TimestampBuilder> builder_; | |
376 | }; | |
377 | ||
378 | // ------------------------------------------------------------------------ | |
379 | // Converter for day-time interval arrays | |
380 | ||
381 | class DayTimeIntervalConverter final | |
382 | : public ConcreteConverter<DayTimeIntervalConverter> { | |
383 | public: | |
384 | explicit DayTimeIntervalConverter(const std::shared_ptr<DataType>& type) { | |
385 | this->type_ = type; | |
386 | builder_ = std::make_shared<DayTimeIntervalBuilder>(default_memory_pool()); | |
387 | } | |
388 | ||
389 | Status AppendValue(const rj::Value& json_obj) override { | |
390 | if (json_obj.IsNull()) { | |
391 | return this->AppendNull(); | |
392 | } | |
393 | DayTimeIntervalType::DayMilliseconds value; | |
394 | if (!json_obj.IsArray()) { | |
395 | return JSONTypeError("array", json_obj.GetType()); | |
396 | } | |
397 | if (json_obj.Size() != 2) { | |
398 | return Status::Invalid( | |
399 | "day time interval pair must have exactly two elements, had ", json_obj.Size()); | |
400 | } | |
401 | RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[0], *this->type_, &value.days)); | |
402 | RETURN_NOT_OK( | |
403 | ConvertNumber<Int32Type>(json_obj[1], *this->type_, &value.milliseconds)); | |
404 | return builder_->Append(value); | |
405 | } | |
406 | ||
407 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
408 | ||
409 | private: | |
410 | std::shared_ptr<DayTimeIntervalBuilder> builder_; | |
411 | }; | |
412 | ||
413 | class MonthDayNanoIntervalConverter final | |
414 | : public ConcreteConverter<MonthDayNanoIntervalConverter> { | |
415 | public: | |
416 | explicit MonthDayNanoIntervalConverter(const std::shared_ptr<DataType>& type) { | |
417 | this->type_ = type; | |
418 | builder_ = std::make_shared<MonthDayNanoIntervalBuilder>(default_memory_pool()); | |
419 | } | |
420 | ||
421 | Status AppendValue(const rj::Value& json_obj) override { | |
422 | if (json_obj.IsNull()) { | |
423 | return this->AppendNull(); | |
424 | } | |
425 | MonthDayNanoIntervalType::MonthDayNanos value; | |
426 | if (!json_obj.IsArray()) { | |
427 | return JSONTypeError("array", json_obj.GetType()); | |
428 | } | |
429 | if (json_obj.Size() != 3) { | |
430 | return Status::Invalid( | |
431 | "month_day_nano_interval must have exactly 3 elements, had ", json_obj.Size()); | |
432 | } | |
433 | RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[0], *this->type_, &value.months)); | |
434 | RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[1], *this->type_, &value.days)); | |
435 | RETURN_NOT_OK( | |
436 | ConvertNumber<Int64Type>(json_obj[2], *this->type_, &value.nanoseconds)); | |
437 | ||
438 | return builder_->Append(value); | |
439 | } | |
440 | ||
441 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
442 | ||
443 | private: | |
444 | std::shared_ptr<MonthDayNanoIntervalBuilder> builder_; | |
445 | }; | |
446 | ||
447 | // ------------------------------------------------------------------------ | |
448 | // Converter for binary and string arrays | |
449 | ||
450 | template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType> | |
451 | class StringConverter final | |
452 | : public ConcreteConverter<StringConverter<Type, BuilderType>> { | |
453 | public: | |
454 | explicit StringConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; } | |
455 | ||
456 | Status Init() override { return this->MakeConcreteBuilder(&builder_); } | |
457 | ||
458 | Status AppendValue(const rj::Value& json_obj) override { | |
459 | if (json_obj.IsNull()) { | |
460 | return this->AppendNull(); | |
461 | } | |
462 | if (json_obj.IsString()) { | |
463 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); | |
464 | return builder_->Append(view); | |
465 | } else { | |
466 | return JSONTypeError("string", json_obj.GetType()); | |
467 | } | |
468 | } | |
469 | ||
470 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
471 | ||
472 | private: | |
473 | std::shared_ptr<BuilderType> builder_; | |
474 | }; | |
475 | ||
476 | // ------------------------------------------------------------------------ | |
477 | // Converter for fixed-size binary arrays | |
478 | ||
479 | template <typename BuilderType = typename TypeTraits<FixedSizeBinaryType>::BuilderType> | |
480 | class FixedSizeBinaryConverter final | |
481 | : public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> { | |
482 | public: | |
483 | explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) { | |
484 | this->type_ = type; | |
485 | } | |
486 | ||
487 | Status Init() override { return this->MakeConcreteBuilder(&builder_); } | |
488 | ||
489 | Status AppendValue(const rj::Value& json_obj) override { | |
490 | if (json_obj.IsNull()) { | |
491 | return this->AppendNull(); | |
492 | } | |
493 | if (json_obj.IsString()) { | |
494 | auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); | |
495 | if (view.length() != static_cast<size_t>(builder_->byte_width())) { | |
496 | std::stringstream ss; | |
497 | ss << "Invalid string length " << view.length() << " in JSON input for " | |
498 | << this->type_->ToString(); | |
499 | return Status::Invalid(ss.str()); | |
500 | } | |
501 | return builder_->Append(view); | |
502 | } else { | |
503 | return JSONTypeError("string", json_obj.GetType()); | |
504 | } | |
505 | } | |
506 | ||
507 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
508 | ||
509 | private: | |
510 | std::shared_ptr<BuilderType> builder_; | |
511 | }; | |
512 | ||
513 | // ------------------------------------------------------------------------ | |
514 | // Converter for list arrays | |
515 | ||
516 | template <typename TYPE> | |
517 | class ListConverter final : public ConcreteConverter<ListConverter<TYPE>> { | |
518 | public: | |
519 | using BuilderType = typename TypeTraits<TYPE>::BuilderType; | |
520 | ||
521 | explicit ListConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; } | |
522 | ||
523 | Status Init() override { | |
524 | const auto& list_type = checked_cast<const TYPE&>(*this->type_); | |
525 | RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_)); | |
526 | auto child_builder = child_converter_->builder(); | |
527 | builder_ = | |
528 | std::make_shared<BuilderType>(default_memory_pool(), child_builder, this->type_); | |
529 | return Status::OK(); | |
530 | } | |
531 | ||
532 | Status AppendValue(const rj::Value& json_obj) override { | |
533 | if (json_obj.IsNull()) { | |
534 | return this->AppendNull(); | |
535 | } | |
536 | RETURN_NOT_OK(builder_->Append()); | |
537 | // Extend the child converter with this JSON array | |
538 | return child_converter_->AppendValues(json_obj); | |
539 | } | |
540 | ||
541 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
542 | ||
543 | private: | |
544 | std::shared_ptr<BuilderType> builder_; | |
545 | std::shared_ptr<Converter> child_converter_; | |
546 | }; | |
547 | ||
548 | // ------------------------------------------------------------------------ | |
549 | // Converter for map arrays | |
550 | ||
551 | class MapConverter final : public ConcreteConverter<MapConverter> { | |
552 | public: | |
553 | explicit MapConverter(const std::shared_ptr<DataType>& type) { type_ = type; } | |
554 | ||
555 | Status Init() override { | |
556 | const auto& map_type = checked_cast<const MapType&>(*type_); | |
557 | RETURN_NOT_OK(GetConverter(map_type.key_type(), &key_converter_)); | |
558 | RETURN_NOT_OK(GetConverter(map_type.item_type(), &item_converter_)); | |
559 | auto key_builder = key_converter_->builder(); | |
560 | auto item_builder = item_converter_->builder(); | |
561 | builder_ = std::make_shared<MapBuilder>(default_memory_pool(), key_builder, | |
562 | item_builder, type_); | |
563 | return Status::OK(); | |
564 | } | |
565 | ||
566 | Status AppendValue(const rj::Value& json_obj) override { | |
567 | if (json_obj.IsNull()) { | |
568 | return this->AppendNull(); | |
569 | } | |
570 | RETURN_NOT_OK(builder_->Append()); | |
571 | if (!json_obj.IsArray()) { | |
572 | return JSONTypeError("array", json_obj.GetType()); | |
573 | } | |
574 | auto size = json_obj.Size(); | |
575 | for (uint32_t i = 0; i < size; ++i) { | |
576 | const auto& json_pair = json_obj[i]; | |
577 | if (!json_pair.IsArray()) { | |
578 | return JSONTypeError("array", json_pair.GetType()); | |
579 | } | |
580 | if (json_pair.Size() != 2) { | |
581 | return Status::Invalid("key item pair must have exactly two elements, had ", | |
582 | json_pair.Size()); | |
583 | } | |
584 | if (json_pair[0].IsNull()) { | |
585 | return Status::Invalid("null key is invalid"); | |
586 | } | |
587 | RETURN_NOT_OK(key_converter_->AppendValue(json_pair[0])); | |
588 | RETURN_NOT_OK(item_converter_->AppendValue(json_pair[1])); | |
589 | } | |
590 | return Status::OK(); | |
591 | } | |
592 | ||
593 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
594 | ||
595 | private: | |
596 | std::shared_ptr<MapBuilder> builder_; | |
597 | std::shared_ptr<Converter> key_converter_, item_converter_; | |
598 | }; | |
599 | ||
600 | // ------------------------------------------------------------------------ | |
601 | // Converter for fixed size list arrays | |
602 | ||
603 | class FixedSizeListConverter final : public ConcreteConverter<FixedSizeListConverter> { | |
604 | public: | |
605 | explicit FixedSizeListConverter(const std::shared_ptr<DataType>& type) { type_ = type; } | |
606 | ||
607 | Status Init() override { | |
608 | const auto& list_type = checked_cast<const FixedSizeListType&>(*type_); | |
609 | list_size_ = list_type.list_size(); | |
610 | RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_)); | |
611 | auto child_builder = child_converter_->builder(); | |
612 | builder_ = std::make_shared<FixedSizeListBuilder>(default_memory_pool(), | |
613 | child_builder, type_); | |
614 | return Status::OK(); | |
615 | } | |
616 | ||
617 | Status AppendValue(const rj::Value& json_obj) override { | |
618 | if (json_obj.IsNull()) { | |
619 | return this->AppendNull(); | |
620 | } | |
621 | RETURN_NOT_OK(builder_->Append()); | |
622 | // Extend the child converter with this JSON array | |
623 | RETURN_NOT_OK(child_converter_->AppendValues(json_obj)); | |
624 | if (json_obj.GetArray().Size() != static_cast<rj::SizeType>(list_size_)) { | |
625 | return Status::Invalid("incorrect list size ", json_obj.GetArray().Size()); | |
626 | } | |
627 | return Status::OK(); | |
628 | } | |
629 | ||
630 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
631 | ||
632 | private: | |
633 | int32_t list_size_; | |
634 | std::shared_ptr<FixedSizeListBuilder> builder_; | |
635 | std::shared_ptr<Converter> child_converter_; | |
636 | }; | |
637 | ||
638 | // ------------------------------------------------------------------------ | |
639 | // Converter for struct arrays | |
640 | ||
641 | class StructConverter final : public ConcreteConverter<StructConverter> { | |
642 | public: | |
643 | explicit StructConverter(const std::shared_ptr<DataType>& type) { type_ = type; } | |
644 | ||
645 | Status Init() override { | |
646 | std::vector<std::shared_ptr<ArrayBuilder>> child_builders; | |
647 | for (const auto& field : type_->fields()) { | |
648 | std::shared_ptr<Converter> child_converter; | |
649 | RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); | |
650 | child_converters_.push_back(child_converter); | |
651 | child_builders.push_back(child_converter->builder()); | |
652 | } | |
653 | builder_ = std::make_shared<StructBuilder>(type_, default_memory_pool(), | |
654 | std::move(child_builders)); | |
655 | return Status::OK(); | |
656 | } | |
657 | ||
658 | // Append a JSON value that is either an array of N elements in order | |
659 | // or an object mapping struct names to values (omitted struct members | |
660 | // are mapped to null). | |
661 | Status AppendValue(const rj::Value& json_obj) override { | |
662 | if (json_obj.IsNull()) { | |
663 | return this->AppendNull(); | |
664 | } | |
665 | if (json_obj.IsArray()) { | |
666 | auto size = json_obj.Size(); | |
667 | auto expected_size = static_cast<uint32_t>(type_->num_fields()); | |
668 | if (size != expected_size) { | |
669 | return Status::Invalid("Expected array of size ", expected_size, | |
670 | ", got array of size ", size); | |
671 | } | |
672 | for (uint32_t i = 0; i < size; ++i) { | |
673 | RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i])); | |
674 | } | |
675 | return builder_->Append(); | |
676 | } | |
677 | if (json_obj.IsObject()) { | |
678 | auto remaining = json_obj.MemberCount(); | |
679 | auto num_children = type_->num_fields(); | |
680 | for (int32_t i = 0; i < num_children; ++i) { | |
681 | const auto& field = type_->field(i); | |
682 | auto it = json_obj.FindMember(field->name()); | |
683 | if (it != json_obj.MemberEnd()) { | |
684 | --remaining; | |
685 | RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value)); | |
686 | } else { | |
687 | RETURN_NOT_OK(child_converters_[i]->AppendNull()); | |
688 | } | |
689 | } | |
690 | if (remaining > 0) { | |
691 | rj::StringBuffer sb; | |
692 | rj::Writer<rj::StringBuffer> writer(sb); | |
693 | json_obj.Accept(writer); | |
694 | return Status::Invalid("Unexpected members in JSON object for type ", | |
695 | type_->ToString(), " Object: ", sb.GetString()); | |
696 | } | |
697 | return builder_->Append(); | |
698 | } | |
699 | return JSONTypeError("array or object", json_obj.GetType()); | |
700 | } | |
701 | ||
702 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
703 | ||
704 | private: | |
705 | std::shared_ptr<StructBuilder> builder_; | |
706 | std::vector<std::shared_ptr<Converter>> child_converters_; | |
707 | }; | |
708 | ||
709 | // ------------------------------------------------------------------------ | |
710 | // Converter for union arrays | |
711 | ||
712 | class UnionConverter final : public ConcreteConverter<UnionConverter> { | |
713 | public: | |
714 | explicit UnionConverter(const std::shared_ptr<DataType>& type) { type_ = type; } | |
715 | ||
716 | Status Init() override { | |
717 | auto union_type = checked_cast<const UnionType*>(type_.get()); | |
718 | mode_ = union_type->mode(); | |
719 | type_id_to_child_num_.clear(); | |
720 | type_id_to_child_num_.resize(union_type->max_type_code() + 1, -1); | |
721 | int child_i = 0; | |
722 | for (auto type_id : union_type->type_codes()) { | |
723 | type_id_to_child_num_[type_id] = child_i++; | |
724 | } | |
725 | std::vector<std::shared_ptr<ArrayBuilder>> child_builders; | |
726 | for (const auto& field : type_->fields()) { | |
727 | std::shared_ptr<Converter> child_converter; | |
728 | RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); | |
729 | child_converters_.push_back(child_converter); | |
730 | child_builders.push_back(child_converter->builder()); | |
731 | } | |
732 | if (mode_ == UnionMode::DENSE) { | |
733 | builder_ = std::make_shared<DenseUnionBuilder>(default_memory_pool(), | |
734 | std::move(child_builders), type_); | |
735 | } else { | |
736 | builder_ = std::make_shared<SparseUnionBuilder>(default_memory_pool(), | |
737 | std::move(child_builders), type_); | |
738 | } | |
739 | return Status::OK(); | |
740 | } | |
741 | ||
742 | // Append a JSON value that must be a 2-long array, containing the type_id | |
743 | // and value of the UnionArray's slot. | |
744 | Status AppendValue(const rj::Value& json_obj) override { | |
745 | if (json_obj.IsNull()) { | |
746 | return this->AppendNull(); | |
747 | } | |
748 | if (!json_obj.IsArray()) { | |
749 | return JSONTypeError("array", json_obj.GetType()); | |
750 | } | |
751 | if (json_obj.Size() != 2) { | |
752 | return Status::Invalid("Expected [type_id, value] pair, got array of size ", | |
753 | json_obj.Size()); | |
754 | } | |
755 | const auto& id_obj = json_obj[0]; | |
756 | if (!id_obj.IsInt()) { | |
757 | return JSONTypeError("int", id_obj.GetType()); | |
758 | } | |
759 | ||
760 | auto id = static_cast<int8_t>(id_obj.GetInt()); | |
761 | auto child_num = type_id_to_child_num_[id]; | |
762 | if (child_num == -1) { | |
763 | return Status::Invalid("type_id ", id, " not found in ", *type_); | |
764 | } | |
765 | ||
766 | auto child_converter = child_converters_[child_num]; | |
767 | if (mode_ == UnionMode::SPARSE) { | |
768 | RETURN_NOT_OK(checked_cast<SparseUnionBuilder&>(*builder_).Append(id)); | |
769 | for (auto&& other_converter : child_converters_) { | |
770 | if (other_converter != child_converter) { | |
771 | RETURN_NOT_OK(other_converter->AppendNull()); | |
772 | } | |
773 | } | |
774 | } else { | |
775 | RETURN_NOT_OK(checked_cast<DenseUnionBuilder&>(*builder_).Append(id)); | |
776 | } | |
777 | return child_converter->AppendValue(json_obj[1]); | |
778 | } | |
779 | ||
780 | std::shared_ptr<ArrayBuilder> builder() override { return builder_; } | |
781 | ||
782 | private: | |
783 | UnionMode::type mode_; | |
784 | std::shared_ptr<ArrayBuilder> builder_; | |
785 | std::vector<std::shared_ptr<Converter>> child_converters_; | |
786 | std::vector<int8_t> type_id_to_child_num_; | |
787 | }; | |
788 | ||
789 | // ------------------------------------------------------------------------ | |
790 | // General conversion functions | |
791 | ||
792 | Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) { | |
793 | return Status::NotImplemented("JSON conversion to ", type->ToString(), | |
794 | " not implemented"); | |
795 | } | |
796 | ||
797 | Status GetDictConverter(const std::shared_ptr<DataType>& type, | |
798 | std::shared_ptr<Converter>* out) { | |
799 | std::shared_ptr<Converter> res; | |
800 | ||
801 | const auto value_type = checked_cast<const DictionaryType&>(*type).value_type(); | |
802 | ||
803 | #define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE) \ | |
804 | case ID: \ | |
805 | res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \ | |
806 | break; | |
807 | ||
808 | #define PARAM_CONVERTER_CASE(ID, CLASS, TYPE) \ | |
809 | case ID: \ | |
810 | res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \ | |
811 | break; | |
812 | ||
813 | switch (value_type->id()) { | |
814 | PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type) | |
815 | PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type) | |
816 | PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type) | |
817 | PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type) | |
818 | PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type) | |
819 | PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type) | |
820 | PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type) | |
821 | PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type) | |
822 | PARAM_CONVERTER_CASE(Type::FLOAT, FloatConverter, FloatType) | |
823 | PARAM_CONVERTER_CASE(Type::DOUBLE, FloatConverter, DoubleType) | |
824 | PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType) | |
825 | PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType) | |
826 | PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType) | |
827 | PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType) | |
828 | SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter, | |
829 | FixedSizeBinaryType) | |
830 | SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter, Decimal128Type) | |
831 | SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter, Decimal256Type) | |
832 | default: | |
833 | return ConversionNotImplemented(type); | |
834 | } | |
835 | ||
836 | #undef SIMPLE_CONVERTER_CASE | |
837 | #undef PARAM_CONVERTER_CASE | |
838 | ||
839 | RETURN_NOT_OK(res->Init()); | |
840 | *out = res; | |
841 | return Status::OK(); | |
842 | } | |
843 | ||
844 | Status GetConverter(const std::shared_ptr<DataType>& type, | |
845 | std::shared_ptr<Converter>* out) { | |
846 | if (type->id() == Type::DICTIONARY) { | |
847 | return GetDictConverter(type, out); | |
848 | } | |
849 | ||
850 | std::shared_ptr<Converter> res; | |
851 | ||
852 | #define SIMPLE_CONVERTER_CASE(ID, CLASS) \ | |
853 | case ID: \ | |
854 | res = std::make_shared<CLASS>(type); \ | |
855 | break; | |
856 | ||
857 | switch (type->id()) { | |
858 | SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>) | |
859 | SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>) | |
860 | SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>) | |
861 | SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>) | |
862 | SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>) | |
863 | SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>) | |
864 | SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>) | |
865 | SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>) | |
866 | SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter) | |
867 | SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Date32Type>) | |
868 | SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Date64Type>) | |
869 | SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Time32Type>) | |
870 | SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Time64Type>) | |
871 | SIMPLE_CONVERTER_CASE(Type::DURATION, IntegerConverter<DurationType>) | |
872 | SIMPLE_CONVERTER_CASE(Type::NA, NullConverter) | |
873 | SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter) | |
874 | SIMPLE_CONVERTER_CASE(Type::HALF_FLOAT, IntegerConverter<HalfFloatType>) | |
875 | SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>) | |
876 | SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>) | |
877 | SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter<ListType>) | |
878 | SIMPLE_CONVERTER_CASE(Type::LARGE_LIST, ListConverter<LargeListType>) | |
879 | SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter) | |
880 | SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter) | |
881 | SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter) | |
882 | SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter<StringType>) | |
883 | SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>) | |
884 | SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>) | |
885 | SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>) | |
886 | SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>) | |
887 | SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter<>) | |
888 | SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter<>) | |
889 | SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter) | |
890 | SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter) | |
891 | SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter<MonthIntervalType>) | |
892 | SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter) | |
893 | SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTH_DAY_NANO, MonthDayNanoIntervalConverter) | |
894 | default: | |
895 | return ConversionNotImplemented(type); | |
896 | } | |
897 | ||
898 | #undef SIMPLE_CONVERTER_CASE | |
899 | ||
900 | RETURN_NOT_OK(res->Init()); | |
901 | *out = res; | |
902 | return Status::OK(); | |
903 | } | |
904 | ||
905 | } // namespace | |
906 | ||
907 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view json_string, | |
908 | std::shared_ptr<Array>* out) { | |
909 | std::shared_ptr<Converter> converter; | |
910 | RETURN_NOT_OK(GetConverter(type, &converter)); | |
911 | ||
912 | rj::Document json_doc; | |
913 | json_doc.Parse<kParseFlags>(json_string.data(), json_string.length()); | |
914 | if (json_doc.HasParseError()) { | |
915 | return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", | |
916 | GetParseError_En(json_doc.GetParseError())); | |
917 | } | |
918 | ||
919 | // The JSON document should be an array, append it | |
920 | RETURN_NOT_OK(converter->AppendValues(json_doc)); | |
921 | return converter->Finish(out); | |
922 | } | |
923 | ||
924 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, | |
925 | const std::string& json_string, std::shared_ptr<Array>* out) { | |
926 | return ArrayFromJSON(type, util::string_view(json_string), out); | |
927 | } | |
928 | ||
929 | Status ArrayFromJSON(const std::shared_ptr<DataType>& type, const char* json_string, | |
930 | std::shared_ptr<Array>* out) { | |
931 | return ArrayFromJSON(type, util::string_view(json_string), out); | |
932 | } | |
933 | ||
934 | Status DictArrayFromJSON(const std::shared_ptr<DataType>& type, | |
935 | util::string_view indices_json, | |
936 | util::string_view dictionary_json, std::shared_ptr<Array>* out) { | |
937 | if (type->id() != Type::DICTIONARY) { | |
938 | return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type); | |
939 | } | |
940 | ||
941 | const auto& dictionary_type = checked_cast<const DictionaryType&>(*type); | |
942 | ||
943 | std::shared_ptr<Array> indices, dictionary; | |
944 | RETURN_NOT_OK(ArrayFromJSON(dictionary_type.index_type(), indices_json, &indices)); | |
945 | RETURN_NOT_OK( | |
946 | ArrayFromJSON(dictionary_type.value_type(), dictionary_json, &dictionary)); | |
947 | ||
948 | return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary)) | |
949 | .Value(out); | |
950 | } | |
951 | ||
952 | Status ScalarFromJSON(const std::shared_ptr<DataType>& type, | |
953 | util::string_view json_string, std::shared_ptr<Scalar>* out) { | |
954 | std::shared_ptr<Converter> converter; | |
955 | RETURN_NOT_OK(GetConverter(type, &converter)); | |
956 | ||
957 | rj::Document json_doc; | |
958 | json_doc.Parse<kParseFlags>(json_string.data(), json_string.length()); | |
959 | if (json_doc.HasParseError()) { | |
960 | return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", | |
961 | GetParseError_En(json_doc.GetParseError())); | |
962 | } | |
963 | ||
964 | std::shared_ptr<Array> array; | |
965 | RETURN_NOT_OK(converter->AppendValue(json_doc)); | |
966 | RETURN_NOT_OK(converter->Finish(&array)); | |
967 | DCHECK_EQ(array->length(), 1); | |
968 | ARROW_ASSIGN_OR_RAISE(*out, array->GetScalar(0)); | |
969 | return Status::OK(); | |
970 | } | |
971 | ||
972 | Status DictScalarFromJSON(const std::shared_ptr<DataType>& type, | |
973 | util::string_view index_json, util::string_view dictionary_json, | |
974 | std::shared_ptr<Scalar>* out) { | |
975 | if (type->id() != Type::DICTIONARY) { | |
976 | return Status::TypeError("DictScalarFromJSON requires dictionary type, got ", *type); | |
977 | } | |
978 | ||
979 | const auto& dictionary_type = checked_cast<const DictionaryType&>(*type); | |
980 | ||
981 | std::shared_ptr<Scalar> index; | |
982 | std::shared_ptr<Array> dictionary; | |
983 | RETURN_NOT_OK(ScalarFromJSON(dictionary_type.index_type(), index_json, &index)); | |
984 | RETURN_NOT_OK( | |
985 | ArrayFromJSON(dictionary_type.value_type(), dictionary_json, &dictionary)); | |
986 | ||
987 | *out = DictionaryScalar::Make(std::move(index), std::move(dictionary)); | |
988 | return Status::OK(); | |
989 | } | |
990 | ||
991 | } // namespace json | |
992 | } // namespace internal | |
993 | } // namespace ipc | |
994 | } // namespace arrow |