]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/ipc/json_simple.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / ipc / json_simple.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <sstream>
20#include <type_traits>
21#include <utility>
22#include <vector>
23
24#include "arrow/array/array_dict.h"
25#include "arrow/array/builder_binary.h"
26#include "arrow/array/builder_decimal.h"
27#include "arrow/array/builder_dict.h"
28#include "arrow/array/builder_nested.h"
29#include "arrow/array/builder_primitive.h"
30#include "arrow/array/builder_time.h"
31#include "arrow/array/builder_union.h"
32#include "arrow/ipc/json_simple.h"
33#include "arrow/scalar.h"
34#include "arrow/type_traits.h"
35#include "arrow/util/checked_cast.h"
36#include "arrow/util/decimal.h"
37#include "arrow/util/logging.h"
38#include "arrow/util/string_view.h"
39#include "arrow/util/value_parsing.h"
40
41#include "arrow/json/rapidjson_defs.h"
42
43#include <rapidjson/document.h>
44#include <rapidjson/error/en.h>
45#include <rapidjson/rapidjson.h>
46#include <rapidjson/reader.h>
47#include <rapidjson/writer.h>
48
49namespace rj = arrow::rapidjson;
50
51namespace arrow {
52
53using internal::ParseValue;
54
55namespace ipc {
56namespace internal {
57namespace json {
58
59using ::arrow::internal::checked_cast;
60using ::arrow::internal::checked_pointer_cast;
61
62namespace {
63
64constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
65
66Status JSONTypeError(const char* expected_type, rj::Type json_type) {
67 return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
68 json_type);
69}
70
71class Converter {
72 public:
73 virtual ~Converter() = default;
74
75 virtual Status Init() { return Status::OK(); }
76
77 virtual Status AppendValue(const rj::Value& json_obj) = 0;
78
79 Status AppendNull() { return this->builder()->AppendNull(); }
80
81 virtual Status AppendValues(const rj::Value& json_array) = 0;
82
83 virtual std::shared_ptr<ArrayBuilder> builder() = 0;
84
85 virtual Status Finish(std::shared_ptr<Array>* out) {
86 auto builder = this->builder();
87 if (builder->length() == 0) {
88 // Make sure the builder was initialized
89 RETURN_NOT_OK(builder->Resize(1));
90 }
91 return builder->Finish(out);
92 }
93
94 protected:
95 std::shared_ptr<DataType> type_;
96};
97
98Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out);
99
100// CRTP
101template <class Derived>
102class ConcreteConverter : public Converter {
103 public:
104 Status AppendValues(const rj::Value& json_array) override {
105 auto self = static_cast<Derived*>(this);
106 if (!json_array.IsArray()) {
107 return JSONTypeError("array", json_array.GetType());
108 }
109 auto size = json_array.Size();
110 for (uint32_t i = 0; i < size; ++i) {
111 RETURN_NOT_OK(self->AppendValue(json_array[i]));
112 }
113 return Status::OK();
114 }
115
116 const std::shared_ptr<DataType>& value_type() {
117 if (type_->id() != Type::DICTIONARY) {
118 return type_;
119 }
120 return checked_cast<const DictionaryType&>(*type_).value_type();
121 }
122
123 template <typename BuilderType>
124 Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) {
125 std::unique_ptr<ArrayBuilder> builder;
126 RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
127 *out = checked_pointer_cast<BuilderType>(std::move(builder));
128 DCHECK(*out);
129 return Status::OK();
130 }
131};
132
133// ------------------------------------------------------------------------
134// Converter for null arrays
135
136class NullConverter final : public ConcreteConverter<NullConverter> {
137 public:
138 explicit NullConverter(const std::shared_ptr<DataType>& type) {
139 type_ = type;
140 builder_ = std::make_shared<NullBuilder>();
141 }
142
143 Status AppendValue(const rj::Value& json_obj) override {
144 if (json_obj.IsNull()) {
145 return AppendNull();
146 }
147 return JSONTypeError("null", json_obj.GetType());
148 }
149
150 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
151
152 private:
153 std::shared_ptr<NullBuilder> builder_;
154};
155
156// ------------------------------------------------------------------------
157// Converter for boolean arrays
158
159class BooleanConverter final : public ConcreteConverter<BooleanConverter> {
160 public:
161 explicit BooleanConverter(const std::shared_ptr<DataType>& type) {
162 type_ = type;
163 builder_ = std::make_shared<BooleanBuilder>();
164 }
165
166 Status AppendValue(const rj::Value& json_obj) override {
167 if (json_obj.IsNull()) {
168 return AppendNull();
169 }
170 if (json_obj.IsBool()) {
171 return builder_->Append(json_obj.GetBool());
172 }
173 if (json_obj.IsInt()) {
174 return builder_->Append(json_obj.GetInt() != 0);
175 }
176 return JSONTypeError("boolean", json_obj.GetType());
177 }
178
179 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
180
181 private:
182 std::shared_ptr<BooleanBuilder> builder_;
183};
184
185// ------------------------------------------------------------------------
186// Helpers for numeric converters
187
188// Convert single signed integer value (also {Date,Time}{32,64} and Timestamp)
189template <typename T>
190enable_if_physical_signed_integer<T, Status> ConvertNumber(const rj::Value& json_obj,
191 const DataType& type,
192 typename T::c_type* out) {
193 if (json_obj.IsInt64()) {
194 int64_t v64 = json_obj.GetInt64();
195 *out = static_cast<typename T::c_type>(v64);
196 if (*out == v64) {
197 return Status::OK();
198 } else {
199 return Status::Invalid("Value ", v64, " out of bounds for ", type);
200 }
201 } else {
202 *out = static_cast<typename T::c_type>(0);
203 return JSONTypeError("signed int", json_obj.GetType());
204 }
205}
206
207// Convert single unsigned integer value
208template <typename T>
209enable_if_physical_unsigned_integer<T, Status> ConvertNumber(const rj::Value& json_obj,
210 const DataType& type,
211 typename T::c_type* out) {
212 if (json_obj.IsUint64()) {
213 uint64_t v64 = json_obj.GetUint64();
214 *out = static_cast<typename T::c_type>(v64);
215 if (*out == v64) {
216 return Status::OK();
217 } else {
218 return Status::Invalid("Value ", v64, " out of bounds for ", type);
219 }
220 } else {
221 *out = static_cast<typename T::c_type>(0);
222 return JSONTypeError("unsigned int", json_obj.GetType());
223 }
224}
225
226// Convert single floating point value
227template <typename T>
228enable_if_physical_floating_point<T, Status> ConvertNumber(const rj::Value& json_obj,
229 const DataType& type,
230 typename T::c_type* out) {
231 if (json_obj.IsNumber()) {
232 *out = static_cast<typename T::c_type>(json_obj.GetDouble());
233 return Status::OK();
234 } else {
235 *out = static_cast<typename T::c_type>(0);
236 return JSONTypeError("number", json_obj.GetType());
237 }
238}
239
240// ------------------------------------------------------------------------
241// Converter for int arrays
242
243template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
244class IntegerConverter final
245 : public ConcreteConverter<IntegerConverter<Type, BuilderType>> {
246 using c_type = typename Type::c_type;
247
248 static constexpr auto is_signed = std::is_signed<c_type>::value;
249
250 public:
251 explicit IntegerConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
252
253 Status Init() override { return this->MakeConcreteBuilder(&builder_); }
254
255 Status AppendValue(const rj::Value& json_obj) override {
256 if (json_obj.IsNull()) {
257 return this->AppendNull();
258 }
259 c_type value;
260 RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
261 return builder_->Append(value);
262 }
263
264 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
265
266 private:
267 std::shared_ptr<BuilderType> builder_;
268};
269
270// ------------------------------------------------------------------------
271// Converter for float arrays
272
273template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
274class FloatConverter final : public ConcreteConverter<FloatConverter<Type, BuilderType>> {
275 using c_type = typename Type::c_type;
276
277 public:
278 explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
279
280 Status Init() override { return this->MakeConcreteBuilder(&builder_); }
281
282 Status AppendValue(const rj::Value& json_obj) override {
283 if (json_obj.IsNull()) {
284 return this->AppendNull();
285 }
286 c_type value;
287 RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
288 return builder_->Append(value);
289 }
290
291 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
292
293 private:
294 std::shared_ptr<BuilderType> builder_;
295};
296
297// ------------------------------------------------------------------------
298// Converter for decimal arrays
299
300template <typename DecimalSubtype, typename DecimalValue, typename BuilderType>
301class DecimalConverter final
302 : public ConcreteConverter<
303 DecimalConverter<DecimalSubtype, DecimalValue, BuilderType>> {
304 public:
305 explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
306 this->type_ = type;
307 decimal_type_ = &checked_cast<const DecimalSubtype&>(*this->value_type());
308 }
309
310 Status Init() override { return this->MakeConcreteBuilder(&builder_); }
311
312 Status AppendValue(const rj::Value& json_obj) override {
313 if (json_obj.IsNull()) {
314 return this->AppendNull();
315 }
316 if (json_obj.IsString()) {
317 int32_t precision, scale;
318 DecimalValue d;
319 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
320 RETURN_NOT_OK(DecimalValue::FromString(view, &d, &precision, &scale));
321 if (scale != decimal_type_->scale()) {
322 return Status::Invalid("Invalid scale for decimal: expected ",
323 decimal_type_->scale(), ", got ", scale);
324 }
325 return builder_->Append(d);
326 }
327 return JSONTypeError("decimal string", json_obj.GetType());
328 }
329
330 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
331
332 private:
333 std::shared_ptr<BuilderType> builder_;
334 const DecimalSubtype* decimal_type_;
335};
336
337template <typename BuilderType = typename TypeTraits<Decimal128Type>::BuilderType>
338using Decimal128Converter = DecimalConverter<Decimal128Type, Decimal128, BuilderType>;
339template <typename BuilderType = typename TypeTraits<Decimal256Type>::BuilderType>
340using Decimal256Converter = DecimalConverter<Decimal256Type, Decimal256, BuilderType>;
341
342// ------------------------------------------------------------------------
343// Converter for timestamp arrays
344
345class TimestampConverter final : public ConcreteConverter<TimestampConverter> {
346 public:
347 explicit TimestampConverter(const std::shared_ptr<DataType>& type)
348 : timestamp_type_{checked_cast<const TimestampType*>(type.get())} {
349 this->type_ = type;
350 builder_ = std::make_shared<TimestampBuilder>(type, default_memory_pool());
351 }
352
353 Status AppendValue(const rj::Value& json_obj) override {
354 if (json_obj.IsNull()) {
355 return this->AppendNull();
356 }
357 int64_t value;
358 if (json_obj.IsNumber()) {
359 RETURN_NOT_OK(ConvertNumber<Int64Type>(json_obj, *this->type_, &value));
360 } else if (json_obj.IsString()) {
361 util::string_view view(json_obj.GetString(), json_obj.GetStringLength());
362 if (!ParseValue(*timestamp_type_, view.data(), view.size(), &value)) {
363 return Status::Invalid("couldn't parse timestamp from ", view);
364 }
365 } else {
366 return JSONTypeError("timestamp", json_obj.GetType());
367 }
368 return builder_->Append(value);
369 }
370
371 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
372
373 private:
374 const TimestampType* timestamp_type_;
375 std::shared_ptr<TimestampBuilder> builder_;
376};
377
378// ------------------------------------------------------------------------
379// Converter for day-time interval arrays
380
381class DayTimeIntervalConverter final
382 : public ConcreteConverter<DayTimeIntervalConverter> {
383 public:
384 explicit DayTimeIntervalConverter(const std::shared_ptr<DataType>& type) {
385 this->type_ = type;
386 builder_ = std::make_shared<DayTimeIntervalBuilder>(default_memory_pool());
387 }
388
389 Status AppendValue(const rj::Value& json_obj) override {
390 if (json_obj.IsNull()) {
391 return this->AppendNull();
392 }
393 DayTimeIntervalType::DayMilliseconds value;
394 if (!json_obj.IsArray()) {
395 return JSONTypeError("array", json_obj.GetType());
396 }
397 if (json_obj.Size() != 2) {
398 return Status::Invalid(
399 "day time interval pair must have exactly two elements, had ", json_obj.Size());
400 }
401 RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[0], *this->type_, &value.days));
402 RETURN_NOT_OK(
403 ConvertNumber<Int32Type>(json_obj[1], *this->type_, &value.milliseconds));
404 return builder_->Append(value);
405 }
406
407 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
408
409 private:
410 std::shared_ptr<DayTimeIntervalBuilder> builder_;
411};
412
413class MonthDayNanoIntervalConverter final
414 : public ConcreteConverter<MonthDayNanoIntervalConverter> {
415 public:
416 explicit MonthDayNanoIntervalConverter(const std::shared_ptr<DataType>& type) {
417 this->type_ = type;
418 builder_ = std::make_shared<MonthDayNanoIntervalBuilder>(default_memory_pool());
419 }
420
421 Status AppendValue(const rj::Value& json_obj) override {
422 if (json_obj.IsNull()) {
423 return this->AppendNull();
424 }
425 MonthDayNanoIntervalType::MonthDayNanos value;
426 if (!json_obj.IsArray()) {
427 return JSONTypeError("array", json_obj.GetType());
428 }
429 if (json_obj.Size() != 3) {
430 return Status::Invalid(
431 "month_day_nano_interval must have exactly 3 elements, had ", json_obj.Size());
432 }
433 RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[0], *this->type_, &value.months));
434 RETURN_NOT_OK(ConvertNumber<Int32Type>(json_obj[1], *this->type_, &value.days));
435 RETURN_NOT_OK(
436 ConvertNumber<Int64Type>(json_obj[2], *this->type_, &value.nanoseconds));
437
438 return builder_->Append(value);
439 }
440
441 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
442
443 private:
444 std::shared_ptr<MonthDayNanoIntervalBuilder> builder_;
445};
446
447// ------------------------------------------------------------------------
448// Converter for binary and string arrays
449
450template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
451class StringConverter final
452 : public ConcreteConverter<StringConverter<Type, BuilderType>> {
453 public:
454 explicit StringConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
455
456 Status Init() override { return this->MakeConcreteBuilder(&builder_); }
457
458 Status AppendValue(const rj::Value& json_obj) override {
459 if (json_obj.IsNull()) {
460 return this->AppendNull();
461 }
462 if (json_obj.IsString()) {
463 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
464 return builder_->Append(view);
465 } else {
466 return JSONTypeError("string", json_obj.GetType());
467 }
468 }
469
470 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
471
472 private:
473 std::shared_ptr<BuilderType> builder_;
474};
475
476// ------------------------------------------------------------------------
477// Converter for fixed-size binary arrays
478
479template <typename BuilderType = typename TypeTraits<FixedSizeBinaryType>::BuilderType>
480class FixedSizeBinaryConverter final
481 : public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> {
482 public:
483 explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
484 this->type_ = type;
485 }
486
487 Status Init() override { return this->MakeConcreteBuilder(&builder_); }
488
489 Status AppendValue(const rj::Value& json_obj) override {
490 if (json_obj.IsNull()) {
491 return this->AppendNull();
492 }
493 if (json_obj.IsString()) {
494 auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
495 if (view.length() != static_cast<size_t>(builder_->byte_width())) {
496 std::stringstream ss;
497 ss << "Invalid string length " << view.length() << " in JSON input for "
498 << this->type_->ToString();
499 return Status::Invalid(ss.str());
500 }
501 return builder_->Append(view);
502 } else {
503 return JSONTypeError("string", json_obj.GetType());
504 }
505 }
506
507 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
508
509 private:
510 std::shared_ptr<BuilderType> builder_;
511};
512
513// ------------------------------------------------------------------------
514// Converter for list arrays
515
516template <typename TYPE>
517class ListConverter final : public ConcreteConverter<ListConverter<TYPE>> {
518 public:
519 using BuilderType = typename TypeTraits<TYPE>::BuilderType;
520
521 explicit ListConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
522
523 Status Init() override {
524 const auto& list_type = checked_cast<const TYPE&>(*this->type_);
525 RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_));
526 auto child_builder = child_converter_->builder();
527 builder_ =
528 std::make_shared<BuilderType>(default_memory_pool(), child_builder, this->type_);
529 return Status::OK();
530 }
531
532 Status AppendValue(const rj::Value& json_obj) override {
533 if (json_obj.IsNull()) {
534 return this->AppendNull();
535 }
536 RETURN_NOT_OK(builder_->Append());
537 // Extend the child converter with this JSON array
538 return child_converter_->AppendValues(json_obj);
539 }
540
541 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
542
543 private:
544 std::shared_ptr<BuilderType> builder_;
545 std::shared_ptr<Converter> child_converter_;
546};
547
548// ------------------------------------------------------------------------
549// Converter for map arrays
550
551class MapConverter final : public ConcreteConverter<MapConverter> {
552 public:
553 explicit MapConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
554
555 Status Init() override {
556 const auto& map_type = checked_cast<const MapType&>(*type_);
557 RETURN_NOT_OK(GetConverter(map_type.key_type(), &key_converter_));
558 RETURN_NOT_OK(GetConverter(map_type.item_type(), &item_converter_));
559 auto key_builder = key_converter_->builder();
560 auto item_builder = item_converter_->builder();
561 builder_ = std::make_shared<MapBuilder>(default_memory_pool(), key_builder,
562 item_builder, type_);
563 return Status::OK();
564 }
565
566 Status AppendValue(const rj::Value& json_obj) override {
567 if (json_obj.IsNull()) {
568 return this->AppendNull();
569 }
570 RETURN_NOT_OK(builder_->Append());
571 if (!json_obj.IsArray()) {
572 return JSONTypeError("array", json_obj.GetType());
573 }
574 auto size = json_obj.Size();
575 for (uint32_t i = 0; i < size; ++i) {
576 const auto& json_pair = json_obj[i];
577 if (!json_pair.IsArray()) {
578 return JSONTypeError("array", json_pair.GetType());
579 }
580 if (json_pair.Size() != 2) {
581 return Status::Invalid("key item pair must have exactly two elements, had ",
582 json_pair.Size());
583 }
584 if (json_pair[0].IsNull()) {
585 return Status::Invalid("null key is invalid");
586 }
587 RETURN_NOT_OK(key_converter_->AppendValue(json_pair[0]));
588 RETURN_NOT_OK(item_converter_->AppendValue(json_pair[1]));
589 }
590 return Status::OK();
591 }
592
593 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
594
595 private:
596 std::shared_ptr<MapBuilder> builder_;
597 std::shared_ptr<Converter> key_converter_, item_converter_;
598};
599
600// ------------------------------------------------------------------------
601// Converter for fixed size list arrays
602
603class FixedSizeListConverter final : public ConcreteConverter<FixedSizeListConverter> {
604 public:
605 explicit FixedSizeListConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
606
607 Status Init() override {
608 const auto& list_type = checked_cast<const FixedSizeListType&>(*type_);
609 list_size_ = list_type.list_size();
610 RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_));
611 auto child_builder = child_converter_->builder();
612 builder_ = std::make_shared<FixedSizeListBuilder>(default_memory_pool(),
613 child_builder, type_);
614 return Status::OK();
615 }
616
617 Status AppendValue(const rj::Value& json_obj) override {
618 if (json_obj.IsNull()) {
619 return this->AppendNull();
620 }
621 RETURN_NOT_OK(builder_->Append());
622 // Extend the child converter with this JSON array
623 RETURN_NOT_OK(child_converter_->AppendValues(json_obj));
624 if (json_obj.GetArray().Size() != static_cast<rj::SizeType>(list_size_)) {
625 return Status::Invalid("incorrect list size ", json_obj.GetArray().Size());
626 }
627 return Status::OK();
628 }
629
630 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
631
632 private:
633 int32_t list_size_;
634 std::shared_ptr<FixedSizeListBuilder> builder_;
635 std::shared_ptr<Converter> child_converter_;
636};
637
638// ------------------------------------------------------------------------
639// Converter for struct arrays
640
641class StructConverter final : public ConcreteConverter<StructConverter> {
642 public:
643 explicit StructConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
644
645 Status Init() override {
646 std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
647 for (const auto& field : type_->fields()) {
648 std::shared_ptr<Converter> child_converter;
649 RETURN_NOT_OK(GetConverter(field->type(), &child_converter));
650 child_converters_.push_back(child_converter);
651 child_builders.push_back(child_converter->builder());
652 }
653 builder_ = std::make_shared<StructBuilder>(type_, default_memory_pool(),
654 std::move(child_builders));
655 return Status::OK();
656 }
657
658 // Append a JSON value that is either an array of N elements in order
659 // or an object mapping struct names to values (omitted struct members
660 // are mapped to null).
661 Status AppendValue(const rj::Value& json_obj) override {
662 if (json_obj.IsNull()) {
663 return this->AppendNull();
664 }
665 if (json_obj.IsArray()) {
666 auto size = json_obj.Size();
667 auto expected_size = static_cast<uint32_t>(type_->num_fields());
668 if (size != expected_size) {
669 return Status::Invalid("Expected array of size ", expected_size,
670 ", got array of size ", size);
671 }
672 for (uint32_t i = 0; i < size; ++i) {
673 RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i]));
674 }
675 return builder_->Append();
676 }
677 if (json_obj.IsObject()) {
678 auto remaining = json_obj.MemberCount();
679 auto num_children = type_->num_fields();
680 for (int32_t i = 0; i < num_children; ++i) {
681 const auto& field = type_->field(i);
682 auto it = json_obj.FindMember(field->name());
683 if (it != json_obj.MemberEnd()) {
684 --remaining;
685 RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value));
686 } else {
687 RETURN_NOT_OK(child_converters_[i]->AppendNull());
688 }
689 }
690 if (remaining > 0) {
691 rj::StringBuffer sb;
692 rj::Writer<rj::StringBuffer> writer(sb);
693 json_obj.Accept(writer);
694 return Status::Invalid("Unexpected members in JSON object for type ",
695 type_->ToString(), " Object: ", sb.GetString());
696 }
697 return builder_->Append();
698 }
699 return JSONTypeError("array or object", json_obj.GetType());
700 }
701
702 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
703
704 private:
705 std::shared_ptr<StructBuilder> builder_;
706 std::vector<std::shared_ptr<Converter>> child_converters_;
707};
708
709// ------------------------------------------------------------------------
710// Converter for union arrays
711
712class UnionConverter final : public ConcreteConverter<UnionConverter> {
713 public:
714 explicit UnionConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
715
716 Status Init() override {
717 auto union_type = checked_cast<const UnionType*>(type_.get());
718 mode_ = union_type->mode();
719 type_id_to_child_num_.clear();
720 type_id_to_child_num_.resize(union_type->max_type_code() + 1, -1);
721 int child_i = 0;
722 for (auto type_id : union_type->type_codes()) {
723 type_id_to_child_num_[type_id] = child_i++;
724 }
725 std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
726 for (const auto& field : type_->fields()) {
727 std::shared_ptr<Converter> child_converter;
728 RETURN_NOT_OK(GetConverter(field->type(), &child_converter));
729 child_converters_.push_back(child_converter);
730 child_builders.push_back(child_converter->builder());
731 }
732 if (mode_ == UnionMode::DENSE) {
733 builder_ = std::make_shared<DenseUnionBuilder>(default_memory_pool(),
734 std::move(child_builders), type_);
735 } else {
736 builder_ = std::make_shared<SparseUnionBuilder>(default_memory_pool(),
737 std::move(child_builders), type_);
738 }
739 return Status::OK();
740 }
741
742 // Append a JSON value that must be a 2-long array, containing the type_id
743 // and value of the UnionArray's slot.
744 Status AppendValue(const rj::Value& json_obj) override {
745 if (json_obj.IsNull()) {
746 return this->AppendNull();
747 }
748 if (!json_obj.IsArray()) {
749 return JSONTypeError("array", json_obj.GetType());
750 }
751 if (json_obj.Size() != 2) {
752 return Status::Invalid("Expected [type_id, value] pair, got array of size ",
753 json_obj.Size());
754 }
755 const auto& id_obj = json_obj[0];
756 if (!id_obj.IsInt()) {
757 return JSONTypeError("int", id_obj.GetType());
758 }
759
760 auto id = static_cast<int8_t>(id_obj.GetInt());
761 auto child_num = type_id_to_child_num_[id];
762 if (child_num == -1) {
763 return Status::Invalid("type_id ", id, " not found in ", *type_);
764 }
765
766 auto child_converter = child_converters_[child_num];
767 if (mode_ == UnionMode::SPARSE) {
768 RETURN_NOT_OK(checked_cast<SparseUnionBuilder&>(*builder_).Append(id));
769 for (auto&& other_converter : child_converters_) {
770 if (other_converter != child_converter) {
771 RETURN_NOT_OK(other_converter->AppendNull());
772 }
773 }
774 } else {
775 RETURN_NOT_OK(checked_cast<DenseUnionBuilder&>(*builder_).Append(id));
776 }
777 return child_converter->AppendValue(json_obj[1]);
778 }
779
780 std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
781
782 private:
783 UnionMode::type mode_;
784 std::shared_ptr<ArrayBuilder> builder_;
785 std::vector<std::shared_ptr<Converter>> child_converters_;
786 std::vector<int8_t> type_id_to_child_num_;
787};
788
789// ------------------------------------------------------------------------
790// General conversion functions
791
792Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) {
793 return Status::NotImplemented("JSON conversion to ", type->ToString(),
794 " not implemented");
795}
796
797Status GetDictConverter(const std::shared_ptr<DataType>& type,
798 std::shared_ptr<Converter>* out) {
799 std::shared_ptr<Converter> res;
800
801 const auto value_type = checked_cast<const DictionaryType&>(*type).value_type();
802
803#define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE) \
804 case ID: \
805 res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \
806 break;
807
808#define PARAM_CONVERTER_CASE(ID, CLASS, TYPE) \
809 case ID: \
810 res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \
811 break;
812
813 switch (value_type->id()) {
814 PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type)
815 PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type)
816 PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type)
817 PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type)
818 PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type)
819 PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
820 PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
821 PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
822 PARAM_CONVERTER_CASE(Type::FLOAT, FloatConverter, FloatType)
823 PARAM_CONVERTER_CASE(Type::DOUBLE, FloatConverter, DoubleType)
824 PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
825 PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
826 PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)
827 PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType)
828 SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter,
829 FixedSizeBinaryType)
830 SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter, Decimal128Type)
831 SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter, Decimal256Type)
832 default:
833 return ConversionNotImplemented(type);
834 }
835
836#undef SIMPLE_CONVERTER_CASE
837#undef PARAM_CONVERTER_CASE
838
839 RETURN_NOT_OK(res->Init());
840 *out = res;
841 return Status::OK();
842}
843
844Status GetConverter(const std::shared_ptr<DataType>& type,
845 std::shared_ptr<Converter>* out) {
846 if (type->id() == Type::DICTIONARY) {
847 return GetDictConverter(type, out);
848 }
849
850 std::shared_ptr<Converter> res;
851
852#define SIMPLE_CONVERTER_CASE(ID, CLASS) \
853 case ID: \
854 res = std::make_shared<CLASS>(type); \
855 break;
856
857 switch (type->id()) {
858 SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>)
859 SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>)
860 SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>)
861 SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>)
862 SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>)
863 SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>)
864 SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>)
865 SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>)
866 SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter)
867 SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Date32Type>)
868 SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Date64Type>)
869 SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Time32Type>)
870 SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Time64Type>)
871 SIMPLE_CONVERTER_CASE(Type::DURATION, IntegerConverter<DurationType>)
872 SIMPLE_CONVERTER_CASE(Type::NA, NullConverter)
873 SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter)
874 SIMPLE_CONVERTER_CASE(Type::HALF_FLOAT, IntegerConverter<HalfFloatType>)
875 SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>)
876 SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>)
877 SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter<ListType>)
878 SIMPLE_CONVERTER_CASE(Type::LARGE_LIST, ListConverter<LargeListType>)
879 SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter)
880 SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter)
881 SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter)
882 SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter<StringType>)
883 SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
884 SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
885 SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
886 SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>)
887 SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter<>)
888 SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter<>)
889 SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter)
890 SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter)
891 SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter<MonthIntervalType>)
892 SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter)
893 SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTH_DAY_NANO, MonthDayNanoIntervalConverter)
894 default:
895 return ConversionNotImplemented(type);
896 }
897
898#undef SIMPLE_CONVERTER_CASE
899
900 RETURN_NOT_OK(res->Init());
901 *out = res;
902 return Status::OK();
903}
904
905} // namespace
906
907Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view json_string,
908 std::shared_ptr<Array>* out) {
909 std::shared_ptr<Converter> converter;
910 RETURN_NOT_OK(GetConverter(type, &converter));
911
912 rj::Document json_doc;
913 json_doc.Parse<kParseFlags>(json_string.data(), json_string.length());
914 if (json_doc.HasParseError()) {
915 return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
916 GetParseError_En(json_doc.GetParseError()));
917 }
918
919 // The JSON document should be an array, append it
920 RETURN_NOT_OK(converter->AppendValues(json_doc));
921 return converter->Finish(out);
922}
923
924Status ArrayFromJSON(const std::shared_ptr<DataType>& type,
925 const std::string& json_string, std::shared_ptr<Array>* out) {
926 return ArrayFromJSON(type, util::string_view(json_string), out);
927}
928
929Status ArrayFromJSON(const std::shared_ptr<DataType>& type, const char* json_string,
930 std::shared_ptr<Array>* out) {
931 return ArrayFromJSON(type, util::string_view(json_string), out);
932}
933
934Status DictArrayFromJSON(const std::shared_ptr<DataType>& type,
935 util::string_view indices_json,
936 util::string_view dictionary_json, std::shared_ptr<Array>* out) {
937 if (type->id() != Type::DICTIONARY) {
938 return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type);
939 }
940
941 const auto& dictionary_type = checked_cast<const DictionaryType&>(*type);
942
943 std::shared_ptr<Array> indices, dictionary;
944 RETURN_NOT_OK(ArrayFromJSON(dictionary_type.index_type(), indices_json, &indices));
945 RETURN_NOT_OK(
946 ArrayFromJSON(dictionary_type.value_type(), dictionary_json, &dictionary));
947
948 return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary))
949 .Value(out);
950}
951
952Status ScalarFromJSON(const std::shared_ptr<DataType>& type,
953 util::string_view json_string, std::shared_ptr<Scalar>* out) {
954 std::shared_ptr<Converter> converter;
955 RETURN_NOT_OK(GetConverter(type, &converter));
956
957 rj::Document json_doc;
958 json_doc.Parse<kParseFlags>(json_string.data(), json_string.length());
959 if (json_doc.HasParseError()) {
960 return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
961 GetParseError_En(json_doc.GetParseError()));
962 }
963
964 std::shared_ptr<Array> array;
965 RETURN_NOT_OK(converter->AppendValue(json_doc));
966 RETURN_NOT_OK(converter->Finish(&array));
967 DCHECK_EQ(array->length(), 1);
968 ARROW_ASSIGN_OR_RAISE(*out, array->GetScalar(0));
969 return Status::OK();
970}
971
972Status DictScalarFromJSON(const std::shared_ptr<DataType>& type,
973 util::string_view index_json, util::string_view dictionary_json,
974 std::shared_ptr<Scalar>* out) {
975 if (type->id() != Type::DICTIONARY) {
976 return Status::TypeError("DictScalarFromJSON requires dictionary type, got ", *type);
977 }
978
979 const auto& dictionary_type = checked_cast<const DictionaryType&>(*type);
980
981 std::shared_ptr<Scalar> index;
982 std::shared_ptr<Array> dictionary;
983 RETURN_NOT_OK(ScalarFromJSON(dictionary_type.index_type(), index_json, &index));
984 RETURN_NOT_OK(
985 ArrayFromJSON(dictionary_type.value_type(), dictionary_json, &dictionary));
986
987 *out = DictionaryScalar::Make(std::move(index), std::move(dictionary));
988 return Status::OK();
989}
990
991} // namespace json
992} // namespace internal
993} // namespace ipc
994} // namespace arrow