2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
20 #include "red-arrow.hpp"
23 #include <ruby/encoding.h>
25 #include <arrow-glib/error.hpp>
27 #include <arrow/util/logging.h>
// Forward declarations of the nested-type converters. ArrayValueConverter
// (below) only stores pointers to these classes, so it can be declared before
// their full definitions, which appear later in this file.
30 class ListArrayValueConverter;
31 class StructArrayValueConverter;
32 class MapArrayValueConverter;
33 class UnionArrayValueConverter;
34 class DictionaryArrayValueConverter;
// Converts the element at index `i` of an Arrow array into a Ruby VALUE.
// One convert() overload exists per Arrow array type. Overloads for the nested
// types (list, struct, map, union, dictionary) are declared here without
// `inline`; the class keeps a pointer to a dedicated converter for each of
// those types (presumably the out-of-line definitions delegate to them --
// those definitions are not visible in this view).
36 class ArrayValueConverter {
// Constructor initializer list: every sub-converter pointer starts as nullptr
// and stays so until set_sub_value_converters() is called.
40 list_array_value_converter_(nullptr),
41 struct_array_value_converter_(nullptr),
42 map_array_value_converter_(nullptr),
43 union_array_value_converter_(nullptr),
44 dictionary_array_value_converter_(nullptr) {
// Stores the five nested-type converters. The pointers are kept as-is (raw,
// non-owning as far as this class shows: nothing here deletes them).
47 inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
48 StructArrayValueConverter* struct_array_value_converter,
49 MapArrayValueConverter* map_array_value_converter,
50 UnionArrayValueConverter* union_array_value_converter,
51 DictionaryArrayValueConverter* dictionary_array_value_converter) {
52 list_array_value_converter_ = list_array_value_converter;
53 struct_array_value_converter_ = struct_array_value_converter;
54 map_array_value_converter_ = map_array_value_converter;
55 union_array_value_converter_ = union_array_value_converter;
56 dictionary_array_value_converter_ = dictionary_array_value_converter;
// NullArray overload; its body is not visible in this view.
59 inline VALUE convert(const arrow::NullArray& array,
// Boolean -> Qtrue / Qfalse.
64 inline VALUE convert(const arrow::BooleanArray& array,
66 return array.Value(i) ? Qtrue : Qfalse;
// Signed 8/16/32-bit integers go through INT2NUM.
69 inline VALUE convert(const arrow::Int8Array& array,
71 return INT2NUM(array.Value(i));
74 inline VALUE convert(const arrow::Int16Array& array,
76 return INT2NUM(array.Value(i));
79 inline VALUE convert(const arrow::Int32Array& array,
81 return INT2NUM(array.Value(i));
// 64-bit signed integers use LL2NUM instead of INT2NUM.
84 inline VALUE convert(const arrow::Int64Array& array,
86 return LL2NUM(array.Value(i));
// Unsigned 8/16/32-bit integers go through UINT2NUM.
89 inline VALUE convert(const arrow::UInt8Array& array,
91 return UINT2NUM(array.Value(i));
94 inline VALUE convert(const arrow::UInt16Array& array,
96 return UINT2NUM(array.Value(i));
99 inline VALUE convert(const arrow::UInt32Array& array,
101 return UINT2NUM(array.Value(i));
// 64-bit unsigned integers use ULL2NUM.
104 inline VALUE convert(const arrow::UInt64Array& array,
106 return ULL2NUM(array.Value(i));
// HalfFloatArray conversion is commented out (not supported here).
110 // inline VALUE convert(const arrow::HalfFloatArray& array,
111 // const int64_t i) {
// Float and Double are both converted with DBL2NUM.
114 inline VALUE convert(const arrow::FloatArray& array,
116 return DBL2NUM(array.Value(i));
119 inline VALUE convert(const arrow::DoubleArray& array,
121 return DBL2NUM(array.Value(i));
// Binary -> Ruby String tagged with the ASCII-8BIT encoding. The bytes come
// from GetValue(i, &length); `length` is declared on a line not shown here.
124 inline VALUE convert(const arrow::BinaryArray& array,
127 const auto value = array.GetValue(i, &length);
128 // TODO: encoding support
129 return rb_enc_str_new(reinterpret_cast<const char*>(value),
131 rb_ascii8bit_encoding());
// String -> Ruby String created with rb_utf8_str_new.
134 inline VALUE convert(const arrow::StringArray& array,
137 const auto value = array.GetValue(i, &length);
138 return rb_utf8_str_new(reinterpret_cast<const char*>(value),
// FixedSizeBinary -> Ruby String tagged with the ASCII-8BIT encoding.
142 inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
144 return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
146 rb_ascii8bit_encoding());
// Offset added to a Date32 value before it is handed to `id_jd` on cDate.
// Per its name this is the Julian day number of the Unix epoch.
149 constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588;
// Date32 -> result of calling id_jd on cDate with (value + epoch offset).
150 inline VALUE convert(const arrow::Date32Array& array,
152 const auto value = array.Value(i);
153 const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH;
154 return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian));
// Date64 -> the raw value is divided by 1000 as an exact Rational (the local
// is named `msec`), turned into a Time with rb_time_num_new, and then
// id_to_datetime is called on that Time.
157 inline VALUE convert(const arrow::Date64Array& array,
159 const auto value = array.Value(i);
160 auto msec = LL2NUM(value);
161 auto sec = rb_rational_new(msec, INT2NUM(1000));
162 auto time_value = rb_time_num_new(sec, Qnil);
163 return rb_funcall(time_value, id_to_datetime, 0, 0);
// Time32 -> a call on red_arrow::cArrowTime that receives
// time_unit_to_enum(type->unit()). The method id and the remaining arguments
// are on lines not visible in this view.
166 inline VALUE convert(const arrow::Time32Array& array,
169 arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get());
170 const auto value = array.Value(i);
171 return rb_funcall(red_arrow::cArrowTime,
174 time_unit_to_enum(type->unit()),
// Time64 -> same shape as the Time32 overload, using Time64Type for the unit.
178 inline VALUE convert(const arrow::Time64Array& array,
181 arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get());
182 const auto value = array.Value(i);
183 return rb_funcall(red_arrow::cArrowTime,
186 time_unit_to_enum(type->unit()),
// Timestamp -> Time built from the Rational (value / scale), where scale is
// time_unit_to_scale(unit). The second argument to rb_time_num_new is Qnil.
190 inline VALUE convert(const arrow::TimestampArray& array,
193 arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get());
194 auto scale = time_unit_to_scale(type->unit());
195 auto value = array.Value(i);
196 auto sec = rb_rational_new(LL2NUM(value), scale);
197 return rb_time_num_new(sec, Qnil);
// IntervalArray conversion is commented out (not supported here).
201 // inline VALUE convert(const arrow::IntervalArray& array,
202 // const int64_t i) {
// Nested-type overloads: only the first line of each declaration is visible.
205 VALUE convert(const arrow::ListArray& array,
208 VALUE convert(const arrow::StructArray& array,
211 VALUE convert(const arrow::MapArray& array,
214 VALUE convert(const arrow::UnionArray& array,
217 VALUE convert(const arrow::DictionaryArray& array,
// Decimal128 / Decimal256 -> the value is formatted as a string with
// FormatValue(i) and passed to convert_decimal().
// NOTE(review): std::move on the temporary returned by FormatValue is
// redundant; the temporary already binds to std::string&&.
220 inline VALUE convert(const arrow::Decimal128Array& array,
222 return convert_decimal(std::move(array.FormatValue(i)));
225 inline VALUE convert(const arrow::Decimal256Array& array,
227 return convert_decimal(std::move(array.FormatValue(i)));
// Stores the formatted decimal in decimal_buffer_ and calls a method on
// rb_cObject with an ASCII-8BIT Ruby String built from that buffer. The method
// id and argument count are on lines not visible in this view.
// NOTE(review): `decimal_buffer_ = value;` copies, because a named rvalue
// reference is an lvalue; `std::move(value)` would avoid the copy.
231 inline VALUE convert_decimal(std::string&& value) {
232 decimal_buffer_ = value;
233 return rb_funcall(rb_cObject,
236 rb_enc_str_new(decimal_buffer_.data(),
237 decimal_buffer_.length(),
238 rb_ascii8bit_encoding()));
// Buffer that holds the formatted decimal string in convert_decimal().
241 std::string decimal_buffer_;
// Nested-type converters; nullptr until set_sub_value_converters() is called.
242 ListArrayValueConverter* list_array_value_converter_;
243 StructArrayValueConverter* struct_array_value_converter_;
244 MapArrayValueConverter* map_array_value_converter_;
245 UnionArrayValueConverter* union_array_value_converter_;
246 DictionaryArrayValueConverter* dictionary_array_value_converter_;
// Converts one element of an arrow::ListArray into a Ruby Array. It acts as an
// arrow::ArrayVisitor over the list's child values array, and converts each
// child element through the shared ArrayValueConverter.
249 class ListArrayValueConverter : public arrow::ArrayVisitor {
// `converter` is the shared per-element converter used for the child values.
251 explicit ListArrayValueConverter(ArrayValueConverter* converter)
252 : array_value_converter_(converter),
// Builds the Ruby Array for list element `index`.
// offset_/length_/result_ are saved on entry and restored before returning,
// so the visitor state seen by an outer call is unchanged afterwards
// (presumably to allow re-entrant calls for nested lists -- TODO confirm).
257 VALUE convert(const arrow::ListArray& array, const int64_t index) {
258 auto values = array.values().get();
259 auto offset_keep = offset_;
260 auto length_keep = length_;
// Slice of the child values array that belongs to this list element.
261 offset_ = array.value_offset(index);
262 length_ = array.value_length(index);
263 auto result_keep = result_;
264 result_ = rb_ary_new_capa(length_);
// Dispatches to the Visit() overload for the child array's concrete type;
// the status is passed to check_status with a context tag.
265 check_status(values->Accept(this),
266 "[raw-records][list-array]");
267 offset_ = offset_keep;
268 length_ = length_keep;
269 auto result_return = result_;
270 result_ = result_keep;
271 return result_return;
// VISIT(TYPE) defines the Visit() override for arrow::TYPE##Array, forwarding
// to visit_value(). Only one instantiation is visible in this view.
274 #define VISIT(TYPE) \
275 arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
276 return visit_value(array); \
295 VISIT(FixedSizeBinary)
// Converts a single child element via the shared ArrayValueConverter.
317 template <typename ArrayType>
318 inline VALUE convert_value(const ArrayType& array,
320 return array_value_converter_->convert(array, i);
// Appends the child elements [offset_, offset_ + length_) to result_.
// When the child array has any nulls, each element is null-checked first;
// otherwise every element is converted without the check.
323 template <typename ArrayType>
324 arrow::Status visit_value(const ArrayType& array) {
325 if (array.null_count() > 0) {
326 for (int64_t i = 0; i < length_; ++i) {
// `value` is declared on a line not shown -- presumably initialised to Qnil so
// that null elements are pushed as nil; TODO confirm.
328 if (!array.IsNull(i + offset_)) {
329 value = convert_value(array, i + offset_);
331 rb_ary_push(result_, value);
334 for (int64_t i = 0; i < length_; ++i) {
335 rb_ary_push(result_, convert_value(array, i + offset_));
338 return arrow::Status::OK();
// Shared per-element converter, set from the constructor argument.
341 ArrayValueConverter* array_value_converter_;
// Converts one element of an arrow::StructArray into a Ruby Hash that maps
// field name (UTF-8 String) to converted field value. It acts as an
// arrow::ArrayVisitor over each field's array.
347 class StructArrayValueConverter : public arrow::ArrayVisitor {
// `converter` is the shared per-element converter used for field values.
349 explicit StructArrayValueConverter(ArrayValueConverter* converter)
350 : array_value_converter_(converter),
// Builds the Hash for struct element `index`.
// index_ and result_ are saved on entry; result_ is restored before returning.
// The assignment of `index` to index_ and the restore of index_ are on lines
// not visible in this view.
355 VALUE convert(const arrow::StructArray& array,
356 const int64_t index) {
357 auto index_keep = index_;
358 auto result_keep = result_;
360 result_ = rb_hash_new();
361 const auto struct_type = array.struct_type();
362 const auto n = struct_type->num_fields();
// One Hash entry per struct field, in field order.
363 for (int i = 0; i < n; ++i) {
364 const auto field_type = struct_type->field(i).get();
365 const auto& field_name = field_type->name();
// key_ holds the current field's name while its array is visited. The previous
// key_ is saved here; its restore is on a line not visible in this view.
366 auto key_keep = key_;
367 key_ = rb_utf8_str_new(field_name.data(), field_name.length());
368 const auto field_array = array.field(i).get();
369 check_status(field_array->Accept(this),
370 "[raw-records][struct-array]");
373 auto result_return = result_;
374 result_ = result_keep;
376 return result_return;
// VISIT(TYPE) defines the Visit() override for arrow::TYPE##Array and returns
// Status::OK(). One body line of the macro is not visible (presumably the call
// to fill_field -- TODO confirm). Only one instantiation is visible here.
379 #define VISIT(TYPE) \
380 arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
382 return arrow::Status::OK(); \
401 VISIT(FixedSizeBinary)
// Converts a single field element via the shared ArrayValueConverter.
423 template <typename ArrayType>
424 inline VALUE convert_value(const ArrayType& array,
426 return array_value_converter_->convert(array, i);
// Stores result_[key_]: nil when the field is null at index_, otherwise the
// converted value at index_.
429 template <typename ArrayType>
430 void fill_field(const ArrayType& array) {
431 if (array.IsNull(index_)) {
432 rb_hash_aset(result_, key_, Qnil);
434 rb_hash_aset(result_, key_, convert_value(array, index_));
// Shared per-element converter, set from the constructor argument.
438 ArrayValueConverter* array_value_converter_;
// Converts one element of an arrow::MapArray into a Ruby Hash. The keys array
// and the items array are each visited to build a Ruby Array, and the two
// Arrays are then paired up position by position into the Hash.
444 class MapArrayValueConverter : public arrow::ArrayVisitor {
// `converter` is the shared per-element converter used for keys and items.
446 explicit MapArrayValueConverter(ArrayValueConverter* converter)
447 : array_value_converter_(converter),
// Builds the Hash for map element `index`.
// offset_/length_/values_ are saved on entry and restored before the end of
// the function. The return statement is on a line not visible in this view.
452 VALUE convert(const arrow::MapArray& array,
453 const int64_t index) {
454 auto key_array = array.keys().get();
455 auto item_array = array.items().get();
456 auto offset_keep = offset_;
457 auto length_keep = length_;
458 auto values_keep = values_;
// Slice of the keys/items arrays that belongs to this map element.
459 offset_ = array.value_offset(index);
460 length_ = array.value_length(index);
// visit_value() appends to values_. The assignments that point values_ at
// `keys` and then `items` are on lines not visible here -- presumably they
// happen just before each Accept() call; TODO confirm.
461 auto keys = rb_ary_new_capa(length_);
463 check_status(key_array->Accept(this),
464 "[raw-records][map-array][keys]");
465 auto items = rb_ary_new_capa(length_);
467 check_status(item_array->Accept(this),
468 "[raw-records][map-array][items]");
// Pair keys[i] with items[i]. The loop bound is the length of `keys`.
469 auto map = rb_hash_new();
470 auto n = RARRAY_LEN(keys);
471 auto raw_keys = RARRAY_CONST_PTR(keys);
472 auto raw_items = RARRAY_CONST_PTR(items);
473 for (long i = 0; i < n; ++i) {
474 rb_hash_aset(map, raw_keys[i], raw_items[i]);
476 offset_ = offset_keep;
477 length_ = length_keep;
478 values_ = values_keep;
// VISIT(TYPE) defines the Visit() override for arrow::TYPE##Array, forwarding
// to visit_value(). Only one instantiation is visible in this view.
482 #define VISIT(TYPE) \
483 arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
484 return visit_value(array); \
503 VISIT(FixedSizeBinary)
// Converts a single key or item element via the shared ArrayValueConverter.
525 template <typename ArrayType>
526 inline VALUE convert_value(const ArrayType& array,
528 return array_value_converter_->convert(array, i);
// Appends the elements [offset_, offset_ + length_) of `array` to values_.
// When the array has any nulls, each element is null-checked first; otherwise
// every element is converted without the check.
531 template <typename ArrayType>
532 arrow::Status visit_value(const ArrayType& array) {
533 if (array.null_count() > 0) {
534 for (int64_t i = 0; i < length_; ++i) {
// `value` is declared on a line not shown -- presumably initialised to Qnil so
// that null elements are pushed as nil; TODO confirm.
536 if (!array.IsNull(i + offset_)) {
537 value = convert_value(array, i + offset_);
539 rb_ary_push(values_, value);
542 for (int64_t i = 0; i < length_; ++i) {
543 rb_ary_push(values_, convert_value(array, i + offset_));
546 return arrow::Status::OK();
// Shared per-element converter, set from the constructor argument.
549 ArrayValueConverter* array_value_converter_;
// Converts one element of an arrow::UnionArray (sparse or dense). For the
// selected child field, a Ruby Hash with one entry keyed by the field name is
// created in convert_value().
555 class UnionArrayValueConverter : public arrow::ArrayVisitor {
// `converter` is the shared per-element converter used for the child value.
557 explicit UnionArrayValueConverter(ArrayValueConverter* converter)
558 : array_value_converter_(converter),
// Converts union element `index`.
// index_ and result_ are saved on entry; result_ is restored before returning.
// The assignment of `index` to index_ and the restore of index_ are on lines
// not visible in this view.
562 VALUE convert(const arrow::UnionArray& array,
563 const int64_t index) {
564 const auto index_keep = index_;
565 const auto result_keep = result_;
// Dispatch on the union mode. The array is downcast with static_cast to the
// matching concrete array type.
567 switch (array.mode()) {
568 case arrow::UnionMode::SPARSE:
569 convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
571 case arrow::UnionMode::DENSE:
572 convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
// Raises ArgumentError for any other mode (the case label is on a line not
// visible here -- presumably `default:`).
575 rb_raise(rb_eArgError, "Invalid union mode");
578 auto result_return = result_;
580 result_ = result_keep;
581 return result_return;
// VISIT(TYPE) defines the Visit() override for arrow::TYPE##Array: it calls
// convert_value() and returns Status::OK(). Only one instantiation is visible.
584 #define VISIT(TYPE) \
585 arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
586 convert_value(array); \
587 return arrow::Status::OK(); \
606 VISIT(FixedSizeBinary)
// Creates a new Hash and sets field_name_ => nil when the child is null at
// index_; otherwise the converted child value at index_ is used (the start of
// that call and the store into result_ are on lines not visible here).
628 template <typename ArrayType>
629 inline void convert_value(const ArrayType& array) {
630 auto result = rb_hash_new();
631 if (array.IsNull(index_)) {
632 rb_hash_aset(result, field_name_, Qnil);
636 array_value_converter_->convert(array, index_));
// Maps the type code stored at index_ to a child field index.
// The code is range-checked against [0, UnionType::kMaxTypeCode] and looked up
// in type->child_ids(). If no index is returned (the return path is on lines
// not visible here), an Invalid status "Unknown type ID: <code>" is passed to
// check_status.
641 uint8_t compute_field_index(const arrow::UnionArray& array,
642 arrow::UnionType* type,
644 const auto type_code = array.raw_type_codes()[index_];
645 if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
646 const auto field_id = type->child_ids()[type_code];
651 check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
// Sparse union: the selected child array is visited with index_ unchanged.
// field_name_ is set to the child's name for the visit, then restored.
656 void convert_sparse(const arrow::SparseUnionArray& array) {
658 std::static_pointer_cast<arrow::UnionType>(array.type()).get();
659 const auto tag = "[raw-records][union-sparse-array]";
660 const auto index = compute_field_index(array, type, tag);
661 const auto field = type->field(index).get();
662 const auto& field_name = field->name();
663 const auto field_name_keep = field_name_;
664 field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
665 const auto field_array = array.field(index).get();
666 check_status(field_array->Accept(this), tag);
667 field_name_ = field_name_keep;
// Dense union: index_ is replaced by array.value_offset(index_) while the
// selected child array is visited. index_ is saved in index_keep; its restore
// is on a line not visible in this view.
670 void convert_dense(const arrow::DenseUnionArray& array) {
672 std::static_pointer_cast<arrow::UnionType>(array.type()).get();
673 const auto tag = "[raw-records][union-dense-array]";
674 const auto index = compute_field_index(array, type, tag);
675 const auto field = type->field(index).get();
676 const auto& field_name = field->name();
677 const auto field_name_keep = field_name_;
678 field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
679 const auto field_array = array.field(index);
680 const auto index_keep = index_;
681 index_ = array.value_offset(index_);
682 check_status(field_array->Accept(this), tag);
684 field_name_ = field_name_keep;
// Shared per-element converter, set from the constructor argument.
687 ArrayValueConverter* array_value_converter_;
// Converts one element of an arrow::DictionaryArray by looking up the
// element's value index in the dictionary array and converting that
// dictionary entry through the shared ArrayValueConverter.
693 class DictionaryArrayValueConverter : public arrow::ArrayVisitor {
// `converter` is the shared per-element converter used for dictionary values.
695 explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
696 : array_value_converter_(converter),
// Stores the dictionary position for element `index` in value_index_, then
// visits the dictionary array; the matching Visit() overload writes result_.
// The return statement is on a line not visible in this view.
701 VALUE convert(const arrow::DictionaryArray& array,
702 const int64_t index) {
703 value_index_ = array.GetValueIndex(index);
704 auto dictionary = array.dictionary().get();
705 check_status(dictionary->Accept(this),
706 "[raw-records][dictionary-array]");
// VISIT(TYPE) defines the Visit() override for arrow::TYPE##Array: it converts
// the dictionary entry at value_index_ into result_ and returns Status::OK().
// Only one instantiation is visible in this view.
710 #define VISIT(TYPE) \
711 arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
712 result_ = convert_value(array, value_index_); \
713 return arrow::Status::OK(); \
732 VISIT(FixedSizeBinary)
// Converts a single dictionary entry via the shared ArrayValueConverter.
754 template <typename ArrayType>
755 inline VALUE convert_value(const ArrayType& array,
757 return array_value_converter_->convert(array, i);
// Shared per-element converter, set from the constructor argument.
760 ArrayValueConverter* array_value_converter_;
// Position in the dictionary array of the element being converted.
761 int64_t value_index_;
// Constructor initializer list and body of the enclosing converter class (its
// class header and constructor signature are not visible in this view).
// Each nested-type converter is constructed with the address of the shared
// array_value_converter_. The body then gives array_value_converter_ the
// addresses of those nested-type converters, so each side can reach the other.
768 : array_value_converter_(),
769 list_array_value_converter_(&array_value_converter_),
770 struct_array_value_converter_(&array_value_converter_),
771 map_array_value_converter_(&array_value_converter_),
772 union_array_value_converter_(&array_value_converter_),
773 dictionary_array_value_converter_(&array_value_converter_) {
774 array_value_converter_.
775 set_sub_value_converters(&list_array_value_converter_,
776 &struct_array_value_converter_,
777 &map_array_value_converter_,
778 &union_array_value_converter_,
779 &dictionary_array_value_converter_);
// Converts element `i` of `array` by forwarding to the owned
// array_value_converter_; overload resolution on ArrayType picks the
// matching convert().
782 template <typename ArrayType>
783 inline VALUE convert_value(const ArrayType& array,
785 return array_value_converter_.convert(array, i);
// All converters are held by value. The nested-type converters are each
// constructed with the address of array_value_converter_ (see the constructor
// initializer list), so this object must not be copied or moved without
// re-wiring those pointers -- NOTE(review): confirm copy/move are not used.
788 ArrayValueConverter array_value_converter_;
789 ListArrayValueConverter list_array_value_converter_;
790 StructArrayValueConverter struct_array_value_converter_;
791 MapArrayValueConverter map_array_value_converter_;
792 UnionArrayValueConverter union_array_value_converter_;
793 DictionaryArrayValueConverter dictionary_array_value_converter_;