]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one | |
3 | * or more contributor license agreements. See the NOTICE file | |
4 | * distributed with this work for additional information | |
5 | * regarding copyright ownership. The ASF licenses this file | |
6 | * to you under the Apache License, Version 2.0 (the | |
7 | * "License"); you may not use this file except in compliance | |
8 | * with the License. You may obtain a copy of the License at | |
9 | * | |
10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
11 | * | |
12 | * Unless required by applicable law or agreed to in writing, | |
13 | * software distributed under the License is distributed on an | |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
15 | * KIND, either express or implied. See the License for the | |
16 | * specific language governing permissions and limitations | |
17 | * under the License. | |
18 | */ | |
19 | ||
20 | #include "red-arrow.hpp" | |
21 | ||
22 | #include <ruby.hpp> | |
23 | #include <ruby/encoding.h> | |
24 | ||
25 | #include <arrow-glib/error.hpp> | |
26 | ||
27 | #include <arrow/util/logging.h> | |
28 | ||
29 | namespace red_arrow { | |
30 | class ListArrayValueConverter; | |
31 | class StructArrayValueConverter; | |
32 | class MapArrayValueConverter; | |
33 | class UnionArrayValueConverter; | |
34 | class DictionaryArrayValueConverter; | |
35 | ||
36 | class ArrayValueConverter { | |
37 | public: | |
38 | ArrayValueConverter() | |
39 | : decimal_buffer_(), | |
40 | list_array_value_converter_(nullptr), | |
41 | struct_array_value_converter_(nullptr), | |
42 | map_array_value_converter_(nullptr), | |
43 | union_array_value_converter_(nullptr), | |
44 | dictionary_array_value_converter_(nullptr) { | |
45 | } | |
46 | ||
47 | inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter, | |
48 | StructArrayValueConverter* struct_array_value_converter, | |
49 | MapArrayValueConverter* map_array_value_converter, | |
50 | UnionArrayValueConverter* union_array_value_converter, | |
51 | DictionaryArrayValueConverter* dictionary_array_value_converter) { | |
52 | list_array_value_converter_ = list_array_value_converter; | |
53 | struct_array_value_converter_ = struct_array_value_converter; | |
54 | map_array_value_converter_ = map_array_value_converter; | |
55 | union_array_value_converter_ = union_array_value_converter; | |
56 | dictionary_array_value_converter_ = dictionary_array_value_converter; | |
57 | } | |
58 | ||
59 | inline VALUE convert(const arrow::NullArray& array, | |
60 | const int64_t i) { | |
61 | return Qnil; | |
62 | } | |
63 | ||
64 | inline VALUE convert(const arrow::BooleanArray& array, | |
65 | const int64_t i) { | |
66 | return array.Value(i) ? Qtrue : Qfalse; | |
67 | } | |
68 | ||
69 | inline VALUE convert(const arrow::Int8Array& array, | |
70 | const int64_t i) { | |
71 | return INT2NUM(array.Value(i)); | |
72 | } | |
73 | ||
74 | inline VALUE convert(const arrow::Int16Array& array, | |
75 | const int64_t i) { | |
76 | return INT2NUM(array.Value(i)); | |
77 | } | |
78 | ||
79 | inline VALUE convert(const arrow::Int32Array& array, | |
80 | const int64_t i) { | |
81 | return INT2NUM(array.Value(i)); | |
82 | } | |
83 | ||
84 | inline VALUE convert(const arrow::Int64Array& array, | |
85 | const int64_t i) { | |
86 | return LL2NUM(array.Value(i)); | |
87 | } | |
88 | ||
89 | inline VALUE convert(const arrow::UInt8Array& array, | |
90 | const int64_t i) { | |
91 | return UINT2NUM(array.Value(i)); | |
92 | } | |
93 | ||
94 | inline VALUE convert(const arrow::UInt16Array& array, | |
95 | const int64_t i) { | |
96 | return UINT2NUM(array.Value(i)); | |
97 | } | |
98 | ||
99 | inline VALUE convert(const arrow::UInt32Array& array, | |
100 | const int64_t i) { | |
101 | return UINT2NUM(array.Value(i)); | |
102 | } | |
103 | ||
104 | inline VALUE convert(const arrow::UInt64Array& array, | |
105 | const int64_t i) { | |
106 | return ULL2NUM(array.Value(i)); | |
107 | } | |
108 | ||
109 | // TODO | |
110 | // inline VALUE convert(const arrow::HalfFloatArray& array, | |
111 | // const int64_t i) { | |
112 | // } | |
113 | ||
114 | inline VALUE convert(const arrow::FloatArray& array, | |
115 | const int64_t i) { | |
116 | return DBL2NUM(array.Value(i)); | |
117 | } | |
118 | ||
119 | inline VALUE convert(const arrow::DoubleArray& array, | |
120 | const int64_t i) { | |
121 | return DBL2NUM(array.Value(i)); | |
122 | } | |
123 | ||
124 | inline VALUE convert(const arrow::BinaryArray& array, | |
125 | const int64_t i) { | |
126 | int32_t length; | |
127 | const auto value = array.GetValue(i, &length); | |
128 | // TODO: encoding support | |
129 | return rb_enc_str_new(reinterpret_cast<const char*>(value), | |
130 | length, | |
131 | rb_ascii8bit_encoding()); | |
132 | } | |
133 | ||
134 | inline VALUE convert(const arrow::StringArray& array, | |
135 | const int64_t i) { | |
136 | int32_t length; | |
137 | const auto value = array.GetValue(i, &length); | |
138 | return rb_utf8_str_new(reinterpret_cast<const char*>(value), | |
139 | length); | |
140 | } | |
141 | ||
142 | inline VALUE convert(const arrow::FixedSizeBinaryArray& array, | |
143 | const int64_t i) { | |
144 | return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)), | |
145 | array.byte_width(), | |
146 | rb_ascii8bit_encoding()); | |
147 | } | |
148 | ||
149 | constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588; | |
150 | inline VALUE convert(const arrow::Date32Array& array, | |
151 | const int64_t i) { | |
152 | const auto value = array.Value(i); | |
153 | const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH; | |
154 | return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian)); | |
155 | } | |
156 | ||
157 | inline VALUE convert(const arrow::Date64Array& array, | |
158 | const int64_t i) { | |
159 | const auto value = array.Value(i); | |
160 | auto msec = LL2NUM(value); | |
161 | auto sec = rb_rational_new(msec, INT2NUM(1000)); | |
162 | auto time_value = rb_time_num_new(sec, Qnil); | |
163 | return rb_funcall(time_value, id_to_datetime, 0, 0); | |
164 | } | |
165 | ||
166 | inline VALUE convert(const arrow::Time32Array& array, | |
167 | const int64_t i) { | |
168 | const auto type = | |
169 | arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get()); | |
170 | const auto value = array.Value(i); | |
171 | return rb_funcall(red_arrow::cArrowTime, | |
172 | id_new, | |
173 | 2, | |
174 | time_unit_to_enum(type->unit()), | |
175 | INT2NUM(value)); | |
176 | } | |
177 | ||
178 | inline VALUE convert(const arrow::Time64Array& array, | |
179 | const int64_t i) { | |
180 | const auto type = | |
181 | arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get()); | |
182 | const auto value = array.Value(i); | |
183 | return rb_funcall(red_arrow::cArrowTime, | |
184 | id_new, | |
185 | 2, | |
186 | time_unit_to_enum(type->unit()), | |
187 | LL2NUM(value)); | |
188 | } | |
189 | ||
190 | inline VALUE convert(const arrow::TimestampArray& array, | |
191 | const int64_t i) { | |
192 | const auto type = | |
193 | arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get()); | |
194 | auto scale = time_unit_to_scale(type->unit()); | |
195 | auto value = array.Value(i); | |
196 | auto sec = rb_rational_new(LL2NUM(value), scale); | |
197 | return rb_time_num_new(sec, Qnil); | |
198 | } | |
199 | ||
200 | // TODO | |
201 | // inline VALUE convert(const arrow::IntervalArray& array, | |
202 | // const int64_t i) { | |
203 | // }; | |
204 | ||
205 | VALUE convert(const arrow::ListArray& array, | |
206 | const int64_t i); | |
207 | ||
208 | VALUE convert(const arrow::StructArray& array, | |
209 | const int64_t i); | |
210 | ||
211 | VALUE convert(const arrow::MapArray& array, | |
212 | const int64_t i); | |
213 | ||
214 | VALUE convert(const arrow::UnionArray& array, | |
215 | const int64_t i); | |
216 | ||
217 | VALUE convert(const arrow::DictionaryArray& array, | |
218 | const int64_t i); | |
219 | ||
220 | inline VALUE convert(const arrow::Decimal128Array& array, | |
221 | const int64_t i) { | |
222 | return convert_decimal(std::move(array.FormatValue(i))); | |
223 | } | |
224 | ||
225 | inline VALUE convert(const arrow::Decimal256Array& array, | |
226 | const int64_t i) { | |
227 | return convert_decimal(std::move(array.FormatValue(i))); | |
228 | } | |
229 | ||
230 | private: | |
231 | inline VALUE convert_decimal(std::string&& value) { | |
232 | decimal_buffer_ = value; | |
233 | return rb_funcall(rb_cObject, | |
234 | id_BigDecimal, | |
235 | 1, | |
236 | rb_enc_str_new(decimal_buffer_.data(), | |
237 | decimal_buffer_.length(), | |
238 | rb_ascii8bit_encoding())); | |
239 | } | |
240 | ||
241 | std::string decimal_buffer_; | |
242 | ListArrayValueConverter* list_array_value_converter_; | |
243 | StructArrayValueConverter* struct_array_value_converter_; | |
244 | MapArrayValueConverter* map_array_value_converter_; | |
245 | UnionArrayValueConverter* union_array_value_converter_; | |
246 | DictionaryArrayValueConverter* dictionary_array_value_converter_; | |
247 | }; | |
248 | ||
249 | class ListArrayValueConverter : public arrow::ArrayVisitor { | |
250 | public: | |
251 | explicit ListArrayValueConverter(ArrayValueConverter* converter) | |
252 | : array_value_converter_(converter), | |
253 | offset_(0), | |
254 | length_(0), | |
255 | result_(Qnil) {} | |
256 | ||
257 | VALUE convert(const arrow::ListArray& array, const int64_t index) { | |
258 | auto values = array.values().get(); | |
259 | auto offset_keep = offset_; | |
260 | auto length_keep = length_; | |
261 | offset_ = array.value_offset(index); | |
262 | length_ = array.value_length(index); | |
263 | auto result_keep = result_; | |
264 | result_ = rb_ary_new_capa(length_); | |
265 | check_status(values->Accept(this), | |
266 | "[raw-records][list-array]"); | |
267 | offset_ = offset_keep; | |
268 | length_ = length_keep; | |
269 | auto result_return = result_; | |
270 | result_ = result_keep; | |
271 | return result_return; | |
272 | } | |
273 | ||
274 | #define VISIT(TYPE) \ | |
275 | arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ | |
276 | return visit_value(array); \ | |
277 | } | |
278 | ||
279 | VISIT(Null) | |
280 | VISIT(Boolean) | |
281 | VISIT(Int8) | |
282 | VISIT(Int16) | |
283 | VISIT(Int32) | |
284 | VISIT(Int64) | |
285 | VISIT(UInt8) | |
286 | VISIT(UInt16) | |
287 | VISIT(UInt32) | |
288 | VISIT(UInt64) | |
289 | // TODO | |
290 | // VISIT(HalfFloat) | |
291 | VISIT(Float) | |
292 | VISIT(Double) | |
293 | VISIT(Binary) | |
294 | VISIT(String) | |
295 | VISIT(FixedSizeBinary) | |
296 | VISIT(Date32) | |
297 | VISIT(Date64) | |
298 | VISIT(Time32) | |
299 | VISIT(Time64) | |
300 | VISIT(Timestamp) | |
301 | // TODO | |
302 | // VISIT(Interval) | |
303 | VISIT(List) | |
304 | VISIT(Struct) | |
305 | VISIT(Map) | |
306 | VISIT(SparseUnion) | |
307 | VISIT(DenseUnion) | |
308 | VISIT(Dictionary) | |
309 | VISIT(Decimal128) | |
310 | VISIT(Decimal256) | |
311 | // TODO | |
312 | // VISIT(Extension) | |
313 | ||
314 | #undef VISIT | |
315 | ||
316 | private: | |
317 | template <typename ArrayType> | |
318 | inline VALUE convert_value(const ArrayType& array, | |
319 | const int64_t i) { | |
320 | return array_value_converter_->convert(array, i); | |
321 | } | |
322 | ||
323 | template <typename ArrayType> | |
324 | arrow::Status visit_value(const ArrayType& array) { | |
325 | if (array.null_count() > 0) { | |
326 | for (int64_t i = 0; i < length_; ++i) { | |
327 | auto value = Qnil; | |
328 | if (!array.IsNull(i + offset_)) { | |
329 | value = convert_value(array, i + offset_); | |
330 | } | |
331 | rb_ary_push(result_, value); | |
332 | } | |
333 | } else { | |
334 | for (int64_t i = 0; i < length_; ++i) { | |
335 | rb_ary_push(result_, convert_value(array, i + offset_)); | |
336 | } | |
337 | } | |
338 | return arrow::Status::OK(); | |
339 | } | |
340 | ||
341 | ArrayValueConverter* array_value_converter_; | |
342 | int32_t offset_; | |
343 | int32_t length_; | |
344 | VALUE result_; | |
345 | }; | |
346 | ||
347 | class StructArrayValueConverter : public arrow::ArrayVisitor { | |
348 | public: | |
349 | explicit StructArrayValueConverter(ArrayValueConverter* converter) | |
350 | : array_value_converter_(converter), | |
351 | key_(Qnil), | |
352 | index_(0), | |
353 | result_(Qnil) {} | |
354 | ||
355 | VALUE convert(const arrow::StructArray& array, | |
356 | const int64_t index) { | |
357 | auto index_keep = index_; | |
358 | auto result_keep = result_; | |
359 | index_ = index; | |
360 | result_ = rb_hash_new(); | |
361 | const auto struct_type = array.struct_type(); | |
362 | const auto n = struct_type->num_fields(); | |
363 | for (int i = 0; i < n; ++i) { | |
364 | const auto field_type = struct_type->field(i).get(); | |
365 | const auto& field_name = field_type->name(); | |
366 | auto key_keep = key_; | |
367 | key_ = rb_utf8_str_new(field_name.data(), field_name.length()); | |
368 | const auto field_array = array.field(i).get(); | |
369 | check_status(field_array->Accept(this), | |
370 | "[raw-records][struct-array]"); | |
371 | key_ = key_keep; | |
372 | } | |
373 | auto result_return = result_; | |
374 | result_ = result_keep; | |
375 | index_ = index_keep; | |
376 | return result_return; | |
377 | } | |
378 | ||
379 | #define VISIT(TYPE) \ | |
380 | arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ | |
381 | fill_field(array); \ | |
382 | return arrow::Status::OK(); \ | |
383 | } | |
384 | ||
385 | VISIT(Null) | |
386 | VISIT(Boolean) | |
387 | VISIT(Int8) | |
388 | VISIT(Int16) | |
389 | VISIT(Int32) | |
390 | VISIT(Int64) | |
391 | VISIT(UInt8) | |
392 | VISIT(UInt16) | |
393 | VISIT(UInt32) | |
394 | VISIT(UInt64) | |
395 | // TODO | |
396 | // VISIT(HalfFloat) | |
397 | VISIT(Float) | |
398 | VISIT(Double) | |
399 | VISIT(Binary) | |
400 | VISIT(String) | |
401 | VISIT(FixedSizeBinary) | |
402 | VISIT(Date32) | |
403 | VISIT(Date64) | |
404 | VISIT(Time32) | |
405 | VISIT(Time64) | |
406 | VISIT(Timestamp) | |
407 | // TODO | |
408 | // VISIT(Interval) | |
409 | VISIT(List) | |
410 | VISIT(Struct) | |
411 | VISIT(Map) | |
412 | VISIT(SparseUnion) | |
413 | VISIT(DenseUnion) | |
414 | VISIT(Dictionary) | |
415 | VISIT(Decimal128) | |
416 | VISIT(Decimal256) | |
417 | // TODO | |
418 | // VISIT(Extension) | |
419 | ||
420 | #undef VISIT | |
421 | ||
422 | private: | |
423 | template <typename ArrayType> | |
424 | inline VALUE convert_value(const ArrayType& array, | |
425 | const int64_t i) { | |
426 | return array_value_converter_->convert(array, i); | |
427 | } | |
428 | ||
429 | template <typename ArrayType> | |
430 | void fill_field(const ArrayType& array) { | |
431 | if (array.IsNull(index_)) { | |
432 | rb_hash_aset(result_, key_, Qnil); | |
433 | } else { | |
434 | rb_hash_aset(result_, key_, convert_value(array, index_)); | |
435 | } | |
436 | } | |
437 | ||
438 | ArrayValueConverter* array_value_converter_; | |
439 | VALUE key_; | |
440 | int64_t index_; | |
441 | VALUE result_; | |
442 | }; | |
443 | ||
444 | class MapArrayValueConverter : public arrow::ArrayVisitor { | |
445 | public: | |
446 | explicit MapArrayValueConverter(ArrayValueConverter* converter) | |
447 | : array_value_converter_(converter), | |
448 | offset_(0), | |
449 | length_(0), | |
450 | values_(Qnil) {} | |
451 | ||
452 | VALUE convert(const arrow::MapArray& array, | |
453 | const int64_t index) { | |
454 | auto key_array = array.keys().get(); | |
455 | auto item_array = array.items().get(); | |
456 | auto offset_keep = offset_; | |
457 | auto length_keep = length_; | |
458 | auto values_keep = values_; | |
459 | offset_ = array.value_offset(index); | |
460 | length_ = array.value_length(index); | |
461 | auto keys = rb_ary_new_capa(length_); | |
462 | values_ = keys; | |
463 | check_status(key_array->Accept(this), | |
464 | "[raw-records][map-array][keys]"); | |
465 | auto items = rb_ary_new_capa(length_); | |
466 | values_ = items; | |
467 | check_status(item_array->Accept(this), | |
468 | "[raw-records][map-array][items]"); | |
469 | auto map = rb_hash_new(); | |
470 | auto n = RARRAY_LEN(keys); | |
471 | auto raw_keys = RARRAY_CONST_PTR(keys); | |
472 | auto raw_items = RARRAY_CONST_PTR(items); | |
473 | for (long i = 0; i < n; ++i) { | |
474 | rb_hash_aset(map, raw_keys[i], raw_items[i]); | |
475 | } | |
476 | offset_ = offset_keep; | |
477 | length_ = length_keep; | |
478 | values_ = values_keep; | |
479 | return map; | |
480 | } | |
481 | ||
482 | #define VISIT(TYPE) \ | |
483 | arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ | |
484 | return visit_value(array); \ | |
485 | } | |
486 | ||
487 | VISIT(Null) | |
488 | VISIT(Boolean) | |
489 | VISIT(Int8) | |
490 | VISIT(Int16) | |
491 | VISIT(Int32) | |
492 | VISIT(Int64) | |
493 | VISIT(UInt8) | |
494 | VISIT(UInt16) | |
495 | VISIT(UInt32) | |
496 | VISIT(UInt64) | |
497 | // TODO | |
498 | // VISIT(HalfFloat) | |
499 | VISIT(Float) | |
500 | VISIT(Double) | |
501 | VISIT(Binary) | |
502 | VISIT(String) | |
503 | VISIT(FixedSizeBinary) | |
504 | VISIT(Date32) | |
505 | VISIT(Date64) | |
506 | VISIT(Time32) | |
507 | VISIT(Time64) | |
508 | VISIT(Timestamp) | |
509 | // TODO | |
510 | // VISIT(Interval) | |
511 | VISIT(List) | |
512 | VISIT(Struct) | |
513 | VISIT(Map) | |
514 | VISIT(SparseUnion) | |
515 | VISIT(DenseUnion) | |
516 | VISIT(Dictionary) | |
517 | VISIT(Decimal128) | |
518 | VISIT(Decimal256) | |
519 | // TODO | |
520 | // VISIT(Extension) | |
521 | ||
522 | #undef VISIT | |
523 | ||
524 | private: | |
525 | template <typename ArrayType> | |
526 | inline VALUE convert_value(const ArrayType& array, | |
527 | const int64_t i) { | |
528 | return array_value_converter_->convert(array, i); | |
529 | } | |
530 | ||
531 | template <typename ArrayType> | |
532 | arrow::Status visit_value(const ArrayType& array) { | |
533 | if (array.null_count() > 0) { | |
534 | for (int64_t i = 0; i < length_; ++i) { | |
535 | auto value = Qnil; | |
536 | if (!array.IsNull(i + offset_)) { | |
537 | value = convert_value(array, i + offset_); | |
538 | } | |
539 | rb_ary_push(values_, value); | |
540 | } | |
541 | } else { | |
542 | for (int64_t i = 0; i < length_; ++i) { | |
543 | rb_ary_push(values_, convert_value(array, i + offset_)); | |
544 | } | |
545 | } | |
546 | return arrow::Status::OK(); | |
547 | } | |
548 | ||
549 | ArrayValueConverter* array_value_converter_; | |
550 | int32_t offset_; | |
551 | int32_t length_; | |
552 | VALUE values_; | |
553 | }; | |
554 | ||
555 | class UnionArrayValueConverter : public arrow::ArrayVisitor { | |
556 | public: | |
557 | explicit UnionArrayValueConverter(ArrayValueConverter* converter) | |
558 | : array_value_converter_(converter), | |
559 | index_(0), | |
560 | result_(Qnil) {} | |
561 | ||
562 | VALUE convert(const arrow::UnionArray& array, | |
563 | const int64_t index) { | |
564 | const auto index_keep = index_; | |
565 | const auto result_keep = result_; | |
566 | index_ = index; | |
567 | switch (array.mode()) { | |
568 | case arrow::UnionMode::SPARSE: | |
569 | convert_sparse(static_cast<const arrow::SparseUnionArray&>(array)); | |
570 | break; | |
571 | case arrow::UnionMode::DENSE: | |
572 | convert_dense(static_cast<const arrow::DenseUnionArray&>(array)); | |
573 | break; | |
574 | default: | |
575 | rb_raise(rb_eArgError, "Invalid union mode"); | |
576 | break; | |
577 | } | |
578 | auto result_return = result_; | |
579 | index_ = index_keep; | |
580 | result_ = result_keep; | |
581 | return result_return; | |
582 | } | |
583 | ||
584 | #define VISIT(TYPE) \ | |
585 | arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ | |
586 | convert_value(array); \ | |
587 | return arrow::Status::OK(); \ | |
588 | } | |
589 | ||
590 | VISIT(Null) | |
591 | VISIT(Boolean) | |
592 | VISIT(Int8) | |
593 | VISIT(Int16) | |
594 | VISIT(Int32) | |
595 | VISIT(Int64) | |
596 | VISIT(UInt8) | |
597 | VISIT(UInt16) | |
598 | VISIT(UInt32) | |
599 | VISIT(UInt64) | |
600 | // TODO | |
601 | // VISIT(HalfFloat) | |
602 | VISIT(Float) | |
603 | VISIT(Double) | |
604 | VISIT(Binary) | |
605 | VISIT(String) | |
606 | VISIT(FixedSizeBinary) | |
607 | VISIT(Date32) | |
608 | VISIT(Date64) | |
609 | VISIT(Time32) | |
610 | VISIT(Time64) | |
611 | VISIT(Timestamp) | |
612 | // TODO | |
613 | // VISIT(Interval) | |
614 | VISIT(List) | |
615 | VISIT(Struct) | |
616 | VISIT(Map) | |
617 | VISIT(SparseUnion) | |
618 | VISIT(DenseUnion) | |
619 | VISIT(Dictionary) | |
620 | VISIT(Decimal128) | |
621 | VISIT(Decimal256) | |
622 | // TODO | |
623 | // VISIT(Extension) | |
624 | ||
625 | #undef VISIT | |
626 | ||
627 | private: | |
628 | template <typename ArrayType> | |
629 | inline void convert_value(const ArrayType& array) { | |
630 | auto result = rb_hash_new(); | |
631 | if (array.IsNull(index_)) { | |
632 | rb_hash_aset(result, field_name_, Qnil); | |
633 | } else { | |
634 | rb_hash_aset(result, | |
635 | field_name_, | |
636 | array_value_converter_->convert(array, index_)); | |
637 | } | |
638 | result_ = result; | |
639 | } | |
640 | ||
641 | uint8_t compute_field_index(const arrow::UnionArray& array, | |
642 | arrow::UnionType* type, | |
643 | const char* tag) { | |
644 | const auto type_code = array.raw_type_codes()[index_]; | |
645 | if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) { | |
646 | const auto field_id = type->child_ids()[type_code]; | |
647 | if (field_id >= 0) { | |
648 | return field_id; | |
649 | } | |
650 | } | |
651 | check_status(arrow::Status::Invalid("Unknown type ID: ", type_code), | |
652 | tag); | |
653 | return 0; | |
654 | } | |
655 | ||
656 | void convert_sparse(const arrow::SparseUnionArray& array) { | |
657 | const auto type = | |
658 | std::static_pointer_cast<arrow::UnionType>(array.type()).get(); | |
659 | const auto tag = "[raw-records][union-sparse-array]"; | |
660 | const auto index = compute_field_index(array, type, tag); | |
661 | const auto field = type->field(index).get(); | |
662 | const auto& field_name = field->name(); | |
663 | const auto field_name_keep = field_name_; | |
664 | field_name_ = rb_utf8_str_new(field_name.data(), field_name.length()); | |
665 | const auto field_array = array.field(index).get(); | |
666 | check_status(field_array->Accept(this), tag); | |
667 | field_name_ = field_name_keep; | |
668 | } | |
669 | ||
670 | void convert_dense(const arrow::DenseUnionArray& array) { | |
671 | const auto type = | |
672 | std::static_pointer_cast<arrow::UnionType>(array.type()).get(); | |
673 | const auto tag = "[raw-records][union-dense-array]"; | |
674 | const auto index = compute_field_index(array, type, tag); | |
675 | const auto field = type->field(index).get(); | |
676 | const auto& field_name = field->name(); | |
677 | const auto field_name_keep = field_name_; | |
678 | field_name_ = rb_utf8_str_new(field_name.data(), field_name.length()); | |
679 | const auto field_array = array.field(index); | |
680 | const auto index_keep = index_; | |
681 | index_ = array.value_offset(index_); | |
682 | check_status(field_array->Accept(this), tag); | |
683 | index_ = index_keep; | |
684 | field_name_ = field_name_keep; | |
685 | } | |
686 | ||
687 | ArrayValueConverter* array_value_converter_; | |
688 | int64_t index_; | |
689 | VALUE field_name_; | |
690 | VALUE result_; | |
691 | }; | |
692 | ||
693 | class DictionaryArrayValueConverter : public arrow::ArrayVisitor { | |
694 | public: | |
695 | explicit DictionaryArrayValueConverter(ArrayValueConverter* converter) | |
696 | : array_value_converter_(converter), | |
697 | value_index_(0), | |
698 | result_(Qnil) { | |
699 | } | |
700 | ||
701 | VALUE convert(const arrow::DictionaryArray& array, | |
702 | const int64_t index) { | |
703 | value_index_ = array.GetValueIndex(index); | |
704 | auto dictionary = array.dictionary().get(); | |
705 | check_status(dictionary->Accept(this), | |
706 | "[raw-records][dictionary-array]"); | |
707 | return result_; | |
708 | } | |
709 | ||
710 | #define VISIT(TYPE) \ | |
711 | arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ | |
712 | result_ = convert_value(array, value_index_); \ | |
713 | return arrow::Status::OK(); \ | |
714 | } | |
715 | ||
716 | VISIT(Null) | |
717 | VISIT(Boolean) | |
718 | VISIT(Int8) | |
719 | VISIT(Int16) | |
720 | VISIT(Int32) | |
721 | VISIT(Int64) | |
722 | VISIT(UInt8) | |
723 | VISIT(UInt16) | |
724 | VISIT(UInt32) | |
725 | VISIT(UInt64) | |
726 | // TODO | |
727 | // VISIT(HalfFloat) | |
728 | VISIT(Float) | |
729 | VISIT(Double) | |
730 | VISIT(Binary) | |
731 | VISIT(String) | |
732 | VISIT(FixedSizeBinary) | |
733 | VISIT(Date32) | |
734 | VISIT(Date64) | |
735 | VISIT(Time32) | |
736 | VISIT(Time64) | |
737 | VISIT(Timestamp) | |
738 | // TODO | |
739 | // VISIT(Interval) | |
740 | VISIT(List) | |
741 | VISIT(Struct) | |
742 | VISIT(Map) | |
743 | VISIT(SparseUnion) | |
744 | VISIT(DenseUnion) | |
745 | VISIT(Dictionary) | |
746 | VISIT(Decimal128) | |
747 | VISIT(Decimal256) | |
748 | // TODO | |
749 | // VISIT(Extension) | |
750 | ||
751 | #undef VISIT | |
752 | ||
753 | private: | |
754 | template <typename ArrayType> | |
755 | inline VALUE convert_value(const ArrayType& array, | |
756 | const int64_t i) { | |
757 | return array_value_converter_->convert(array, i); | |
758 | } | |
759 | ||
760 | ArrayValueConverter* array_value_converter_; | |
761 | int64_t value_index_; | |
762 | VALUE result_; | |
763 | }; | |
764 | ||
765 | class Converter { | |
766 | public: | |
767 | explicit Converter() | |
768 | : array_value_converter_(), | |
769 | list_array_value_converter_(&array_value_converter_), | |
770 | struct_array_value_converter_(&array_value_converter_), | |
771 | map_array_value_converter_(&array_value_converter_), | |
772 | union_array_value_converter_(&array_value_converter_), | |
773 | dictionary_array_value_converter_(&array_value_converter_) { | |
774 | array_value_converter_. | |
775 | set_sub_value_converters(&list_array_value_converter_, | |
776 | &struct_array_value_converter_, | |
777 | &map_array_value_converter_, | |
778 | &union_array_value_converter_, | |
779 | &dictionary_array_value_converter_); | |
780 | } | |
781 | ||
782 | template <typename ArrayType> | |
783 | inline VALUE convert_value(const ArrayType& array, | |
784 | const int64_t i) { | |
785 | return array_value_converter_.convert(array, i); | |
786 | } | |
787 | ||
788 | ArrayValueConverter array_value_converter_; | |
789 | ListArrayValueConverter list_array_value_converter_; | |
790 | StructArrayValueConverter struct_array_value_converter_; | |
791 | MapArrayValueConverter map_array_value_converter_; | |
792 | UnionArrayValueConverter union_array_value_converter_; | |
793 | DictionaryArrayValueConverter dictionary_array_value_converter_; | |
794 | }; | |
795 | } |