]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/array/array_union_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / array / array_union_test.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <gtest/gtest.h>
19
20#include <string>
21
22#include "arrow/array.h"
23#include "arrow/array/builder_nested.h"
24#include "arrow/array/builder_union.h"
25// TODO ipc shouldn't be included here
26#include "arrow/ipc/test_common.h"
27#include "arrow/testing/gtest_util.h"
28#include "arrow/testing/util.h"
29#include "arrow/type.h"
30#include "arrow/util/checked_cast.h"
31
32namespace arrow {
33
34using internal::checked_cast;
35
36TEST(TestUnionArray, TestSliceEquals) {
37 std::shared_ptr<RecordBatch> batch;
38 ASSERT_OK(ipc::test::MakeUnion(&batch));
39
40 auto CheckUnion = [](std::shared_ptr<Array> array) {
41 const int64_t size = array->length();
42 std::shared_ptr<Array> slice, slice2;
43 slice = array->Slice(2);
44 ASSERT_EQ(size - 2, slice->length());
45
46 slice2 = array->Slice(2);
47 ASSERT_EQ(size - 2, slice->length());
48
49 ASSERT_TRUE(slice->Equals(slice2));
50 ASSERT_TRUE(array->RangeEquals(2, array->length(), 0, slice));
51
52 // Chained slices
53 slice2 = array->Slice(1)->Slice(1);
54 ASSERT_TRUE(slice->Equals(slice2));
55
56 slice = array->Slice(1, 5);
57 slice2 = array->Slice(1, 5);
58 ASSERT_EQ(5, slice->length());
59
60 ASSERT_TRUE(slice->Equals(slice2));
61 ASSERT_TRUE(array->RangeEquals(1, 6, 0, slice));
62
63 AssertZeroPadded(*array);
64 TestInitialized(*array);
65 };
66
67 CheckUnion(batch->column(0));
68 CheckUnion(batch->column(1));
69}
70
71TEST(TestSparseUnionArray, Validate) {
72 auto a = ArrayFromJSON(int32(), "[4, 5]");
73 auto type = sparse_union({field("a", int32())});
74 auto children = std::vector<std::shared_ptr<Array>>{a};
75 auto type_ids_array = ArrayFromJSON(int8(), "[0, 0, 0]");
76 auto type_ids = type_ids_array->data()->buffers[1];
77
78 auto arr = std::make_shared<SparseUnionArray>(type, 2, children, type_ids);
79 ASSERT_OK(arr->ValidateFull());
80 arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
81 /*offset=*/1);
82 ASSERT_OK(arr->ValidateFull());
83 arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
84 /*offset=*/2);
85 ASSERT_OK(arr->ValidateFull());
86
87 // Length + offset < child length, but it's ok
88 arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
89 /*offset=*/0);
90 ASSERT_OK(arr->ValidateFull());
91
92 // Length + offset > child length
93 arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
94 /*offset=*/2);
95 ASSERT_RAISES(Invalid, arr->ValidateFull());
96
97 // Offset > child length
98 arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
99 /*offset=*/3);
100 ASSERT_RAISES(Invalid, arr->ValidateFull());
101}
102
103// -------------------------------------------------------------------------
104// Tests for MakeDense and MakeSparse
105
106class TestUnionArrayFactories : public ::testing::Test {
107 public:
108 void SetUp() {
109 pool_ = default_memory_pool();
110 type_codes_ = {1, 2, 4, 127};
111 ArrayFromVector<Int8Type>({0, 1, 2, 0, 1, 3, 2, 0, 2, 1}, &type_ids_);
112 ArrayFromVector<Int8Type>({1, 2, 4, 1, 2, 127, 4, 1, 4, 2}, &logical_type_ids_);
113 ArrayFromVector<Int8Type>({1, 2, 4, 1, -2, 127, 4, 1, 4, 2}, &invalid_type_ids1_);
114 ArrayFromVector<Int8Type>({1, 2, 4, 1, 3, 127, 4, 1, 4, 2}, &invalid_type_ids2_);
115 }
116
117 void CheckUnionArray(const UnionArray& array, UnionMode::type mode,
118 const std::vector<std::string>& field_names,
119 const std::vector<int8_t>& type_codes) {
120 ASSERT_EQ(mode, array.mode());
121 CheckFieldNames(array, field_names);
122 CheckTypeCodes(array, type_codes);
123 const auto& type_ids = checked_cast<const Int8Array&>(*type_ids_);
124 for (int64_t i = 0; i < type_ids.length(); ++i) {
125 ASSERT_EQ(array.child_id(i), type_ids.Value(i));
126 }
127 ASSERT_EQ(nullptr, array.field(-1));
128 ASSERT_EQ(nullptr, array.field(static_cast<int>(type_ids.length())));
129 }
130
131 void CheckFieldNames(const UnionArray& array, const std::vector<std::string>& names) {
132 const auto& type = checked_cast<const UnionType&>(*array.type());
133 ASSERT_EQ(type.num_fields(), names.size());
134 for (int i = 0; i < type.num_fields(); ++i) {
135 ASSERT_EQ(type.field(i)->name(), names[i]);
136 }
137 }
138
139 void CheckTypeCodes(const UnionArray& array, const std::vector<int8_t>& codes) {
140 const auto& type = checked_cast<const UnionType&>(*array.type());
141 ASSERT_EQ(codes, type.type_codes());
142 }
143
144 protected:
145 MemoryPool* pool_;
146 std::vector<int8_t> type_codes_;
147 std::shared_ptr<Array> type_ids_;
148 std::shared_ptr<Array> logical_type_ids_;
149 std::shared_ptr<Array> invalid_type_ids1_;
150 std::shared_ptr<Array> invalid_type_ids2_;
151};
152
153TEST_F(TestUnionArrayFactories, TestMakeDense) {
154 std::shared_ptr<Array> value_offsets;
155 // type_ids_: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1}
156 ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, 0, 1, 2, 1, 2}, &value_offsets);
157
158 auto children = std::vector<std::shared_ptr<Array>>(4);
159 ArrayFromVector<StringType, std::string>({"abc", "def", "xyz"}, &children[0]);
160 ArrayFromVector<UInt8Type>({10, 20, 30}, &children[1]);
161 ArrayFromVector<DoubleType>({1.618, 2.718, 3.142}, &children[2]);
162 ArrayFromVector<Int8Type>({-12}, &children[3]);
163
164 std::vector<std::string> field_names = {"str", "int1", "real", "int2"};
165
166 std::shared_ptr<Array> result;
167 const UnionArray* union_array;
168
169 // without field names and type codes
170 ASSERT_OK_AND_ASSIGN(result,
171 DenseUnionArray::Make(*type_ids_, *value_offsets, children));
172 ASSERT_OK(result->ValidateFull());
173 union_array = checked_cast<const UnionArray*>(result.get());
174 CheckUnionArray(*union_array, UnionMode::DENSE, {"0", "1", "2", "3"}, {0, 1, 2, 3});
175
176 // with field name
177 ASSERT_RAISES(Invalid,
178 DenseUnionArray::Make(*type_ids_, *value_offsets, children, {"one"}));
179 ASSERT_OK_AND_ASSIGN(
180 result, DenseUnionArray::Make(*type_ids_, *value_offsets, children, field_names));
181 ASSERT_OK(result->ValidateFull());
182 union_array = checked_cast<const UnionArray*>(result.get());
183 CheckUnionArray(*union_array, UnionMode::DENSE, field_names, {0, 1, 2, 3});
184
185 // with type codes
186 ASSERT_RAISES(Invalid, DenseUnionArray::Make(*logical_type_ids_, *value_offsets,
187 children, std::vector<int8_t>{0}));
188 ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*logical_type_ids_, *value_offsets,
189 children, type_codes_));
190 ASSERT_OK(result->ValidateFull());
191 union_array = checked_cast<const UnionArray*>(result.get());
192 CheckUnionArray(*union_array, UnionMode::DENSE, {"0", "1", "2", "3"}, type_codes_);
193
194 // with field names and type codes
195 ASSERT_RAISES(Invalid, DenseUnionArray::Make(*logical_type_ids_, *value_offsets,
196 children, {"one"}, type_codes_));
197 ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*logical_type_ids_, *value_offsets,
198 children, field_names, type_codes_));
199 ASSERT_OK(result->ValidateFull());
200 union_array = checked_cast<const UnionArray*>(result.get());
201 CheckUnionArray(*union_array, UnionMode::DENSE, field_names, type_codes_);
202
203 // Invalid type codes
204 ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*invalid_type_ids1_, *value_offsets,
205 children, type_codes_));
206 ASSERT_RAISES(Invalid, result->ValidateFull());
207 ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*invalid_type_ids2_, *value_offsets,
208 children, type_codes_));
209 ASSERT_RAISES(Invalid, result->ValidateFull());
210
211 // Invalid offsets
212 // - offset out of bounds at index 5
213 std::shared_ptr<Array> invalid_offsets;
214 ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, 1, 1, 2, 1, 2}, &invalid_offsets);
215 ASSERT_OK_AND_ASSIGN(result,
216 DenseUnionArray::Make(*type_ids_, *invalid_offsets, children));
217 ASSERT_RAISES(Invalid, result->ValidateFull());
218 // - negative offset at index 5
219 ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, -1, 1, 2, 1, 2}, &invalid_offsets);
220 ASSERT_OK_AND_ASSIGN(result,
221 DenseUnionArray::Make(*type_ids_, *invalid_offsets, children));
222 ASSERT_RAISES(Invalid, result->ValidateFull());
223 // - non-monotonic offset at index 3
224 ArrayFromVector<Int32Type, int32_t>({1, 0, 0, 0, 1, 0, 1, 2, 1, 2}, &invalid_offsets);
225 ASSERT_OK_AND_ASSIGN(result,
226 DenseUnionArray::Make(*type_ids_, *invalid_offsets, children));
227 ASSERT_RAISES(Invalid, result->ValidateFull());
228}
229
230TEST_F(TestUnionArrayFactories, TestMakeSparse) {
231 auto children = std::vector<std::shared_ptr<Array>>(4);
232 ArrayFromVector<StringType, std::string>(
233 {"abc", "", "", "def", "", "", "", "xyz", "", ""}, &children[0]);
234 ArrayFromVector<UInt8Type>({0, 10, 0, 0, 20, 0, 0, 0, 0, 30}, &children[1]);
235 ArrayFromVector<DoubleType>({0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0},
236 &children[2]);
237 ArrayFromVector<Int8Type>({0, 0, 0, 0, 0, -12, 0, 0, 0, 0}, &children[3]);
238
239 std::vector<std::string> field_names = {"str", "int1", "real", "int2"};
240
241 std::shared_ptr<Array> result;
242
243 // without field names and type codes
244 ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*type_ids_, children));
245 ASSERT_OK(result->ValidateFull());
246 CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE,
247 {"0", "1", "2", "3"}, {0, 1, 2, 3});
248
249 // with field names
250 ASSERT_RAISES(Invalid, SparseUnionArray::Make(*type_ids_, children, {"one"}));
251 ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*type_ids_, children, field_names));
252 ASSERT_OK(result->ValidateFull());
253 CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names,
254 {0, 1, 2, 3});
255
256 // with type codes
257 ASSERT_RAISES(Invalid, SparseUnionArray::Make(*logical_type_ids_, children,
258 std::vector<int8_t>{0}));
259 ASSERT_OK_AND_ASSIGN(result,
260 SparseUnionArray::Make(*logical_type_ids_, children, type_codes_));
261 ASSERT_OK(result->ValidateFull());
262 CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE,
263 {"0", "1", "2", "3"}, type_codes_);
264
265 // with field names and type codes
266 ASSERT_RAISES(Invalid, SparseUnionArray::Make(*logical_type_ids_, children, {"one"},
267 type_codes_));
268 ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*logical_type_ids_, children,
269 field_names, type_codes_));
270 ASSERT_OK(result->ValidateFull());
271 CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names,
272 type_codes_);
273
274 // Invalid type codes
275 ASSERT_OK_AND_ASSIGN(
276 result, SparseUnionArray::Make(*invalid_type_ids1_, children, type_codes_));
277 ASSERT_RAISES(Invalid, result->ValidateFull());
278 ASSERT_OK_AND_ASSIGN(
279 result, SparseUnionArray::Make(*invalid_type_ids2_, children, type_codes_));
280 ASSERT_RAISES(Invalid, result->ValidateFull());
281
282 // Invalid child length
283 ArrayFromVector<Int8Type>({0, 0, 0, 0, 0, -12, 0, 0, 0}, &children[3]);
284 ASSERT_RAISES(Invalid, SparseUnionArray::Make(*type_ids_, children));
285}
286
287template <typename B>
288class UnionBuilderTest : public ::testing::Test {
289 public:
290 int8_t I8 = 8, STR = 13, DBL = 7;
291
292 virtual void AppendInt(int8_t i) {
293 expected_types_vector.push_back(I8);
294 ASSERT_OK(union_builder->Append(I8));
295 ASSERT_OK(i8_builder->Append(i));
296 }
297
298 virtual void AppendString(const std::string& str) {
299 expected_types_vector.push_back(STR);
300 ASSERT_OK(union_builder->Append(STR));
301 ASSERT_OK(str_builder->Append(str));
302 }
303
304 virtual void AppendDouble(double dbl) {
305 expected_types_vector.push_back(DBL);
306 ASSERT_OK(union_builder->Append(DBL));
307 ASSERT_OK(dbl_builder->Append(dbl));
308 }
309
310 void AppendBasics() {
311 AppendInt(33);
312 AppendString("abc");
313 AppendDouble(1.0);
314 AppendDouble(-1.0);
315 AppendString("");
316 AppendInt(10);
317 AppendString("def");
318 AppendInt(-10);
319 AppendDouble(0.5);
320
321 ASSERT_OK(union_builder->Finish(&actual));
322 ASSERT_OK(actual->ValidateFull());
323 ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
324 }
325
326 void AppendNullsAndEmptyValues() {
327 AppendString("abc");
328 ASSERT_OK(union_builder->AppendNull());
329 ASSERT_OK(union_builder->AppendEmptyValue());
330 expected_types_vector.insert(expected_types_vector.end(), 3, I8);
331 AppendInt(42);
332 ASSERT_OK(union_builder->AppendNulls(2));
333 ASSERT_OK(union_builder->AppendEmptyValues(2));
334 expected_types_vector.insert(expected_types_vector.end(), 3, I8);
335
336 ASSERT_OK(union_builder->Finish(&actual));
337 ASSERT_OK(actual->ValidateFull());
338 ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
339 }
340
341 void AppendInferred() {
342 I8 = union_builder->AppendChild(i8_builder, "i8");
343 ASSERT_EQ(I8, 0);
344 AppendInt(33);
345 AppendInt(10);
346
347 STR = union_builder->AppendChild(str_builder, "str");
348 ASSERT_EQ(STR, 1);
349 AppendString("abc");
350 AppendString("");
351 AppendString("def");
352 AppendInt(-10);
353
354 DBL = union_builder->AppendChild(dbl_builder, "dbl");
355 ASSERT_EQ(DBL, 2);
356 AppendDouble(1.0);
357 AppendDouble(-1.0);
358 AppendDouble(0.5);
359
360 ASSERT_OK(union_builder->Finish(&actual));
361 ASSERT_OK(actual->ValidateFull());
362 ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
363
364 ASSERT_EQ(I8, 0);
365 ASSERT_EQ(STR, 1);
366 ASSERT_EQ(DBL, 2);
367 }
368
369 void AppendListOfInferred(std::shared_ptr<ListArray>* actual) {
370 ListBuilder list_builder(default_memory_pool(), union_builder);
371
372 ASSERT_OK(list_builder.Append());
373 I8 = union_builder->AppendChild(i8_builder, "i8");
374 ASSERT_EQ(I8, 0);
375 AppendInt(10);
376
377 ASSERT_OK(list_builder.Append());
378 STR = union_builder->AppendChild(str_builder, "str");
379 ASSERT_EQ(STR, 1);
380 AppendString("abc");
381 AppendInt(-10);
382
383 ASSERT_OK(list_builder.Append());
384 DBL = union_builder->AppendChild(dbl_builder, "dbl");
385 ASSERT_EQ(DBL, 2);
386 AppendDouble(0.5);
387
388 ASSERT_OK(list_builder.Finish(actual));
389 ASSERT_OK((*actual)->ValidateFull());
390 ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
391 }
392
393 std::vector<uint8_t> expected_types_vector;
394 std::shared_ptr<Array> expected_types;
395 std::shared_ptr<Int8Builder> i8_builder = std::make_shared<Int8Builder>();
396 std::shared_ptr<StringBuilder> str_builder = std::make_shared<StringBuilder>();
397 std::shared_ptr<DoubleBuilder> dbl_builder = std::make_shared<DoubleBuilder>();
398 std::shared_ptr<B> union_builder = std::make_shared<B>(default_memory_pool());
399 std::shared_ptr<UnionArray> actual;
400};
401
402class DenseUnionBuilderTest : public UnionBuilderTest<DenseUnionBuilder> {};
403class SparseUnionBuilderTest : public UnionBuilderTest<SparseUnionBuilder> {
404 public:
405 using Base = UnionBuilderTest<SparseUnionBuilder>;
406
407 void AppendInt(int8_t i) override {
408 Base::AppendInt(i);
409 ASSERT_OK(str_builder->AppendEmptyValue());
410 ASSERT_OK(dbl_builder->AppendEmptyValue());
411 }
412
413 void AppendString(const std::string& str) override {
414 Base::AppendString(str);
415 ASSERT_OK(i8_builder->AppendEmptyValue());
416 ASSERT_OK(dbl_builder->AppendEmptyValue());
417 }
418
419 void AppendDouble(double dbl) override {
420 Base::AppendDouble(dbl);
421 ASSERT_OK(i8_builder->AppendEmptyValue());
422 ASSERT_OK(str_builder->AppendEmptyValue());
423 }
424};
425
426TEST_F(DenseUnionBuilderTest, Basics) {
427 union_builder.reset(new DenseUnionBuilder(
428 default_memory_pool(), {i8_builder, str_builder, dbl_builder},
429 dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
430 {I8, STR, DBL})));
431 AppendBasics();
432
433 auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]");
434 auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])");
435 auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]");
436
437 auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 1, 1, 2, 2, 2]");
438
439 ASSERT_OK_AND_ASSIGN(auto expected,
440 DenseUnionArray::Make(*expected_types, *expected_offsets,
441 {expected_i8, expected_str, expected_dbl},
442 {"i8", "str", "dbl"}, {I8, STR, DBL}));
443
444 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
445 ASSERT_ARRAYS_EQUAL(*expected, *actual);
446}
447
448TEST_F(DenseUnionBuilderTest, NullsAndEmptyValues) {
449 union_builder.reset(new DenseUnionBuilder(
450 default_memory_pool(), {i8_builder, str_builder, dbl_builder},
451 dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
452 {I8, STR, DBL})));
453 AppendNullsAndEmptyValues();
454
455 // Four null / empty values (the latter implementation-defined) were appended to I8
456 auto expected_i8 = ArrayFromJSON(int8(), "[null, 0, 42, null, 0]");
457 auto expected_str = ArrayFromJSON(utf8(), R"(["abc"])");
458 auto expected_dbl = ArrayFromJSON(float64(), "[]");
459
460 // "abc", null, 0, 42, null, null, 0, 0
461 auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 3, 3, 4, 4]");
462
463 ASSERT_OK_AND_ASSIGN(auto expected,
464 DenseUnionArray::Make(*expected_types, *expected_offsets,
465 {expected_i8, expected_str, expected_dbl},
466 {"i8", "str", "dbl"}, {I8, STR, DBL}));
467
468 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
469 ASSERT_ARRAYS_EQUAL(*expected, *actual);
470 // Physical arrays must be as expected
471 ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
472 ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
473 ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
474}
475
476TEST_F(DenseUnionBuilderTest, InferredType) {
477 AppendInferred();
478
479 auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]");
480 auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])");
481 auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]");
482
483 auto expected_offsets = ArrayFromJSON(int32(), "[0, 1, 0, 1, 2, 2, 0, 1, 2]");
484
485 ASSERT_OK_AND_ASSIGN(auto expected,
486 DenseUnionArray::Make(*expected_types, *expected_offsets,
487 {expected_i8, expected_str, expected_dbl},
488 {"i8", "str", "dbl"}, {I8, STR, DBL}));
489
490 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
491 ASSERT_ARRAYS_EQUAL(*expected, *actual);
492}
493
494TEST_F(DenseUnionBuilderTest, ListOfInferredType) {
495 std::shared_ptr<ListArray> actual;
496 AppendListOfInferred(&actual);
497
498 auto expected_type = list(
499 dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
500 {I8, STR, DBL}));
501 ASSERT_EQ(expected_type->ToString(), actual->type()->ToString());
502}
503
504TEST_F(SparseUnionBuilderTest, Basics) {
505 union_builder.reset(new SparseUnionBuilder(
506 default_memory_pool(), {i8_builder, str_builder, dbl_builder},
507 sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
508 {I8, STR, DBL})));
509
510 AppendBasics();
511
512 auto expected_i8 =
513 ArrayFromJSON(int8(), "[33, null, null, null, null, 10, null, -10, null]");
514 auto expected_str =
515 ArrayFromJSON(utf8(), R"([null, "abc", null, null, "", null, "def", null, null])");
516 auto expected_dbl =
517 ArrayFromJSON(float64(), "[null, null, 1.0, -1.0, null, null, null, null, 0.5]");
518
519 ASSERT_OK_AND_ASSIGN(
520 auto expected,
521 SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl},
522 {"i8", "str", "dbl"}, {I8, STR, DBL}));
523
524 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
525 ASSERT_ARRAYS_EQUAL(*expected, *actual);
526}
527
528TEST_F(SparseUnionBuilderTest, NullsAndEmptyValues) {
529 union_builder.reset(new SparseUnionBuilder(
530 default_memory_pool(), {i8_builder, str_builder, dbl_builder},
531 sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
532 {I8, STR, DBL})));
533 AppendNullsAndEmptyValues();
534
535 // "abc", null, 0, 42, null, null, 0, 0
536 // (note that getting 0 for empty values is implementation-defined)
537 auto expected_i8 = ArrayFromJSON(int8(), "[0, null, 0, 42, null, null, 0, 0]");
538 auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "", "", "", "", "", ""])");
539 auto expected_dbl = ArrayFromJSON(float64(), "[0, 0, 0, 0, 0, 0, 0, 0]");
540
541 ASSERT_OK_AND_ASSIGN(
542 auto expected,
543 SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl},
544 {"i8", "str", "dbl"}, {I8, STR, DBL}));
545
546 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
547 ASSERT_ARRAYS_EQUAL(*expected, *actual);
548 // Physical arrays must be as expected
549 ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
550 ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
551 ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
552}
553
554TEST_F(SparseUnionBuilderTest, InferredType) {
555 AppendInferred();
556
557 auto expected_i8 =
558 ArrayFromJSON(int8(), "[33, 10, null, null, null, -10, null, null, null]");
559 auto expected_str =
560 ArrayFromJSON(utf8(), R"([null, null, "abc", "", "def", null, null, null, null])");
561 auto expected_dbl =
562 ArrayFromJSON(float64(), "[null, null, null, null, null, null, 1.0, -1.0, 0.5]");
563
564 ASSERT_OK_AND_ASSIGN(
565 auto expected,
566 SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl},
567 {"i8", "str", "dbl"}, {I8, STR, DBL}));
568
569 ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
570 ASSERT_ARRAYS_EQUAL(*expected, *actual);
571}
572
573TEST_F(SparseUnionBuilderTest, StructWithUnion) {
574 auto union_builder = std::make_shared<SparseUnionBuilder>(default_memory_pool());
575 StructBuilder builder(struct_({field("u", union_builder->type())}),
576 default_memory_pool(), {union_builder});
577 ASSERT_EQ(union_builder->AppendChild(std::make_shared<Int32Builder>(), "i"), 0);
578 ASSERT_TRUE(builder.type()->Equals(
579 struct_({field("u", sparse_union({field("i", int32())}, {0}))})));
580}
581
582} // namespace arrow