]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <gtest/gtest.h> | |
19 | ||
20 | #include <string> | |
21 | ||
22 | #include "arrow/array.h" | |
23 | #include "arrow/array/builder_nested.h" | |
24 | #include "arrow/array/builder_union.h" | |
25 | // TODO ipc shouldn't be included here | |
26 | #include "arrow/ipc/test_common.h" | |
27 | #include "arrow/testing/gtest_util.h" | |
28 | #include "arrow/testing/util.h" | |
29 | #include "arrow/type.h" | |
30 | #include "arrow/util/checked_cast.h" | |
31 | ||
32 | namespace arrow { | |
33 | ||
34 | using internal::checked_cast; | |
35 | ||
36 | TEST(TestUnionArray, TestSliceEquals) { | |
37 | std::shared_ptr<RecordBatch> batch; | |
38 | ASSERT_OK(ipc::test::MakeUnion(&batch)); | |
39 | ||
40 | auto CheckUnion = [](std::shared_ptr<Array> array) { | |
41 | const int64_t size = array->length(); | |
42 | std::shared_ptr<Array> slice, slice2; | |
43 | slice = array->Slice(2); | |
44 | ASSERT_EQ(size - 2, slice->length()); | |
45 | ||
46 | slice2 = array->Slice(2); | |
47 | ASSERT_EQ(size - 2, slice->length()); | |
48 | ||
49 | ASSERT_TRUE(slice->Equals(slice2)); | |
50 | ASSERT_TRUE(array->RangeEquals(2, array->length(), 0, slice)); | |
51 | ||
52 | // Chained slices | |
53 | slice2 = array->Slice(1)->Slice(1); | |
54 | ASSERT_TRUE(slice->Equals(slice2)); | |
55 | ||
56 | slice = array->Slice(1, 5); | |
57 | slice2 = array->Slice(1, 5); | |
58 | ASSERT_EQ(5, slice->length()); | |
59 | ||
60 | ASSERT_TRUE(slice->Equals(slice2)); | |
61 | ASSERT_TRUE(array->RangeEquals(1, 6, 0, slice)); | |
62 | ||
63 | AssertZeroPadded(*array); | |
64 | TestInitialized(*array); | |
65 | }; | |
66 | ||
67 | CheckUnion(batch->column(0)); | |
68 | CheckUnion(batch->column(1)); | |
69 | } | |
70 | ||
71 | TEST(TestSparseUnionArray, Validate) { | |
72 | auto a = ArrayFromJSON(int32(), "[4, 5]"); | |
73 | auto type = sparse_union({field("a", int32())}); | |
74 | auto children = std::vector<std::shared_ptr<Array>>{a}; | |
75 | auto type_ids_array = ArrayFromJSON(int8(), "[0, 0, 0]"); | |
76 | auto type_ids = type_ids_array->data()->buffers[1]; | |
77 | ||
78 | auto arr = std::make_shared<SparseUnionArray>(type, 2, children, type_ids); | |
79 | ASSERT_OK(arr->ValidateFull()); | |
80 | arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids, | |
81 | /*offset=*/1); | |
82 | ASSERT_OK(arr->ValidateFull()); | |
83 | arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids, | |
84 | /*offset=*/2); | |
85 | ASSERT_OK(arr->ValidateFull()); | |
86 | ||
87 | // Length + offset < child length, but it's ok | |
88 | arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids, | |
89 | /*offset=*/0); | |
90 | ASSERT_OK(arr->ValidateFull()); | |
91 | ||
92 | // Length + offset > child length | |
93 | arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids, | |
94 | /*offset=*/2); | |
95 | ASSERT_RAISES(Invalid, arr->ValidateFull()); | |
96 | ||
97 | // Offset > child length | |
98 | arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids, | |
99 | /*offset=*/3); | |
100 | ASSERT_RAISES(Invalid, arr->ValidateFull()); | |
101 | } | |
102 | ||
103 | // ------------------------------------------------------------------------- | |
104 | // Tests for MakeDense and MakeSparse | |
105 | ||
106 | class TestUnionArrayFactories : public ::testing::Test { | |
107 | public: | |
108 | void SetUp() { | |
109 | pool_ = default_memory_pool(); | |
110 | type_codes_ = {1, 2, 4, 127}; | |
111 | ArrayFromVector<Int8Type>({0, 1, 2, 0, 1, 3, 2, 0, 2, 1}, &type_ids_); | |
112 | ArrayFromVector<Int8Type>({1, 2, 4, 1, 2, 127, 4, 1, 4, 2}, &logical_type_ids_); | |
113 | ArrayFromVector<Int8Type>({1, 2, 4, 1, -2, 127, 4, 1, 4, 2}, &invalid_type_ids1_); | |
114 | ArrayFromVector<Int8Type>({1, 2, 4, 1, 3, 127, 4, 1, 4, 2}, &invalid_type_ids2_); | |
115 | } | |
116 | ||
117 | void CheckUnionArray(const UnionArray& array, UnionMode::type mode, | |
118 | const std::vector<std::string>& field_names, | |
119 | const std::vector<int8_t>& type_codes) { | |
120 | ASSERT_EQ(mode, array.mode()); | |
121 | CheckFieldNames(array, field_names); | |
122 | CheckTypeCodes(array, type_codes); | |
123 | const auto& type_ids = checked_cast<const Int8Array&>(*type_ids_); | |
124 | for (int64_t i = 0; i < type_ids.length(); ++i) { | |
125 | ASSERT_EQ(array.child_id(i), type_ids.Value(i)); | |
126 | } | |
127 | ASSERT_EQ(nullptr, array.field(-1)); | |
128 | ASSERT_EQ(nullptr, array.field(static_cast<int>(type_ids.length()))); | |
129 | } | |
130 | ||
131 | void CheckFieldNames(const UnionArray& array, const std::vector<std::string>& names) { | |
132 | const auto& type = checked_cast<const UnionType&>(*array.type()); | |
133 | ASSERT_EQ(type.num_fields(), names.size()); | |
134 | for (int i = 0; i < type.num_fields(); ++i) { | |
135 | ASSERT_EQ(type.field(i)->name(), names[i]); | |
136 | } | |
137 | } | |
138 | ||
139 | void CheckTypeCodes(const UnionArray& array, const std::vector<int8_t>& codes) { | |
140 | const auto& type = checked_cast<const UnionType&>(*array.type()); | |
141 | ASSERT_EQ(codes, type.type_codes()); | |
142 | } | |
143 | ||
144 | protected: | |
145 | MemoryPool* pool_; | |
146 | std::vector<int8_t> type_codes_; | |
147 | std::shared_ptr<Array> type_ids_; | |
148 | std::shared_ptr<Array> logical_type_ids_; | |
149 | std::shared_ptr<Array> invalid_type_ids1_; | |
150 | std::shared_ptr<Array> invalid_type_ids2_; | |
151 | }; | |
152 | ||
153 | TEST_F(TestUnionArrayFactories, TestMakeDense) { | |
154 | std::shared_ptr<Array> value_offsets; | |
155 | // type_ids_: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} | |
156 | ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, 0, 1, 2, 1, 2}, &value_offsets); | |
157 | ||
158 | auto children = std::vector<std::shared_ptr<Array>>(4); | |
159 | ArrayFromVector<StringType, std::string>({"abc", "def", "xyz"}, &children[0]); | |
160 | ArrayFromVector<UInt8Type>({10, 20, 30}, &children[1]); | |
161 | ArrayFromVector<DoubleType>({1.618, 2.718, 3.142}, &children[2]); | |
162 | ArrayFromVector<Int8Type>({-12}, &children[3]); | |
163 | ||
164 | std::vector<std::string> field_names = {"str", "int1", "real", "int2"}; | |
165 | ||
166 | std::shared_ptr<Array> result; | |
167 | const UnionArray* union_array; | |
168 | ||
169 | // without field names and type codes | |
170 | ASSERT_OK_AND_ASSIGN(result, | |
171 | DenseUnionArray::Make(*type_ids_, *value_offsets, children)); | |
172 | ASSERT_OK(result->ValidateFull()); | |
173 | union_array = checked_cast<const UnionArray*>(result.get()); | |
174 | CheckUnionArray(*union_array, UnionMode::DENSE, {"0", "1", "2", "3"}, {0, 1, 2, 3}); | |
175 | ||
176 | // with field name | |
177 | ASSERT_RAISES(Invalid, | |
178 | DenseUnionArray::Make(*type_ids_, *value_offsets, children, {"one"})); | |
179 | ASSERT_OK_AND_ASSIGN( | |
180 | result, DenseUnionArray::Make(*type_ids_, *value_offsets, children, field_names)); | |
181 | ASSERT_OK(result->ValidateFull()); | |
182 | union_array = checked_cast<const UnionArray*>(result.get()); | |
183 | CheckUnionArray(*union_array, UnionMode::DENSE, field_names, {0, 1, 2, 3}); | |
184 | ||
185 | // with type codes | |
186 | ASSERT_RAISES(Invalid, DenseUnionArray::Make(*logical_type_ids_, *value_offsets, | |
187 | children, std::vector<int8_t>{0})); | |
188 | ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*logical_type_ids_, *value_offsets, | |
189 | children, type_codes_)); | |
190 | ASSERT_OK(result->ValidateFull()); | |
191 | union_array = checked_cast<const UnionArray*>(result.get()); | |
192 | CheckUnionArray(*union_array, UnionMode::DENSE, {"0", "1", "2", "3"}, type_codes_); | |
193 | ||
194 | // with field names and type codes | |
195 | ASSERT_RAISES(Invalid, DenseUnionArray::Make(*logical_type_ids_, *value_offsets, | |
196 | children, {"one"}, type_codes_)); | |
197 | ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*logical_type_ids_, *value_offsets, | |
198 | children, field_names, type_codes_)); | |
199 | ASSERT_OK(result->ValidateFull()); | |
200 | union_array = checked_cast<const UnionArray*>(result.get()); | |
201 | CheckUnionArray(*union_array, UnionMode::DENSE, field_names, type_codes_); | |
202 | ||
203 | // Invalid type codes | |
204 | ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*invalid_type_ids1_, *value_offsets, | |
205 | children, type_codes_)); | |
206 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
207 | ASSERT_OK_AND_ASSIGN(result, DenseUnionArray::Make(*invalid_type_ids2_, *value_offsets, | |
208 | children, type_codes_)); | |
209 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
210 | ||
211 | // Invalid offsets | |
212 | // - offset out of bounds at index 5 | |
213 | std::shared_ptr<Array> invalid_offsets; | |
214 | ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, 1, 1, 2, 1, 2}, &invalid_offsets); | |
215 | ASSERT_OK_AND_ASSIGN(result, | |
216 | DenseUnionArray::Make(*type_ids_, *invalid_offsets, children)); | |
217 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
218 | // - negative offset at index 5 | |
219 | ArrayFromVector<Int32Type, int32_t>({0, 0, 0, 1, 1, -1, 1, 2, 1, 2}, &invalid_offsets); | |
220 | ASSERT_OK_AND_ASSIGN(result, | |
221 | DenseUnionArray::Make(*type_ids_, *invalid_offsets, children)); | |
222 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
223 | // - non-monotonic offset at index 3 | |
224 | ArrayFromVector<Int32Type, int32_t>({1, 0, 0, 0, 1, 0, 1, 2, 1, 2}, &invalid_offsets); | |
225 | ASSERT_OK_AND_ASSIGN(result, | |
226 | DenseUnionArray::Make(*type_ids_, *invalid_offsets, children)); | |
227 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
228 | } | |
229 | ||
230 | TEST_F(TestUnionArrayFactories, TestMakeSparse) { | |
231 | auto children = std::vector<std::shared_ptr<Array>>(4); | |
232 | ArrayFromVector<StringType, std::string>( | |
233 | {"abc", "", "", "def", "", "", "", "xyz", "", ""}, &children[0]); | |
234 | ArrayFromVector<UInt8Type>({0, 10, 0, 0, 20, 0, 0, 0, 0, 30}, &children[1]); | |
235 | ArrayFromVector<DoubleType>({0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0}, | |
236 | &children[2]); | |
237 | ArrayFromVector<Int8Type>({0, 0, 0, 0, 0, -12, 0, 0, 0, 0}, &children[3]); | |
238 | ||
239 | std::vector<std::string> field_names = {"str", "int1", "real", "int2"}; | |
240 | ||
241 | std::shared_ptr<Array> result; | |
242 | ||
243 | // without field names and type codes | |
244 | ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*type_ids_, children)); | |
245 | ASSERT_OK(result->ValidateFull()); | |
246 | CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, | |
247 | {"0", "1", "2", "3"}, {0, 1, 2, 3}); | |
248 | ||
249 | // with field names | |
250 | ASSERT_RAISES(Invalid, SparseUnionArray::Make(*type_ids_, children, {"one"})); | |
251 | ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*type_ids_, children, field_names)); | |
252 | ASSERT_OK(result->ValidateFull()); | |
253 | CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names, | |
254 | {0, 1, 2, 3}); | |
255 | ||
256 | // with type codes | |
257 | ASSERT_RAISES(Invalid, SparseUnionArray::Make(*logical_type_ids_, children, | |
258 | std::vector<int8_t>{0})); | |
259 | ASSERT_OK_AND_ASSIGN(result, | |
260 | SparseUnionArray::Make(*logical_type_ids_, children, type_codes_)); | |
261 | ASSERT_OK(result->ValidateFull()); | |
262 | CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, | |
263 | {"0", "1", "2", "3"}, type_codes_); | |
264 | ||
265 | // with field names and type codes | |
266 | ASSERT_RAISES(Invalid, SparseUnionArray::Make(*logical_type_ids_, children, {"one"}, | |
267 | type_codes_)); | |
268 | ASSERT_OK_AND_ASSIGN(result, SparseUnionArray::Make(*logical_type_ids_, children, | |
269 | field_names, type_codes_)); | |
270 | ASSERT_OK(result->ValidateFull()); | |
271 | CheckUnionArray(checked_cast<UnionArray&>(*result), UnionMode::SPARSE, field_names, | |
272 | type_codes_); | |
273 | ||
274 | // Invalid type codes | |
275 | ASSERT_OK_AND_ASSIGN( | |
276 | result, SparseUnionArray::Make(*invalid_type_ids1_, children, type_codes_)); | |
277 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
278 | ASSERT_OK_AND_ASSIGN( | |
279 | result, SparseUnionArray::Make(*invalid_type_ids2_, children, type_codes_)); | |
280 | ASSERT_RAISES(Invalid, result->ValidateFull()); | |
281 | ||
282 | // Invalid child length | |
283 | ArrayFromVector<Int8Type>({0, 0, 0, 0, 0, -12, 0, 0, 0}, &children[3]); | |
284 | ASSERT_RAISES(Invalid, SparseUnionArray::Make(*type_ids_, children)); | |
285 | } | |
286 | ||
287 | template <typename B> | |
288 | class UnionBuilderTest : public ::testing::Test { | |
289 | public: | |
290 | int8_t I8 = 8, STR = 13, DBL = 7; | |
291 | ||
292 | virtual void AppendInt(int8_t i) { | |
293 | expected_types_vector.push_back(I8); | |
294 | ASSERT_OK(union_builder->Append(I8)); | |
295 | ASSERT_OK(i8_builder->Append(i)); | |
296 | } | |
297 | ||
298 | virtual void AppendString(const std::string& str) { | |
299 | expected_types_vector.push_back(STR); | |
300 | ASSERT_OK(union_builder->Append(STR)); | |
301 | ASSERT_OK(str_builder->Append(str)); | |
302 | } | |
303 | ||
304 | virtual void AppendDouble(double dbl) { | |
305 | expected_types_vector.push_back(DBL); | |
306 | ASSERT_OK(union_builder->Append(DBL)); | |
307 | ASSERT_OK(dbl_builder->Append(dbl)); | |
308 | } | |
309 | ||
310 | void AppendBasics() { | |
311 | AppendInt(33); | |
312 | AppendString("abc"); | |
313 | AppendDouble(1.0); | |
314 | AppendDouble(-1.0); | |
315 | AppendString(""); | |
316 | AppendInt(10); | |
317 | AppendString("def"); | |
318 | AppendInt(-10); | |
319 | AppendDouble(0.5); | |
320 | ||
321 | ASSERT_OK(union_builder->Finish(&actual)); | |
322 | ASSERT_OK(actual->ValidateFull()); | |
323 | ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types); | |
324 | } | |
325 | ||
326 | void AppendNullsAndEmptyValues() { | |
327 | AppendString("abc"); | |
328 | ASSERT_OK(union_builder->AppendNull()); | |
329 | ASSERT_OK(union_builder->AppendEmptyValue()); | |
330 | expected_types_vector.insert(expected_types_vector.end(), 3, I8); | |
331 | AppendInt(42); | |
332 | ASSERT_OK(union_builder->AppendNulls(2)); | |
333 | ASSERT_OK(union_builder->AppendEmptyValues(2)); | |
334 | expected_types_vector.insert(expected_types_vector.end(), 3, I8); | |
335 | ||
336 | ASSERT_OK(union_builder->Finish(&actual)); | |
337 | ASSERT_OK(actual->ValidateFull()); | |
338 | ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types); | |
339 | } | |
340 | ||
341 | void AppendInferred() { | |
342 | I8 = union_builder->AppendChild(i8_builder, "i8"); | |
343 | ASSERT_EQ(I8, 0); | |
344 | AppendInt(33); | |
345 | AppendInt(10); | |
346 | ||
347 | STR = union_builder->AppendChild(str_builder, "str"); | |
348 | ASSERT_EQ(STR, 1); | |
349 | AppendString("abc"); | |
350 | AppendString(""); | |
351 | AppendString("def"); | |
352 | AppendInt(-10); | |
353 | ||
354 | DBL = union_builder->AppendChild(dbl_builder, "dbl"); | |
355 | ASSERT_EQ(DBL, 2); | |
356 | AppendDouble(1.0); | |
357 | AppendDouble(-1.0); | |
358 | AppendDouble(0.5); | |
359 | ||
360 | ASSERT_OK(union_builder->Finish(&actual)); | |
361 | ASSERT_OK(actual->ValidateFull()); | |
362 | ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types); | |
363 | ||
364 | ASSERT_EQ(I8, 0); | |
365 | ASSERT_EQ(STR, 1); | |
366 | ASSERT_EQ(DBL, 2); | |
367 | } | |
368 | ||
369 | void AppendListOfInferred(std::shared_ptr<ListArray>* actual) { | |
370 | ListBuilder list_builder(default_memory_pool(), union_builder); | |
371 | ||
372 | ASSERT_OK(list_builder.Append()); | |
373 | I8 = union_builder->AppendChild(i8_builder, "i8"); | |
374 | ASSERT_EQ(I8, 0); | |
375 | AppendInt(10); | |
376 | ||
377 | ASSERT_OK(list_builder.Append()); | |
378 | STR = union_builder->AppendChild(str_builder, "str"); | |
379 | ASSERT_EQ(STR, 1); | |
380 | AppendString("abc"); | |
381 | AppendInt(-10); | |
382 | ||
383 | ASSERT_OK(list_builder.Append()); | |
384 | DBL = union_builder->AppendChild(dbl_builder, "dbl"); | |
385 | ASSERT_EQ(DBL, 2); | |
386 | AppendDouble(0.5); | |
387 | ||
388 | ASSERT_OK(list_builder.Finish(actual)); | |
389 | ASSERT_OK((*actual)->ValidateFull()); | |
390 | ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types); | |
391 | } | |
392 | ||
393 | std::vector<uint8_t> expected_types_vector; | |
394 | std::shared_ptr<Array> expected_types; | |
395 | std::shared_ptr<Int8Builder> i8_builder = std::make_shared<Int8Builder>(); | |
396 | std::shared_ptr<StringBuilder> str_builder = std::make_shared<StringBuilder>(); | |
397 | std::shared_ptr<DoubleBuilder> dbl_builder = std::make_shared<DoubleBuilder>(); | |
398 | std::shared_ptr<B> union_builder = std::make_shared<B>(default_memory_pool()); | |
399 | std::shared_ptr<UnionArray> actual; | |
400 | }; | |
401 | ||
402 | class DenseUnionBuilderTest : public UnionBuilderTest<DenseUnionBuilder> {}; | |
403 | class SparseUnionBuilderTest : public UnionBuilderTest<SparseUnionBuilder> { | |
404 | public: | |
405 | using Base = UnionBuilderTest<SparseUnionBuilder>; | |
406 | ||
407 | void AppendInt(int8_t i) override { | |
408 | Base::AppendInt(i); | |
409 | ASSERT_OK(str_builder->AppendEmptyValue()); | |
410 | ASSERT_OK(dbl_builder->AppendEmptyValue()); | |
411 | } | |
412 | ||
413 | void AppendString(const std::string& str) override { | |
414 | Base::AppendString(str); | |
415 | ASSERT_OK(i8_builder->AppendEmptyValue()); | |
416 | ASSERT_OK(dbl_builder->AppendEmptyValue()); | |
417 | } | |
418 | ||
419 | void AppendDouble(double dbl) override { | |
420 | Base::AppendDouble(dbl); | |
421 | ASSERT_OK(i8_builder->AppendEmptyValue()); | |
422 | ASSERT_OK(str_builder->AppendEmptyValue()); | |
423 | } | |
424 | }; | |
425 | ||
426 | TEST_F(DenseUnionBuilderTest, Basics) { | |
427 | union_builder.reset(new DenseUnionBuilder( | |
428 | default_memory_pool(), {i8_builder, str_builder, dbl_builder}, | |
429 | dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())}, | |
430 | {I8, STR, DBL}))); | |
431 | AppendBasics(); | |
432 | ||
433 | auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]"); | |
434 | auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])"); | |
435 | auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]"); | |
436 | ||
437 | auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 1, 1, 2, 2, 2]"); | |
438 | ||
439 | ASSERT_OK_AND_ASSIGN(auto expected, | |
440 | DenseUnionArray::Make(*expected_types, *expected_offsets, | |
441 | {expected_i8, expected_str, expected_dbl}, | |
442 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
443 | ||
444 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
445 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
446 | } | |
447 | ||
448 | TEST_F(DenseUnionBuilderTest, NullsAndEmptyValues) { | |
449 | union_builder.reset(new DenseUnionBuilder( | |
450 | default_memory_pool(), {i8_builder, str_builder, dbl_builder}, | |
451 | dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())}, | |
452 | {I8, STR, DBL}))); | |
453 | AppendNullsAndEmptyValues(); | |
454 | ||
455 | // Four null / empty values (the latter implementation-defined) were appended to I8 | |
456 | auto expected_i8 = ArrayFromJSON(int8(), "[null, 0, 42, null, 0]"); | |
457 | auto expected_str = ArrayFromJSON(utf8(), R"(["abc"])"); | |
458 | auto expected_dbl = ArrayFromJSON(float64(), "[]"); | |
459 | ||
460 | // "abc", null, 0, 42, null, null, 0, 0 | |
461 | auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 3, 3, 4, 4]"); | |
462 | ||
463 | ASSERT_OK_AND_ASSIGN(auto expected, | |
464 | DenseUnionArray::Make(*expected_types, *expected_offsets, | |
465 | {expected_i8, expected_str, expected_dbl}, | |
466 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
467 | ||
468 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
469 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
470 | // Physical arrays must be as expected | |
471 | ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0)); | |
472 | ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1)); | |
473 | ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2)); | |
474 | } | |
475 | ||
476 | TEST_F(DenseUnionBuilderTest, InferredType) { | |
477 | AppendInferred(); | |
478 | ||
479 | auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]"); | |
480 | auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])"); | |
481 | auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]"); | |
482 | ||
483 | auto expected_offsets = ArrayFromJSON(int32(), "[0, 1, 0, 1, 2, 2, 0, 1, 2]"); | |
484 | ||
485 | ASSERT_OK_AND_ASSIGN(auto expected, | |
486 | DenseUnionArray::Make(*expected_types, *expected_offsets, | |
487 | {expected_i8, expected_str, expected_dbl}, | |
488 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
489 | ||
490 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
491 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
492 | } | |
493 | ||
494 | TEST_F(DenseUnionBuilderTest, ListOfInferredType) { | |
495 | std::shared_ptr<ListArray> actual; | |
496 | AppendListOfInferred(&actual); | |
497 | ||
498 | auto expected_type = list( | |
499 | dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())}, | |
500 | {I8, STR, DBL})); | |
501 | ASSERT_EQ(expected_type->ToString(), actual->type()->ToString()); | |
502 | } | |
503 | ||
504 | TEST_F(SparseUnionBuilderTest, Basics) { | |
505 | union_builder.reset(new SparseUnionBuilder( | |
506 | default_memory_pool(), {i8_builder, str_builder, dbl_builder}, | |
507 | sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())}, | |
508 | {I8, STR, DBL}))); | |
509 | ||
510 | AppendBasics(); | |
511 | ||
512 | auto expected_i8 = | |
513 | ArrayFromJSON(int8(), "[33, null, null, null, null, 10, null, -10, null]"); | |
514 | auto expected_str = | |
515 | ArrayFromJSON(utf8(), R"([null, "abc", null, null, "", null, "def", null, null])"); | |
516 | auto expected_dbl = | |
517 | ArrayFromJSON(float64(), "[null, null, 1.0, -1.0, null, null, null, null, 0.5]"); | |
518 | ||
519 | ASSERT_OK_AND_ASSIGN( | |
520 | auto expected, | |
521 | SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl}, | |
522 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
523 | ||
524 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
525 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
526 | } | |
527 | ||
528 | TEST_F(SparseUnionBuilderTest, NullsAndEmptyValues) { | |
529 | union_builder.reset(new SparseUnionBuilder( | |
530 | default_memory_pool(), {i8_builder, str_builder, dbl_builder}, | |
531 | sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())}, | |
532 | {I8, STR, DBL}))); | |
533 | AppendNullsAndEmptyValues(); | |
534 | ||
535 | // "abc", null, 0, 42, null, null, 0, 0 | |
536 | // (note that getting 0 for empty values is implementation-defined) | |
537 | auto expected_i8 = ArrayFromJSON(int8(), "[0, null, 0, 42, null, null, 0, 0]"); | |
538 | auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "", "", "", "", "", ""])"); | |
539 | auto expected_dbl = ArrayFromJSON(float64(), "[0, 0, 0, 0, 0, 0, 0, 0]"); | |
540 | ||
541 | ASSERT_OK_AND_ASSIGN( | |
542 | auto expected, | |
543 | SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl}, | |
544 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
545 | ||
546 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
547 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
548 | // Physical arrays must be as expected | |
549 | ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0)); | |
550 | ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1)); | |
551 | ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2)); | |
552 | } | |
553 | ||
554 | TEST_F(SparseUnionBuilderTest, InferredType) { | |
555 | AppendInferred(); | |
556 | ||
557 | auto expected_i8 = | |
558 | ArrayFromJSON(int8(), "[33, 10, null, null, null, -10, null, null, null]"); | |
559 | auto expected_str = | |
560 | ArrayFromJSON(utf8(), R"([null, null, "abc", "", "def", null, null, null, null])"); | |
561 | auto expected_dbl = | |
562 | ArrayFromJSON(float64(), "[null, null, null, null, null, null, 1.0, -1.0, 0.5]"); | |
563 | ||
564 | ASSERT_OK_AND_ASSIGN( | |
565 | auto expected, | |
566 | SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl}, | |
567 | {"i8", "str", "dbl"}, {I8, STR, DBL})); | |
568 | ||
569 | ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString()); | |
570 | ASSERT_ARRAYS_EQUAL(*expected, *actual); | |
571 | } | |
572 | ||
573 | TEST_F(SparseUnionBuilderTest, StructWithUnion) { | |
574 | auto union_builder = std::make_shared<SparseUnionBuilder>(default_memory_pool()); | |
575 | StructBuilder builder(struct_({field("u", union_builder->type())}), | |
576 | default_memory_pool(), {union_builder}); | |
577 | ASSERT_EQ(union_builder->AppendChild(std::make_shared<Int32Builder>(), "i"), 0); | |
578 | ASSERT_TRUE(builder.type()->Equals( | |
579 | struct_({field("u", sparse_union({field("i", int32())}, {0}))}))); | |
580 | } | |
581 | ||
582 | } // namespace arrow |