]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <cstdint> | |
19 | #include <cstring> | |
20 | #include <memory> | |
21 | #include <vector> | |
22 | ||
23 | #include <gmock/gmock.h> | |
24 | #include <gtest/gtest.h> | |
25 | ||
26 | #include "arrow/array.h" | |
27 | #include "arrow/array/builder_nested.h" | |
28 | #include "arrow/buffer.h" | |
29 | #include "arrow/status.h" | |
30 | #include "arrow/testing/gtest_common.h" | |
31 | #include "arrow/testing/gtest_util.h" | |
32 | #include "arrow/type.h" | |
33 | #include "arrow/util/bit_util.h" | |
34 | #include "arrow/util/bitmap_builders.h" | |
35 | #include "arrow/util/checked_cast.h" | |
36 | ||
37 | namespace arrow { | |
38 | ||
39 | using internal::checked_cast; | |
40 | using internal::checked_pointer_cast; | |
41 | ||
42 | using ListTypes = ::testing::Types<ListType, LargeListType>; | |
43 | ||
44 | // ---------------------------------------------------------------------- | |
45 | // List tests | |
46 | ||
47 | template <typename T> | |
48 | class TestListArray : public TestBuilder { | |
49 | public: | |
50 | using TypeClass = T; | |
51 | using offset_type = typename TypeClass::offset_type; | |
52 | using ArrayType = typename TypeTraits<TypeClass>::ArrayType; | |
53 | using BuilderType = typename TypeTraits<TypeClass>::BuilderType; | |
54 | using OffsetType = typename TypeTraits<TypeClass>::OffsetType; | |
55 | using OffsetArrayType = typename TypeTraits<TypeClass>::OffsetArrayType; | |
56 | using OffsetBuilderType = typename TypeTraits<TypeClass>::OffsetBuilderType; | |
57 | ||
58 | void SetUp() { | |
59 | TestBuilder::SetUp(); | |
60 | ||
61 | value_type_ = int16(); | |
62 | type_ = std::make_shared<T>(value_type_); | |
63 | ||
64 | std::unique_ptr<ArrayBuilder> tmp; | |
65 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); | |
66 | builder_.reset(checked_cast<BuilderType*>(tmp.release())); | |
67 | } | |
68 | ||
69 | void Done() { | |
70 | std::shared_ptr<Array> out; | |
71 | FinishAndCheckPadding(builder_.get(), &out); | |
72 | result_ = std::dynamic_pointer_cast<ArrayType>(out); | |
73 | } | |
74 | ||
75 | void ValidateBasicListArray(const ArrayType* result, const std::vector<int16_t>& values, | |
76 | const std::vector<uint8_t>& is_valid) { | |
77 | ASSERT_OK(result->ValidateFull()); | |
78 | ASSERT_EQ(1, result->null_count()); | |
79 | ASSERT_EQ(0, result->values()->null_count()); | |
80 | ||
81 | ASSERT_EQ(3, result->length()); | |
82 | std::vector<offset_type> ex_offsets = {0, 3, 3, 7}; | |
83 | for (size_t i = 0; i < ex_offsets.size(); ++i) { | |
84 | ASSERT_EQ(ex_offsets[i], result->value_offset(i)); | |
85 | } | |
86 | ||
87 | for (int i = 0; i < result->length(); ++i) { | |
88 | ASSERT_EQ(is_valid[i] == 0, result->IsNull(i)); | |
89 | } | |
90 | ||
91 | ASSERT_EQ(7, result->values()->length()); | |
92 | auto varr = std::dynamic_pointer_cast<Int16Array>(result->values()); | |
93 | ||
94 | for (size_t i = 0; i < values.size(); ++i) { | |
95 | ASSERT_EQ(values[i], varr->Value(i)); | |
96 | } | |
97 | ||
98 | auto offsets = std::dynamic_pointer_cast<OffsetArrayType>(result->offsets()); | |
99 | ASSERT_EQ(offsets->length(), result->length() + 1); | |
100 | ASSERT_EQ(offsets->null_count(), 0); | |
101 | AssertTypeEqual(*offsets->type(), OffsetType()); | |
102 | ||
103 | for (int64_t i = 0; i < result->length(); ++i) { | |
104 | ASSERT_EQ(offsets->Value(i), result_->raw_value_offsets()[i]); | |
105 | } | |
106 | // last offset | |
107 | ASSERT_EQ(offsets->Value(result->length()), | |
108 | result_->raw_value_offsets()[result->length()]); | |
109 | } | |
110 | ||
111 | void TestBasics() { | |
112 | std::vector<int16_t> values = {0, 1, 2, 3, 4, 5, 6}; | |
113 | std::vector<int> lengths = {3, 0, 4}; | |
114 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
115 | ||
116 | Int16Builder* vb = checked_cast<Int16Builder*>(builder_->value_builder()); | |
117 | ||
118 | ASSERT_OK(builder_->Reserve(lengths.size())); | |
119 | ASSERT_OK(vb->Reserve(values.size())); | |
120 | ||
121 | int pos = 0; | |
122 | for (size_t i = 0; i < lengths.size(); ++i) { | |
123 | ASSERT_OK(builder_->Append(is_valid[i] > 0)); | |
124 | for (int j = 0; j < lengths[i]; ++j) { | |
125 | ASSERT_OK(vb->Append(values[pos++])); | |
126 | } | |
127 | } | |
128 | ||
129 | Done(); | |
130 | ValidateBasicListArray(result_.get(), values, is_valid); | |
131 | } | |
132 | ||
133 | void TestEquality() { | |
134 | auto vb = checked_cast<Int16Builder*>(builder_->value_builder()); | |
135 | ||
136 | std::shared_ptr<Array> array, equal_array, unequal_array; | |
137 | std::vector<offset_type> equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10}; | |
138 | std::vector<int16_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6}; | |
139 | std::vector<offset_type> unequal_offsets = {0, 1, 4, 7}; | |
140 | std::vector<int16_t> unequal_values = {1, 2, 2, 2, 3, 4, 5}; | |
141 | ||
142 | // setup two equal arrays | |
143 | ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size())); | |
144 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); | |
145 | ||
146 | ASSERT_OK(builder_->Finish(&array)); | |
147 | ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size())); | |
148 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); | |
149 | ||
150 | ASSERT_OK(builder_->Finish(&equal_array)); | |
151 | // now an unequal one | |
152 | ASSERT_OK(builder_->AppendValues(unequal_offsets.data(), unequal_offsets.size())); | |
153 | ASSERT_OK(vb->AppendValues(unequal_values.data(), unequal_values.size())); | |
154 | ||
155 | ASSERT_OK(builder_->Finish(&unequal_array)); | |
156 | ||
157 | // Test array equality | |
158 | EXPECT_TRUE(array->Equals(array)); | |
159 | EXPECT_TRUE(array->Equals(equal_array)); | |
160 | EXPECT_TRUE(equal_array->Equals(array)); | |
161 | EXPECT_FALSE(equal_array->Equals(unequal_array)); | |
162 | EXPECT_FALSE(unequal_array->Equals(equal_array)); | |
163 | ||
164 | // Test range equality | |
165 | EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array)); | |
166 | EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array)); | |
167 | EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); | |
168 | EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array)); | |
169 | ||
170 | // Check with slices, ARROW-33 | |
171 | std::shared_ptr<Array> slice, slice2; | |
172 | ||
173 | slice = array->Slice(2); | |
174 | slice2 = array->Slice(2); | |
175 | ASSERT_EQ(array->length() - 2, slice->length()); | |
176 | ||
177 | ASSERT_TRUE(slice->Equals(slice2)); | |
178 | ASSERT_TRUE(array->RangeEquals(2, slice->length(), 0, slice)); | |
179 | ||
180 | // Chained slices | |
181 | slice2 = array->Slice(1)->Slice(1); | |
182 | ASSERT_TRUE(slice->Equals(slice2)); | |
183 | ||
184 | slice = array->Slice(1, 4); | |
185 | slice2 = array->Slice(1, 4); | |
186 | ASSERT_EQ(4, slice->length()); | |
187 | ||
188 | ASSERT_TRUE(slice->Equals(slice2)); | |
189 | ASSERT_TRUE(array->RangeEquals(1, 5, 0, slice)); | |
190 | } | |
191 | ||
192 | void TestValuesEquality() { | |
193 | auto type = std::make_shared<T>(int32()); | |
194 | auto left = ArrayFromJSON(type, "[[1, 2], [3], [0]]"); | |
195 | auto right = ArrayFromJSON(type, "[[1, 2], [3], [100000]]"); | |
196 | auto offset = 2; | |
197 | EXPECT_FALSE(left->Slice(offset)->Equals(right->Slice(offset))); | |
198 | } | |
199 | ||
200 | void TestFromArrays() { | |
201 | std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, offsets5, values; | |
202 | ||
203 | std::vector<bool> offsets_is_valid3 = {true, false, true, true}; | |
204 | std::vector<bool> offsets_is_valid4 = {true, true, false, true}; | |
205 | std::vector<bool> offsets_is_valid5 = {true, true, false, false}; | |
206 | ||
207 | std::vector<bool> values_is_valid = {true, false, true, true, true, true}; | |
208 | ||
209 | std::vector<offset_type> offset1_values = {0, 2, 2, 6}; | |
210 | std::vector<offset_type> offset2_values = {0, 2, 6, 6}; | |
211 | ||
212 | std::vector<int8_t> values_values = {0, 1, 2, 3, 4, 5}; | |
213 | const int length = 3; | |
214 | ||
215 | ArrayFromVector<OffsetType, offset_type>(offset1_values, &offsets1); | |
216 | ArrayFromVector<OffsetType, offset_type>(offset2_values, &offsets2); | |
217 | ||
218 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid3, offset1_values, | |
219 | &offsets3); | |
220 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid4, offset2_values, | |
221 | &offsets4); | |
222 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid5, offset2_values, | |
223 | &offsets5); | |
224 | ||
225 | ArrayFromVector<Int8Type, int8_t>(values_is_valid, values_values, &values); | |
226 | ||
227 | auto list_type = std::make_shared<T>(int8()); | |
228 | ||
229 | ASSERT_OK_AND_ASSIGN(auto list1, ArrayType::FromArrays(*offsets1, *values, pool_)); | |
230 | ASSERT_OK_AND_ASSIGN(auto list3, ArrayType::FromArrays(*offsets3, *values, pool_)); | |
231 | ASSERT_OK_AND_ASSIGN(auto list4, ArrayType::FromArrays(*offsets4, *values, pool_)); | |
232 | ASSERT_OK(list1->ValidateFull()); | |
233 | ASSERT_OK(list3->ValidateFull()); | |
234 | ASSERT_OK(list4->ValidateFull()); | |
235 | ||
236 | ArrayType expected1(list_type, length, offsets1->data()->buffers[1], values, | |
237 | offsets1->data()->buffers[0], 0); | |
238 | AssertArraysEqual(expected1, *list1); | |
239 | ||
240 | // Use null bitmap from offsets3, but clean offsets from non-null version | |
241 | ArrayType expected3(list_type, length, offsets1->data()->buffers[1], values, | |
242 | offsets3->data()->buffers[0], 1); | |
243 | AssertArraysEqual(expected3, *list3); | |
244 | ||
245 | // Check that the last offset bit is zero | |
246 | ASSERT_FALSE(BitUtil::GetBit(list3->null_bitmap()->data(), length + 1)); | |
247 | ||
248 | ArrayType expected4(list_type, length, offsets2->data()->buffers[1], values, | |
249 | offsets4->data()->buffers[0], 1); | |
250 | AssertArraysEqual(expected4, *list4); | |
251 | ||
252 | // Test failure modes | |
253 | ||
254 | std::shared_ptr<Array> tmp; | |
255 | ||
256 | // Zero-length offsets | |
257 | ASSERT_RAISES(Invalid, ArrayType::FromArrays(*offsets1->Slice(0, 0), *values, pool_)); | |
258 | ||
259 | // Offsets not the right type | |
260 | ASSERT_RAISES(TypeError, ArrayType::FromArrays(*values, *offsets1, pool_)); | |
261 | ||
262 | // Null final offset | |
263 | EXPECT_RAISES_WITH_MESSAGE_THAT( | |
264 | Invalid, ::testing::HasSubstr("Last list offset should be non-null"), | |
265 | ArrayType::FromArrays(*offsets5, *values, pool_)); | |
266 | ||
267 | // ARROW-12077: check for off-by-one in construction (need mimalloc/ASan/Valgrind) | |
268 | { | |
269 | std::shared_ptr<Array> offsets, values; | |
270 | // Length multiple of 8 - we'll allocate a validity buffer with exactly enough bits | |
271 | // (Need a large enough buffer or else ASan doesn't catch it) | |
272 | std::vector<bool> offsets_is_valid(4096); | |
273 | std::vector<offset_type> offset_values(4096); | |
274 | std::vector<int8_t> values_values(4096); | |
275 | std::fill(offsets_is_valid.begin(), offsets_is_valid.end(), true); | |
276 | offsets_is_valid[1] = false; | |
277 | std::fill(offset_values.begin(), offset_values.end(), 0); | |
278 | std::fill(values_values.begin(), values_values.end(), 0); | |
279 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid, offset_values, &offsets); | |
280 | ArrayFromVector<Int8Type, int8_t>(values_values, &values); | |
281 | ASSERT_OK_AND_ASSIGN(auto list, ArrayType::FromArrays(*offsets, *values, pool_)); | |
282 | } | |
283 | } | |
284 | ||
285 | void TestAppendNull() { | |
286 | ASSERT_OK(builder_->AppendNull()); | |
287 | ASSERT_OK(builder_->AppendNull()); | |
288 | ||
289 | Done(); | |
290 | ||
291 | ASSERT_OK(result_->ValidateFull()); | |
292 | ASSERT_TRUE(result_->IsNull(0)); | |
293 | ASSERT_TRUE(result_->IsNull(1)); | |
294 | ||
295 | ASSERT_EQ(0, result_->raw_value_offsets()[0]); | |
296 | ASSERT_EQ(0, result_->value_offset(1)); | |
297 | ASSERT_EQ(0, result_->value_offset(2)); | |
298 | ||
299 | auto values = result_->values(); | |
300 | ASSERT_EQ(0, values->length()); | |
301 | // Values buffer should be non-null | |
302 | ASSERT_NE(nullptr, values->data()->buffers[1]); | |
303 | } | |
304 | ||
305 | void TestAppendNulls() { | |
306 | ASSERT_OK(builder_->AppendNulls(3)); | |
307 | ||
308 | Done(); | |
309 | ||
310 | ASSERT_OK(result_->ValidateFull()); | |
311 | ASSERT_EQ(result_->length(), 3); | |
312 | ASSERT_EQ(result_->null_count(), 3); | |
313 | ASSERT_TRUE(result_->IsNull(0)); | |
314 | ASSERT_TRUE(result_->IsNull(1)); | |
315 | ASSERT_TRUE(result_->IsNull(2)); | |
316 | ||
317 | ASSERT_EQ(0, result_->raw_value_offsets()[0]); | |
318 | ASSERT_EQ(0, result_->value_offset(1)); | |
319 | ASSERT_EQ(0, result_->value_offset(2)); | |
320 | ASSERT_EQ(0, result_->value_offset(3)); | |
321 | ||
322 | auto values = result_->values(); | |
323 | ASSERT_EQ(0, values->length()); | |
324 | // Values buffer should be non-null | |
325 | ASSERT_NE(nullptr, values->data()->buffers[1]); | |
326 | } | |
327 | ||
328 | void TestBulkAppend() { | |
329 | std::vector<int16_t> values = {0, 1, 2, 3, 4, 5, 6}; | |
330 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
331 | std::vector<offset_type> offsets = {0, 3, 3}; | |
332 | ||
333 | Int16Builder* vb = checked_cast<Int16Builder*>(builder_->value_builder()); | |
334 | ASSERT_OK(vb->Reserve(values.size())); | |
335 | ||
336 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); | |
337 | for (int16_t value : values) { | |
338 | ASSERT_OK(vb->Append(value)); | |
339 | } | |
340 | Done(); | |
341 | ValidateBasicListArray(result_.get(), values, is_valid); | |
342 | } | |
343 | ||
344 | void TestBulkAppendInvalid() { | |
345 | std::vector<int16_t> values = {0, 1, 2, 3, 4, 5, 6}; | |
346 | std::vector<int> lengths = {3, 0, 4}; | |
347 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
348 | // Should be {0, 3, 3} given the is_valid array | |
349 | std::vector<offset_type> offsets = {0, 2, 4}; | |
350 | ||
351 | Int16Builder* vb = checked_cast<Int16Builder*>(builder_->value_builder()); | |
352 | ASSERT_OK(vb->Reserve(values.size())); | |
353 | ||
354 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); | |
355 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); | |
356 | for (int16_t value : values) { | |
357 | ASSERT_OK(vb->Append(value)); | |
358 | } | |
359 | ||
360 | Done(); | |
361 | ASSERT_RAISES(Invalid, result_->ValidateFull()); | |
362 | } | |
363 | ||
364 | void TestZeroLength() { | |
365 | // All buffers are null | |
366 | Done(); | |
367 | ASSERT_OK(result_->ValidateFull()); | |
368 | } | |
369 | ||
370 | void TestBuilderPreserveFieldName() { | |
371 | auto list_type_with_name = std::make_shared<T>(field("counts", int16())); | |
372 | ||
373 | std::unique_ptr<ArrayBuilder> tmp; | |
374 | ASSERT_OK(MakeBuilder(pool_, list_type_with_name, &tmp)); | |
375 | builder_.reset(checked_cast<BuilderType*>(tmp.release())); | |
376 | ||
377 | std::vector<offset_type> offsets = {1, 2, 4, 8}; | |
378 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size())); | |
379 | ||
380 | std::shared_ptr<Array> list_array; | |
381 | ASSERT_OK(builder_->Finish(&list_array)); | |
382 | ||
383 | const auto& type = checked_cast<T&>(*list_array->type()); | |
384 | ASSERT_EQ("counts", type.value_field()->name()); | |
385 | } | |
386 | ||
387 | void TestFlattenZeroLength() { | |
388 | Done(); | |
389 | ASSERT_OK_AND_ASSIGN(auto flattened, result_->Flatten()); | |
390 | ASSERT_OK(flattened->ValidateFull()); | |
391 | ASSERT_EQ(0, flattened->length()); | |
392 | } | |
393 | ||
394 | void TestFlattenSimple() { | |
395 | auto type = std::make_shared<T>(int32()); | |
396 | auto list_array = std::dynamic_pointer_cast<ArrayType>( | |
397 | ArrayFromJSON(type, "[[1, 2], [3], [4], null, [5], [], [6]]")); | |
398 | ASSERT_OK_AND_ASSIGN(auto flattened, list_array->Flatten()); | |
399 | ASSERT_OK(flattened->ValidateFull()); | |
400 | EXPECT_TRUE(flattened->Equals(ArrayFromJSON(int32(), "[1, 2, 3, 4, 5, 6]"))); | |
401 | } | |
402 | ||
403 | void TestFlattenSliced() { | |
404 | auto type = std::make_shared<T>(int32()); | |
405 | auto list_array = std::dynamic_pointer_cast<ArrayType>( | |
406 | ArrayFromJSON(type, "[[1, 2], [3], [4], null, [5], [], [6]]")); | |
407 | auto sliced_list_array = | |
408 | std::dynamic_pointer_cast<ArrayType>(list_array->Slice(3, 4)); | |
409 | ASSERT_OK_AND_ASSIGN(auto flattened, list_array->Flatten()); | |
410 | ASSERT_OK(flattened->ValidateFull()); | |
411 | // Note the difference between values() and Flatten(). | |
412 | EXPECT_TRUE(flattened->Equals(ArrayFromJSON(int32(), "[5, 6]"))); | |
413 | EXPECT_TRUE(sliced_list_array->values()->Equals( | |
414 | ArrayFromJSON(int32(), "[1, 2, 3, 4, 5, 6]"))); | |
415 | } | |
416 | ||
417 | void TestFlattenNonEmptyBackingNulls() { | |
418 | auto type = std::make_shared<T>(int32()); | |
419 | auto array_data = | |
420 | std::dynamic_pointer_cast<ArrayType>( | |
421 | ArrayFromJSON(type, "[[1, 2], [3], null, [5, 6], [7, 8], [], [9]]")) | |
422 | ->data(); | |
423 | ASSERT_EQ(2, array_data->buffers.size()); | |
424 | auto null_bitmap_buffer = array_data->buffers[0]; | |
425 | ASSERT_NE(nullptr, null_bitmap_buffer); | |
426 | BitUtil::ClearBit(null_bitmap_buffer->mutable_data(), 1); | |
427 | BitUtil::ClearBit(null_bitmap_buffer->mutable_data(), 3); | |
428 | BitUtil::ClearBit(null_bitmap_buffer->mutable_data(), 4); | |
429 | array_data->null_count += 3; | |
430 | auto list_array = std::dynamic_pointer_cast<ArrayType>(MakeArray(array_data)); | |
431 | ASSERT_OK(list_array->ValidateFull()); | |
432 | ASSERT_OK_AND_ASSIGN(auto flattened, list_array->Flatten()); | |
433 | EXPECT_TRUE(flattened->Equals(ArrayFromJSON(int32(), "[1, 2, 9]"))) | |
434 | << flattened->ToString(); | |
435 | } | |
436 | ||
437 | Status ValidateOffsets(int64_t length, std::vector<offset_type> offsets, | |
438 | const std::shared_ptr<Array>& values, int64_t offset = 0) { | |
439 | auto type = std::make_shared<TypeClass>(values->type()); | |
440 | ArrayType arr(type, length, Buffer::Wrap(offsets), values, | |
441 | /*null_bitmap=*/nullptr, /*null_count=*/0, offset); | |
442 | return arr.ValidateFull(); | |
443 | } | |
444 | ||
445 | void TestValidateOffsets() { | |
446 | auto empty_values = ArrayFromJSON(int16(), "[]"); | |
447 | auto values = ArrayFromJSON(int16(), "[1, 2, 3, 4, 5, 6, 7]"); | |
448 | ||
449 | // An empty list array can have omitted or 0-length offsets | |
450 | ASSERT_OK(ValidateOffsets(0, {}, empty_values)); | |
451 | ||
452 | ASSERT_OK(ValidateOffsets(0, {0}, empty_values)); | |
453 | ASSERT_OK(ValidateOffsets(1, {0, 7}, values)); | |
454 | ASSERT_OK(ValidateOffsets(2, {0, 4, 7}, values)); | |
455 | ASSERT_OK(ValidateOffsets(3, {0, 4, 7, 7}, values)); | |
456 | ||
457 | // Non-zero array offset | |
458 | ASSERT_OK(ValidateOffsets(1, {0, 4, 7}, values, 1)); | |
459 | ASSERT_OK(ValidateOffsets(0, {0, 4, 7}, values, 2)); | |
460 | ||
461 | // Not enough offsets | |
462 | ASSERT_RAISES(Invalid, ValidateOffsets(1, {0}, values)); | |
463 | ASSERT_RAISES(Invalid, ValidateOffsets(2, {0, 0}, values, 1)); | |
464 | ||
465 | // Offset out of bounds | |
466 | ASSERT_RAISES(Invalid, ValidateOffsets(1, {0, 8}, values)); | |
467 | ASSERT_RAISES(Invalid, ValidateOffsets(1, {0, 8, 8}, values, 1)); | |
468 | // Negative offset | |
469 | ASSERT_RAISES(Invalid, ValidateOffsets(1, {-1, 0}, values)); | |
470 | ASSERT_RAISES(Invalid, ValidateOffsets(1, {0, -1}, values)); | |
471 | ASSERT_RAISES(Invalid, ValidateOffsets(2, {0, -1, -1}, values, 1)); | |
472 | // Offsets non-monotonic | |
473 | ASSERT_RAISES(Invalid, ValidateOffsets(2, {0, 7, 4}, values)); | |
474 | } | |
475 | ||
476 | void TestCornerCases() { | |
477 | // ARROW-7985 | |
478 | ASSERT_OK(builder_->AppendNull()); | |
479 | Done(); | |
480 | auto expected = ArrayFromJSON(type_, "[null]"); | |
481 | AssertArraysEqual(*result_, *expected); | |
482 | ||
483 | SetUp(); | |
484 | ASSERT_OK(builder_->Append()); | |
485 | Done(); | |
486 | expected = ArrayFromJSON(type_, "[[]]"); | |
487 | AssertArraysEqual(*result_, *expected); | |
488 | ||
489 | SetUp(); | |
490 | ASSERT_OK(builder_->AppendNull()); | |
491 | ASSERT_OK(builder_->value_builder()->Reserve(100)); | |
492 | Done(); | |
493 | expected = ArrayFromJSON(type_, "[null]"); | |
494 | AssertArraysEqual(*result_, *expected); | |
495 | } | |
496 | ||
497 | void TestOverflowCheck() { | |
498 | Int16Builder* vb = checked_cast<Int16Builder*>(builder_->value_builder()); | |
499 | auto max_elements = builder_->maximum_elements(); | |
500 | ||
501 | ASSERT_OK(builder_->ValidateOverflow(1)); | |
502 | ASSERT_OK(builder_->ValidateOverflow(max_elements)); | |
503 | ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements + 1)); | |
504 | ||
505 | ASSERT_OK(builder_->Append()); | |
506 | ASSERT_OK(vb->Append(1)); | |
507 | ASSERT_OK(vb->Append(2)); | |
508 | ASSERT_OK(builder_->ValidateOverflow(max_elements - 2)); | |
509 | ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 1)); | |
510 | ||
511 | ASSERT_OK(builder_->AppendNull()); | |
512 | ASSERT_OK(builder_->ValidateOverflow(max_elements - 2)); | |
513 | ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 1)); | |
514 | ||
515 | ASSERT_OK(builder_->Append()); | |
516 | ASSERT_OK(vb->Append(1)); | |
517 | ASSERT_OK(vb->Append(2)); | |
518 | ASSERT_OK(vb->Append(3)); | |
519 | ASSERT_OK(builder_->ValidateOverflow(max_elements - 5)); | |
520 | ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 4)); | |
521 | } | |
522 | ||
523 | protected: | |
524 | std::shared_ptr<DataType> value_type_; | |
525 | ||
526 | std::shared_ptr<BuilderType> builder_; | |
527 | std::shared_ptr<ArrayType> result_; | |
528 | }; | |
529 | ||
530 | TYPED_TEST_SUITE(TestListArray, ListTypes); | |
531 | ||
532 | TYPED_TEST(TestListArray, Basics) { this->TestBasics(); } | |
533 | ||
534 | TYPED_TEST(TestListArray, Equality) { this->TestEquality(); } | |
535 | ||
536 | TYPED_TEST(TestListArray, ValuesEquality) { this->TestValuesEquality(); } | |
537 | ||
538 | TYPED_TEST(TestListArray, FromArrays) { this->TestFromArrays(); } | |
539 | ||
540 | TYPED_TEST(TestListArray, AppendNull) { this->TestAppendNull(); } | |
541 | ||
542 | TYPED_TEST(TestListArray, AppendNulls) { this->TestAppendNulls(); } | |
543 | ||
544 | TYPED_TEST(TestListArray, BulkAppend) { this->TestBulkAppend(); } | |
545 | ||
546 | TYPED_TEST(TestListArray, BulkAppendInvalid) { this->TestBulkAppendInvalid(); } | |
547 | ||
548 | TYPED_TEST(TestListArray, ZeroLength) { this->TestZeroLength(); } | |
549 | ||
550 | TYPED_TEST(TestListArray, BuilderPreserveFieldName) { | |
551 | this->TestBuilderPreserveFieldName(); | |
552 | } | |
553 | ||
554 | TYPED_TEST(TestListArray, FlattenSimple) { this->TestFlattenSimple(); } | |
555 | TYPED_TEST(TestListArray, FlattenZeroLength) { this->TestFlattenZeroLength(); } | |
556 | TYPED_TEST(TestListArray, TestFlattenNonEmptyBackingNulls) { | |
557 | this->TestFlattenNonEmptyBackingNulls(); | |
558 | } | |
559 | ||
560 | TYPED_TEST(TestListArray, ValidateOffsets) { this->TestValidateOffsets(); } | |
561 | ||
562 | TYPED_TEST(TestListArray, CornerCases) { this->TestCornerCases(); } | |
563 | ||
564 | #ifndef ARROW_LARGE_MEMORY_TESTS | |
565 | TYPED_TEST(TestListArray, DISABLED_TestOverflowCheck) { this->TestOverflowCheck(); } | |
566 | #else | |
567 | TYPED_TEST(TestListArray, TestOverflowCheck) { this->TestOverflowCheck(); } | |
568 | #endif | |
569 | ||
570 | // ---------------------------------------------------------------------- | |
571 | // Map tests | |
572 | ||
573 | class TestMapArray : public TestBuilder { | |
574 | public: | |
575 | using offset_type = typename MapType::offset_type; | |
576 | using OffsetType = typename TypeTraits<MapType>::OffsetType; | |
577 | ||
578 | void SetUp() { | |
579 | TestBuilder::SetUp(); | |
580 | ||
581 | key_type_ = utf8(); | |
582 | value_type_ = int32(); | |
583 | type_ = map(key_type_, value_type_); | |
584 | ||
585 | std::unique_ptr<ArrayBuilder> tmp; | |
586 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); | |
587 | builder_ = checked_pointer_cast<MapBuilder>(std::move(tmp)); | |
588 | } | |
589 | ||
590 | void Done() { | |
591 | std::shared_ptr<Array> out; | |
592 | FinishAndCheckPadding(builder_.get(), &out); | |
593 | result_ = std::dynamic_pointer_cast<MapArray>(out); | |
594 | } | |
595 | ||
596 | protected: | |
597 | std::shared_ptr<DataType> value_type_, key_type_; | |
598 | ||
599 | std::shared_ptr<MapBuilder> builder_; | |
600 | std::shared_ptr<MapArray> result_; | |
601 | }; | |
602 | ||
603 | TEST_F(TestMapArray, Equality) { | |
604 | auto& kb = checked_cast<StringBuilder&>(*builder_->key_builder()); | |
605 | auto& ib = checked_cast<Int32Builder&>(*builder_->item_builder()); | |
606 | ||
607 | std::shared_ptr<Array> array, equal_array, unequal_array; | |
608 | std::vector<int32_t> equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10}; | |
609 | std::vector<util::string_view> equal_keys = {"a", "a", "a", "b", "c", | |
610 | "a", "a", "a", "a", "b"}; | |
611 | std::vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6}; | |
612 | std::vector<int32_t> unequal_offsets = {0, 1, 4, 7}; | |
613 | std::vector<util::string_view> unequal_keys = {"a", "a", "b", "c", "a", "b", "c"}; | |
614 | std::vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5}; | |
615 | ||
616 | // setup two equal arrays | |
617 | for (auto out : {&array, &equal_array}) { | |
618 | ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size())); | |
619 | for (auto&& key : equal_keys) { | |
620 | ASSERT_OK(kb.Append(key)); | |
621 | } | |
622 | ASSERT_OK(ib.AppendValues(equal_values.data(), equal_values.size())); | |
623 | ASSERT_OK(builder_->Finish(out)); | |
624 | } | |
625 | ||
626 | // now an unequal one | |
627 | ASSERT_OK(builder_->AppendValues(unequal_offsets.data(), unequal_offsets.size())); | |
628 | for (auto&& key : unequal_keys) { | |
629 | ASSERT_OK(kb.Append(key)); | |
630 | } | |
631 | ASSERT_OK(ib.AppendValues(unequal_values.data(), unequal_values.size())); | |
632 | ASSERT_OK(builder_->Finish(&unequal_array)); | |
633 | ||
634 | // Test array equality | |
635 | EXPECT_TRUE(array->Equals(array)); | |
636 | EXPECT_TRUE(array->Equals(equal_array)); | |
637 | EXPECT_TRUE(equal_array->Equals(array)); | |
638 | EXPECT_FALSE(equal_array->Equals(unequal_array)); | |
639 | EXPECT_FALSE(unequal_array->Equals(equal_array)); | |
640 | ||
641 | // Test range equality | |
642 | EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array)); | |
643 | EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array)); | |
644 | EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); | |
645 | EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array)); | |
646 | } | |
647 | ||
648 | TEST_F(TestMapArray, BuildingIntToInt) { | |
649 | auto type = map(int16(), int16()); | |
650 | ||
651 | auto expected_keys = ArrayFromJSON(int16(), R"([ | |
652 | 0, 1, 2, 3, 4, 5, | |
653 | 0, 1, 2, 3, 4, 5 | |
654 | ])"); | |
655 | auto expected_items = ArrayFromJSON(int16(), R"([ | |
656 | 1, 1, 2, 3, 5, 8, | |
657 | null, null, 0, 1, null, 2 | |
658 | ])"); | |
659 | auto expected_offsets = ArrayFromJSON(int32(), "[0, 6, 6, 12, 12]")->data()->buffers[1]; | |
660 | auto expected_null_bitmap = | |
661 | ArrayFromJSON(boolean(), "[1, 0, 1, 1]")->data()->buffers[1]; | |
662 | ||
663 | MapArray expected(type, 4, expected_offsets, expected_keys, expected_items, | |
664 | expected_null_bitmap, 1, 0); | |
665 | ||
666 | auto key_builder = std::make_shared<Int16Builder>(); | |
667 | auto item_builder = std::make_shared<Int16Builder>(); | |
668 | MapBuilder map_builder(default_memory_pool(), key_builder, item_builder); | |
669 | ||
670 | std::shared_ptr<Array> actual; | |
671 | ASSERT_OK(map_builder.Append()); | |
672 | ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5})); | |
673 | ASSERT_OK(item_builder->AppendValues({1, 1, 2, 3, 5, 8})); | |
674 | ASSERT_OK(map_builder.AppendNull()); | |
675 | ASSERT_OK(map_builder.Append()); | |
676 | ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5})); | |
677 | ASSERT_OK(item_builder->AppendValues({-1, -1, 0, 1, -1, 2}, {0, 0, 1, 1, 0, 1})); | |
678 | ASSERT_OK(map_builder.Append()); | |
679 | ASSERT_OK(map_builder.Finish(&actual)); | |
680 | ASSERT_OK(actual->ValidateFull()); | |
681 | ||
682 | ASSERT_ARRAYS_EQUAL(*actual, expected); | |
683 | } | |
684 | ||
685 | TEST_F(TestMapArray, BuildingStringToInt) { | |
686 | auto type = map(utf8(), int32()); | |
687 | ||
688 | std::vector<int32_t> offsets = {0, 2, 2, 3, 3}; | |
689 | auto expected_keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])"); | |
690 | auto expected_values = ArrayFromJSON(int32(), "[0, null, 8]"); | |
691 | ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, internal::BytesToBits({1, 0, 1, 1})); | |
692 | MapArray expected(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values, | |
693 | expected_null_bitmap, 1); | |
694 | ||
695 | auto key_builder = std::make_shared<StringBuilder>(); | |
696 | auto item_builder = std::make_shared<Int32Builder>(); | |
697 | MapBuilder map_builder(default_memory_pool(), key_builder, item_builder); | |
698 | ||
699 | std::shared_ptr<Array> actual; | |
700 | ASSERT_OK(map_builder.Append()); | |
701 | ASSERT_OK(key_builder->Append("joe")); | |
702 | ASSERT_OK(item_builder->Append(0)); | |
703 | ASSERT_OK(key_builder->Append("mark")); | |
704 | ASSERT_OK(item_builder->AppendNull()); | |
705 | ASSERT_OK(map_builder.AppendNull()); | |
706 | ASSERT_OK(map_builder.Append()); | |
707 | ASSERT_OK(key_builder->Append("cap")); | |
708 | ASSERT_OK(item_builder->Append(8)); | |
709 | ASSERT_OK(map_builder.Append()); | |
710 | ASSERT_OK(map_builder.Finish(&actual)); | |
711 | ASSERT_OK(actual->ValidateFull()); | |
712 | ||
713 | ASSERT_ARRAYS_EQUAL(*actual, expected); | |
714 | } | |
715 | ||
716 | TEST_F(TestMapArray, FromArrays) { | |
717 | std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, keys, items; | |
718 | ||
719 | std::vector<bool> offsets_is_valid3 = {true, false, true, true}; | |
720 | std::vector<bool> offsets_is_valid4 = {true, true, false, true}; | |
721 | ||
722 | std::vector<bool> items_is_valid = {true, false, true, true, true, true}; | |
723 | ||
724 | std::vector<MapType::offset_type> offset1_values = {0, 2, 2, 6}; | |
725 | std::vector<MapType::offset_type> offset2_values = {0, 2, 6, 6}; | |
726 | ||
727 | std::vector<int8_t> key_values = {0, 1, 2, 3, 4, 5}; | |
728 | std::vector<int16_t> item_values = {10, 9, 8, 7, 6, 5}; | |
729 | const int length = 3; | |
730 | ||
731 | ArrayFromVector<OffsetType, offset_type>(offset1_values, &offsets1); | |
732 | ArrayFromVector<OffsetType, offset_type>(offset2_values, &offsets2); | |
733 | ||
734 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid3, offset1_values, &offsets3); | |
735 | ArrayFromVector<OffsetType, offset_type>(offsets_is_valid4, offset2_values, &offsets4); | |
736 | ||
737 | ArrayFromVector<Int8Type, int8_t>(key_values, &keys); | |
738 | ArrayFromVector<Int16Type, int16_t>(items_is_valid, item_values, &items); | |
739 | ||
740 | auto map_type = map(int8(), int16()); | |
741 | ||
742 | ASSERT_OK_AND_ASSIGN(auto map1, MapArray::FromArrays(offsets1, keys, items, pool_)); | |
743 | ASSERT_OK_AND_ASSIGN(auto map3, MapArray::FromArrays(offsets3, keys, items, pool_)); | |
744 | ASSERT_OK_AND_ASSIGN(auto map4, MapArray::FromArrays(offsets4, keys, items, pool_)); | |
745 | ASSERT_OK(map1->Validate()); | |
746 | ASSERT_OK(map3->Validate()); | |
747 | ASSERT_OK(map4->Validate()); | |
748 | ||
749 | MapArray expected1(map_type, length, offsets1->data()->buffers[1], keys, items, | |
750 | offsets1->data()->buffers[0], 0); | |
751 | AssertArraysEqual(expected1, *map1); | |
752 | ||
753 | // Use null bitmap from offsets3, but clean offsets from non-null version | |
754 | MapArray expected3(map_type, length, offsets1->data()->buffers[1], keys, items, | |
755 | offsets3->data()->buffers[0], 1); | |
756 | AssertArraysEqual(expected3, *map3); | |
757 | ||
758 | // Check that the last offset bit is zero | |
759 | ASSERT_FALSE(BitUtil::GetBit(map3->null_bitmap()->data(), length + 1)); | |
760 | ||
761 | MapArray expected4(map_type, length, offsets2->data()->buffers[1], keys, items, | |
762 | offsets4->data()->buffers[0], 1); | |
763 | AssertArraysEqual(expected4, *map4); | |
764 | ||
765 | // Test failure modes | |
766 | ||
767 | std::shared_ptr<Array> tmp; | |
768 | ||
769 | // Zero-length offsets | |
770 | ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1->Slice(0, 0), keys, items, pool_)); | |
771 | ||
772 | // Offsets not the right type | |
773 | ASSERT_RAISES(TypeError, MapArray::FromArrays(keys, offsets1, items, pool_)); | |
774 | ||
775 | // Keys and Items different lengths | |
776 | ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1, keys->Slice(0, 1), items, pool_)); | |
777 | ||
778 | // Keys contains null values | |
779 | std::shared_ptr<Array> keys_with_null = offsets3; | |
780 | std::shared_ptr<Array> tmp_items = items->Slice(0, offsets3->length()); | |
781 | ASSERT_EQ(keys_with_null->length(), tmp_items->length()); | |
782 | ASSERT_RAISES(Invalid, | |
783 | MapArray::FromArrays(offsets1, keys_with_null, tmp_items, pool_)); | |
784 | } | |
785 | ||
786 | TEST_F(TestMapArray, FromArraysEquality) { | |
787 | // More equality tests using MapArray::FromArrays | |
788 | auto keys1 = ArrayFromJSON(utf8(), R"(["ab", "cd", "ef", "gh", "ij", "kl"])"); | |
789 | auto keys2 = ArrayFromJSON(utf8(), R"(["ab", "cd", "ef", "gh", "ij", "kl"])"); | |
790 | auto keys3 = ArrayFromJSON(utf8(), R"(["ab", "cd", "ef", "gh", "zz", "kl"])"); | |
791 | auto items1 = ArrayFromJSON(int16(), "[1, 2, 3, 4, 5, 6]"); | |
792 | auto items2 = ArrayFromJSON(int16(), "[1, 2, 3, 4, 5, 6]"); | |
793 | auto items3 = ArrayFromJSON(int16(), "[1, 2, 3, null, 5, 6]"); | |
794 | auto offsets1 = ArrayFromJSON(int32(), "[0, 1, 3, null, 5, 6]"); | |
795 | auto offsets2 = ArrayFromJSON(int32(), "[0, 1, 3, null, 5, 6]"); | |
796 | auto offsets3 = ArrayFromJSON(int32(), "[0, 1, 3, 3, 5, 6]"); | |
797 | ||
798 | ASSERT_OK_AND_ASSIGN(auto array1, MapArray::FromArrays(offsets1, keys1, items1)); | |
799 | ASSERT_OK_AND_ASSIGN(auto array2, MapArray::FromArrays(offsets2, keys2, items2)); | |
800 | ASSERT_OK_AND_ASSIGN(auto array3, MapArray::FromArrays(offsets3, keys2, items2)); | |
801 | ASSERT_OK_AND_ASSIGN(auto array4, MapArray::FromArrays(offsets2, keys3, items2)); | |
802 | ASSERT_OK_AND_ASSIGN(auto array5, MapArray::FromArrays(offsets2, keys2, items3)); | |
803 | ASSERT_OK_AND_ASSIGN(auto array6, MapArray::FromArrays(offsets3, keys3, items3)); | |
804 | ||
805 | ASSERT_TRUE(array1->Equals(array2)); | |
806 | ASSERT_TRUE(array1->RangeEquals(array2, 0, 5, 0)); | |
807 | ||
808 | ASSERT_FALSE(array1->Equals(array3)); // different offsets | |
809 | ASSERT_FALSE(array1->RangeEquals(array3, 0, 5, 0)); | |
810 | ASSERT_TRUE(array1->RangeEquals(array3, 0, 2, 0)); | |
811 | ASSERT_FALSE(array1->RangeEquals(array3, 2, 5, 2)); | |
812 | ||
813 | ASSERT_FALSE(array1->Equals(array4)); // different keys | |
814 | ASSERT_FALSE(array1->RangeEquals(array4, 0, 5, 0)); | |
815 | ASSERT_TRUE(array1->RangeEquals(array4, 0, 2, 0)); | |
816 | ASSERT_FALSE(array1->RangeEquals(array4, 2, 5, 2)); | |
817 | ||
818 | ASSERT_FALSE(array1->Equals(array5)); // different items | |
819 | ASSERT_FALSE(array1->RangeEquals(array5, 0, 5, 0)); | |
820 | ASSERT_TRUE(array1->RangeEquals(array5, 0, 2, 0)); | |
821 | ASSERT_FALSE(array1->RangeEquals(array5, 2, 5, 2)); | |
822 | ||
823 | ASSERT_FALSE(array1->Equals(array6)); // different everything | |
824 | ASSERT_FALSE(array1->RangeEquals(array6, 0, 5, 0)); | |
825 | ASSERT_TRUE(array1->RangeEquals(array6, 0, 2, 0)); | |
826 | ASSERT_FALSE(array1->RangeEquals(array6, 2, 5, 2)); | |
827 | ||
828 | // Map array equality should be indifferent to field names | |
829 | ASSERT_OK_AND_ASSIGN(auto other_map_type, | |
830 | MapType::Make(field("some_entries", | |
831 | struct_({field("some_key", utf8(), false), | |
832 | field("some_value", int16())}), | |
833 | false))); | |
834 | ASSERT_OK_AND_ASSIGN(auto array7, | |
835 | MapArray::FromArrays(other_map_type, offsets2, keys2, items2)); | |
836 | ASSERT_TRUE(array1->Equals(array7)); | |
837 | ASSERT_TRUE(array1->RangeEquals(array7, 0, 5, 0)); | |
838 | } | |
839 | ||
840 | namespace { | |
841 | ||
842 | template <typename TYPE> | |
843 | Status BuildListOfStructPairs(TYPE& builder, std::shared_ptr<Array>* out) { | |
844 | auto struct_builder = internal::checked_cast<StructBuilder*>(builder.value_builder()); | |
845 | auto field0_builder = | |
846 | internal::checked_cast<Int16Builder*>(struct_builder->field_builder(0)); | |
847 | auto field1_builder = | |
848 | internal::checked_cast<Int16Builder*>(struct_builder->field_builder(1)); | |
849 | ||
850 | RETURN_NOT_OK(builder.Append()); | |
851 | RETURN_NOT_OK(field0_builder->AppendValues({0, 1})); | |
852 | RETURN_NOT_OK(field1_builder->AppendValues({1, -1}, {1, 0})); | |
853 | RETURN_NOT_OK(struct_builder->AppendValues(2, NULLPTR)); | |
854 | RETURN_NOT_OK(builder.AppendNull()); | |
855 | RETURN_NOT_OK(builder.Append()); | |
856 | RETURN_NOT_OK(field0_builder->Append(2)); | |
857 | RETURN_NOT_OK(field1_builder->Append(3)); | |
858 | RETURN_NOT_OK(struct_builder->Append()); | |
859 | RETURN_NOT_OK(builder.Append()); | |
860 | RETURN_NOT_OK(builder.Append()); | |
861 | RETURN_NOT_OK(field0_builder->AppendValues({3, 4})); | |
862 | RETURN_NOT_OK(field1_builder->AppendValues({4, 5})); | |
863 | RETURN_NOT_OK(struct_builder->AppendValues(2, NULLPTR)); | |
864 | RETURN_NOT_OK(builder.Finish(out)); | |
865 | RETURN_NOT_OK((*out)->Validate()); | |
866 | ||
867 | return Status::OK(); | |
868 | } | |
869 | ||
870 | } // namespace | |
871 | ||
872 | TEST_F(TestMapArray, ValueBuilder) { | |
873 | auto key_builder = std::make_shared<Int16Builder>(); | |
874 | auto item_builder = std::make_shared<Int16Builder>(); | |
875 | MapBuilder map_builder(default_memory_pool(), key_builder, item_builder); | |
876 | ||
877 | // Build Map array using key/item builder | |
878 | std::shared_ptr<Array> expected; | |
879 | ASSERT_OK(map_builder.Append()); | |
880 | ASSERT_OK(key_builder->AppendValues({0, 1})); | |
881 | ASSERT_OK(item_builder->AppendValues({1, -1}, {1, 0})); | |
882 | ASSERT_OK(map_builder.AppendNull()); | |
883 | ASSERT_OK(map_builder.Append()); | |
884 | ASSERT_OK(key_builder->Append(2)); | |
885 | ASSERT_OK(item_builder->Append(3)); | |
886 | ASSERT_OK(map_builder.Append()); | |
887 | ASSERT_OK(map_builder.Append()); | |
888 | ASSERT_OK(key_builder->AppendValues({3, 4})); | |
889 | ASSERT_OK(item_builder->AppendValues({4, 5})); | |
890 | ASSERT_OK(map_builder.Finish(&expected)); | |
891 | ASSERT_OK(expected->Validate()); | |
892 | ||
893 | map_builder.Reset(); | |
894 | ||
895 | // Build Map array like an Array of Structs using value builder | |
896 | std::shared_ptr<Array> actual_map; | |
897 | ASSERT_OK(BuildListOfStructPairs(map_builder, &actual_map)); | |
898 | ASSERT_ARRAYS_EQUAL(*actual_map, *expected); | |
899 | ||
900 | map_builder.Reset(); | |
901 | ||
902 | // Build a ListArray of Structs, and compare MapArray to the List | |
903 | auto map_type = internal::checked_pointer_cast<MapType>(map_builder.type()); | |
904 | auto struct_type = map_type->value_type(); | |
905 | std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder}; | |
906 | auto struct_builder = | |
907 | std::make_shared<StructBuilder>(struct_type, default_memory_pool(), child_builders); | |
908 | ListBuilder list_builder(default_memory_pool(), struct_builder, map_type); | |
909 | ||
910 | std::shared_ptr<Array> actual_list; | |
911 | ASSERT_OK(BuildListOfStructPairs(list_builder, &actual_list)); | |
912 | ||
913 | MapArray* map_ptr = internal::checked_cast<MapArray*>(actual_map.get()); | |
914 | auto list_type = std::make_shared<ListType>(map_type->field(0)); | |
915 | ListArray map_as_list(list_type, map_ptr->length(), map_ptr->data()->buffers[1], | |
916 | map_ptr->values(), actual_map->data()->buffers[0], | |
917 | map_ptr->null_count()); | |
918 | ||
919 | ASSERT_ARRAYS_EQUAL(*actual_list, map_as_list); | |
920 | } | |
921 | ||
922 | // ---------------------------------------------------------------------- | |
923 | // FixedSizeList tests | |
924 | ||
925 | class TestFixedSizeListArray : public TestBuilder { | |
926 | public: | |
927 | void SetUp() { | |
928 | TestBuilder::SetUp(); | |
929 | ||
930 | value_type_ = int32(); | |
931 | type_ = fixed_size_list(value_type_, list_size()); | |
932 | ||
933 | std::unique_ptr<ArrayBuilder> tmp; | |
934 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); | |
935 | builder_.reset(checked_cast<FixedSizeListBuilder*>(tmp.release())); | |
936 | } | |
937 | ||
938 | void Done() { | |
939 | std::shared_ptr<Array> out; | |
940 | FinishAndCheckPadding(builder_.get(), &out); | |
941 | result_ = std::dynamic_pointer_cast<FixedSizeListArray>(out); | |
942 | } | |
943 | ||
944 | protected: | |
945 | static constexpr int32_t list_size() { return 2; } | |
946 | std::shared_ptr<DataType> value_type_; | |
947 | ||
948 | std::shared_ptr<FixedSizeListBuilder> builder_; | |
949 | std::shared_ptr<FixedSizeListArray> result_; | |
950 | }; | |
951 | ||
952 | TEST_F(TestFixedSizeListArray, Equality) { | |
953 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); | |
954 | ||
955 | std::shared_ptr<Array> array, equal_array, unequal_array; | |
956 | std::vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6}; | |
957 | std::vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5, 2}; | |
958 | ||
959 | // setup two equal arrays | |
960 | ASSERT_OK(builder_->AppendValues(equal_values.size() / list_size())); | |
961 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); | |
962 | ASSERT_OK(builder_->Finish(&array)); | |
963 | ||
964 | ASSERT_OK(builder_->AppendValues(equal_values.size() / list_size())); | |
965 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); | |
966 | ||
967 | ASSERT_OK(builder_->Finish(&equal_array)); | |
968 | ||
969 | // now an unequal one | |
970 | ASSERT_OK(builder_->AppendValues(unequal_values.size() / list_size())); | |
971 | ASSERT_OK(vb->AppendValues(unequal_values.data(), unequal_values.size())); | |
972 | ASSERT_OK(builder_->Finish(&unequal_array)); | |
973 | ||
974 | // Test array equality | |
975 | AssertArraysEqual(*array, *array); | |
976 | AssertArraysEqual(*array, *equal_array); | |
977 | EXPECT_FALSE(equal_array->Equals(unequal_array)); | |
978 | EXPECT_FALSE(unequal_array->Equals(equal_array)); | |
979 | ||
980 | // Test range equality | |
981 | EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array)); | |
982 | EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array)); | |
983 | EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); | |
984 | EXPECT_TRUE(array->RangeEquals(1, 3, 2, unequal_array)); | |
985 | } | |
986 | ||
987 | TEST_F(TestFixedSizeListArray, TestAppendNull) { | |
988 | ASSERT_OK(builder_->AppendNull()); | |
989 | ASSERT_OK(builder_->AppendNull()); | |
990 | ||
991 | Done(); | |
992 | ||
993 | ASSERT_OK(result_->ValidateFull()); | |
994 | ASSERT_TRUE(result_->IsNull(0)); | |
995 | ASSERT_TRUE(result_->IsNull(1)); | |
996 | ||
997 | ASSERT_EQ(0, result_->value_offset(0)); | |
998 | ASSERT_EQ(list_size(), result_->value_offset(1)); | |
999 | ||
1000 | auto values = result_->values(); | |
1001 | ASSERT_EQ(list_size() * 2, values->length()); | |
1002 | } | |
1003 | ||
1004 | TEST_F(TestFixedSizeListArray, TestAppendNulls) { | |
1005 | ASSERT_OK(builder_->AppendNulls(3)); | |
1006 | ||
1007 | Done(); | |
1008 | ||
1009 | ASSERT_OK(result_->ValidateFull()); | |
1010 | ASSERT_EQ(result_->length(), 3); | |
1011 | ASSERT_EQ(result_->null_count(), 3); | |
1012 | ASSERT_TRUE(result_->IsNull(0)); | |
1013 | ASSERT_TRUE(result_->IsNull(1)); | |
1014 | ASSERT_TRUE(result_->IsNull(2)); | |
1015 | ||
1016 | ASSERT_EQ(0, result_->value_offset(0)); | |
1017 | ASSERT_EQ(list_size(), result_->value_offset(1)); | |
1018 | ASSERT_EQ(list_size() * 2, result_->value_offset(2)); | |
1019 | ||
1020 | auto values = result_->values(); | |
1021 | ASSERT_EQ(list_size() * 3, values->length()); | |
1022 | } | |
1023 | ||
1024 | void ValidateBasicFixedSizeListArray(const FixedSizeListArray* result, | |
1025 | const std::vector<int32_t>& values, | |
1026 | const std::vector<uint8_t>& is_valid) { | |
1027 | ASSERT_OK(result->ValidateFull()); | |
1028 | ASSERT_EQ(1, result->null_count()); | |
1029 | ASSERT_LE(result->values()->null_count(), 2); | |
1030 | ||
1031 | ASSERT_EQ(3, result->length()); | |
1032 | for (int32_t i = 0; i < 3; ++i) { | |
1033 | ASSERT_EQ(i * result->value_length(), result->value_offset(i)); | |
1034 | } | |
1035 | ||
1036 | for (int i = 0; i < result->length(); ++i) { | |
1037 | ASSERT_EQ(is_valid[i] == 0, result->IsNull(i)); | |
1038 | } | |
1039 | ||
1040 | ASSERT_LE(result->length() * result->value_length(), result->values()->length()); | |
1041 | auto varr = std::dynamic_pointer_cast<Int32Array>(result->values()); | |
1042 | ||
1043 | for (size_t i = 0; i < values.size(); ++i) { | |
1044 | if (is_valid[i / result->value_length()] == 0) { | |
1045 | continue; | |
1046 | } | |
1047 | ASSERT_EQ(values[i], varr->Value(i)); | |
1048 | } | |
1049 | } | |
1050 | ||
1051 | TEST_F(TestFixedSizeListArray, TestBasics) { | |
1052 | std::vector<int32_t> values = {0, 1, 2, 3, 4, 5}; | |
1053 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
1054 | ||
1055 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); | |
1056 | ||
1057 | int pos = 0; | |
1058 | for (size_t i = 0; i < values.size() / list_size(); ++i) { | |
1059 | if (is_valid[i] == 0) { | |
1060 | ASSERT_OK(builder_->AppendNull()); | |
1061 | pos += list_size(); | |
1062 | continue; | |
1063 | } | |
1064 | ASSERT_OK(builder_->Append()); | |
1065 | for (int j = 0; j < list_size(); ++j) { | |
1066 | ASSERT_OK(vb->Append(values[pos++])); | |
1067 | } | |
1068 | } | |
1069 | ||
1070 | Done(); | |
1071 | ValidateBasicFixedSizeListArray(result_.get(), values, is_valid); | |
1072 | } | |
1073 | ||
1074 | TEST_F(TestFixedSizeListArray, BulkAppend) { | |
1075 | std::vector<int32_t> values = {0, 1, 2, 3, 4, 5}; | |
1076 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
1077 | ||
1078 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); | |
1079 | ||
1080 | ASSERT_OK(builder_->AppendValues(values.size() / list_size(), is_valid.data())); | |
1081 | for (int32_t value : values) { | |
1082 | ASSERT_OK(vb->Append(value)); | |
1083 | } | |
1084 | Done(); | |
1085 | ValidateBasicFixedSizeListArray(result_.get(), values, is_valid); | |
1086 | } | |
1087 | ||
1088 | TEST_F(TestFixedSizeListArray, BulkAppendExcess) { | |
1089 | std::vector<int32_t> values = {0, 1, 2, 3, 4, 5}; | |
1090 | std::vector<uint8_t> is_valid = {1, 0, 1}; | |
1091 | ||
1092 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); | |
1093 | ||
1094 | ASSERT_OK(builder_->AppendValues(values.size() / list_size(), is_valid.data())); | |
1095 | for (int32_t value : values) { | |
1096 | ASSERT_OK(vb->Append(value)); | |
1097 | } | |
1098 | for (int32_t value : values) { | |
1099 | ASSERT_OK(vb->Append(value)); | |
1100 | } | |
1101 | ||
1102 | Done(); | |
1103 | // We appended too many values to the child array, but that's OK | |
1104 | ValidateBasicFixedSizeListArray(result_.get(), values, is_valid); | |
1105 | } | |
1106 | ||
1107 | TEST_F(TestFixedSizeListArray, TestZeroLength) { | |
1108 | // All buffers are null | |
1109 | Done(); | |
1110 | ASSERT_OK(result_->ValidateFull()); | |
1111 | } | |
1112 | ||
1113 | TEST_F(TestFixedSizeListArray, TestBuilderPreserveFieldName) { | |
1114 | auto list_type_with_name = fixed_size_list(field("counts", int32()), list_size()); | |
1115 | ||
1116 | std::unique_ptr<ArrayBuilder> tmp; | |
1117 | ASSERT_OK(MakeBuilder(pool_, list_type_with_name, &tmp)); | |
1118 | builder_.reset(checked_cast<FixedSizeListBuilder*>(tmp.release())); | |
1119 | ||
1120 | ASSERT_OK(builder_->AppendValues(4)); | |
1121 | ||
1122 | std::shared_ptr<Array> list_array; | |
1123 | ASSERT_OK(builder_->Finish(&list_array)); | |
1124 | ||
1125 | const auto& type = checked_cast<FixedSizeListType&>(*list_array->type()); | |
1126 | ASSERT_EQ("counts", type.value_field()->name()); | |
1127 | } | |
1128 | ||
1129 | TEST_F(TestFixedSizeListArray, NegativeLength) { | |
1130 | type_ = fixed_size_list(value_type_, -42); | |
1131 | auto values = ArrayFromJSON(value_type_, "[]"); | |
1132 | result_ = std::make_shared<FixedSizeListArray>(type_, 0, values); | |
1133 | ASSERT_RAISES(Invalid, result_->ValidateFull()); | |
1134 | } | |
1135 | ||
1136 | TEST_F(TestFixedSizeListArray, NotEnoughValues) { | |
1137 | type_ = fixed_size_list(value_type_, 2); | |
1138 | auto values = ArrayFromJSON(value_type_, "[]"); | |
1139 | result_ = std::make_shared<FixedSizeListArray>(type_, 1, values); | |
1140 | ASSERT_RAISES(Invalid, result_->ValidateFull()); | |
1141 | ||
1142 | // ARROW-13437: too many values is OK though | |
1143 | values = ArrayFromJSON(value_type_, "[1, 2, 3, 4]"); | |
1144 | result_ = std::make_shared<FixedSizeListArray>(type_, 1, values); | |
1145 | ASSERT_OK(result_->ValidateFull()); | |
1146 | } | |
1147 | ||
1148 | TEST_F(TestFixedSizeListArray, FlattenZeroLength) { | |
1149 | Done(); | |
1150 | ASSERT_OK_AND_ASSIGN(auto flattened, result_->Flatten()); | |
1151 | ASSERT_OK(flattened->ValidateFull()); | |
1152 | ASSERT_EQ(0, flattened->length()); | |
1153 | AssertTypeEqual(*flattened->type(), *value_type_); | |
1154 | } | |
1155 | ||
1156 | TEST_F(TestFixedSizeListArray, Flatten) { | |
1157 | std::vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6, 7}; | |
1158 | std::vector<uint8_t> is_valid = {1, 0, 1, 1}; | |
1159 | ASSERT_OK(builder_->AppendValues(4, is_valid.data())); | |
1160 | auto* vb = checked_cast<Int32Builder*>(builder_->value_builder()); | |
1161 | ASSERT_OK(vb->AppendValues(values.data(), static_cast<int64_t>(values.size()))); | |
1162 | Done(); | |
1163 | ||
1164 | { | |
1165 | ASSERT_OK_AND_ASSIGN(auto flattened, result_->Flatten()); | |
1166 | ASSERT_OK(flattened->ValidateFull()); | |
1167 | ASSERT_EQ(6, flattened->length()); | |
1168 | AssertArraysEqual(*flattened, *ArrayFromJSON(value_type_, "[0, 1, 4, 5, 6, 7]"), | |
1169 | /*verbose=*/true); | |
1170 | } | |
1171 | ||
1172 | { | |
1173 | auto sliced = std::dynamic_pointer_cast<FixedSizeListArray>(result_->Slice(1, 2)); | |
1174 | ASSERT_OK_AND_ASSIGN(auto flattened, sliced->Flatten()); | |
1175 | ASSERT_OK(flattened->ValidateFull()); | |
1176 | ASSERT_EQ(2, flattened->length()); | |
1177 | AssertArraysEqual(*flattened, *ArrayFromJSON(value_type_, "[4, 5]"), | |
1178 | /*verbose=*/true); | |
1179 | } | |
1180 | } | |
1181 | ||
1182 | } // namespace arrow |