]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <cstdint> | |
19 | #include <memory> | |
20 | #include <vector> | |
21 | ||
22 | #include <gmock/gmock.h> | |
23 | #include <gtest/gtest.h> | |
24 | ||
25 | #include "arrow/array/array_base.h" | |
26 | #include "arrow/array/data.h" | |
27 | #include "arrow/array/util.h" | |
28 | #include "arrow/chunked_array.h" | |
29 | #include "arrow/record_batch.h" | |
30 | #include "arrow/status.h" | |
31 | #include "arrow/table.h" | |
32 | #include "arrow/testing/gtest_common.h" | |
33 | #include "arrow/testing/gtest_util.h" | |
34 | #include "arrow/type.h" | |
35 | #include "arrow/util/key_value_metadata.h" | |
36 | ||
37 | namespace arrow { | |
38 | ||
39 | class TestTable : public TestBase { | |
40 | public: | |
41 | void MakeExample1(int length) { | |
42 | auto f0 = field("f0", int32()); | |
43 | auto f1 = field("f1", uint8()); | |
44 | auto f2 = field("f2", int16()); | |
45 | ||
46 | std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2}; | |
47 | schema_ = std::make_shared<Schema>(fields); | |
48 | ||
49 | arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<UInt8Array>(length), | |
50 | MakeRandomArray<Int16Array>(length)}; | |
51 | ||
52 | columns_ = {std::make_shared<ChunkedArray>(arrays_[0]), | |
53 | std::make_shared<ChunkedArray>(arrays_[1]), | |
54 | std::make_shared<ChunkedArray>(arrays_[2])}; | |
55 | } | |
56 | ||
57 | protected: | |
58 | std::shared_ptr<Table> table_; | |
59 | std::shared_ptr<Schema> schema_; | |
60 | ||
61 | std::vector<std::shared_ptr<Array>> arrays_; | |
62 | std::vector<std::shared_ptr<ChunkedArray>> columns_; | |
63 | }; | |
64 | ||
65 | TEST_F(TestTable, EmptySchema) { | |
66 | auto empty_schema = ::arrow::schema({}); | |
67 | table_ = Table::Make(empty_schema, columns_); | |
68 | ASSERT_OK(table_->ValidateFull()); | |
69 | ASSERT_EQ(0, table_->num_rows()); | |
70 | ASSERT_EQ(0, table_->num_columns()); | |
71 | } | |
72 | ||
73 | TEST_F(TestTable, Ctors) { | |
74 | const int length = 100; | |
75 | MakeExample1(length); | |
76 | ||
77 | table_ = Table::Make(schema_, columns_); | |
78 | ASSERT_OK(table_->ValidateFull()); | |
79 | ASSERT_EQ(length, table_->num_rows()); | |
80 | ASSERT_EQ(3, table_->num_columns()); | |
81 | ||
82 | auto array_ctor = Table::Make(schema_, arrays_); | |
83 | ASSERT_TRUE(table_->Equals(*array_ctor)); | |
84 | ||
85 | table_ = Table::Make(schema_, columns_, length); | |
86 | ASSERT_OK(table_->ValidateFull()); | |
87 | ASSERT_EQ(length, table_->num_rows()); | |
88 | ||
89 | table_ = Table::Make(schema_, arrays_); | |
90 | ASSERT_OK(table_->ValidateFull()); | |
91 | ASSERT_EQ(length, table_->num_rows()); | |
92 | ASSERT_EQ(3, table_->num_columns()); | |
93 | } | |
94 | ||
95 | TEST_F(TestTable, Metadata) { | |
96 | const int length = 100; | |
97 | MakeExample1(length); | |
98 | ||
99 | table_ = Table::Make(schema_, columns_); | |
100 | ||
101 | ASSERT_TRUE(table_->schema()->Equals(*schema_)); | |
102 | ||
103 | auto col = table_->column(0); | |
104 | ASSERT_EQ(schema_->field(0)->type(), col->type()); | |
105 | } | |
106 | ||
107 | TEST_F(TestTable, InvalidColumns) { | |
108 | // Check that columns are all the same length | |
109 | const int length = 100; | |
110 | MakeExample1(length); | |
111 | ||
112 | table_ = Table::Make(schema_, columns_, length - 1); | |
113 | ASSERT_RAISES(Invalid, table_->ValidateFull()); | |
114 | ||
115 | columns_.clear(); | |
116 | ||
117 | // Wrong number of columns | |
118 | table_ = Table::Make(schema_, columns_, length); | |
119 | ASSERT_RAISES(Invalid, table_->ValidateFull()); | |
120 | ||
121 | columns_ = {std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length)), | |
122 | std::make_shared<ChunkedArray>(MakeRandomArray<UInt8Array>(length)), | |
123 | std::make_shared<ChunkedArray>(MakeRandomArray<Int16Array>(length - 1))}; | |
124 | ||
125 | table_ = Table::Make(schema_, columns_, length); | |
126 | ASSERT_RAISES(Invalid, table_->ValidateFull()); | |
127 | } | |
128 | ||
129 | TEST_F(TestTable, AllColumnsAndFields) { | |
130 | const int length = 100; | |
131 | MakeExample1(length); | |
132 | table_ = Table::Make(schema_, columns_); | |
133 | ||
134 | auto columns = table_->columns(); | |
135 | auto fields = table_->fields(); | |
136 | ||
137 | for (int i = 0; i < table_->num_columns(); ++i) { | |
138 | AssertChunkedEqual(*table_->column(i), *columns[i]); | |
139 | AssertFieldEqual(*table_->field(i), *fields[i]); | |
140 | } | |
141 | ||
142 | // Zero length | |
143 | std::vector<std::shared_ptr<Array>> t2_columns; | |
144 | auto t2 = Table::Make(::arrow::schema({}), t2_columns); | |
145 | columns = t2->columns(); | |
146 | fields = t2->fields(); | |
147 | ||
148 | ASSERT_EQ(0, columns.size()); | |
149 | ASSERT_EQ(0, fields.size()); | |
150 | } | |
151 | ||
152 | TEST_F(TestTable, Equals) { | |
153 | const int length = 100; | |
154 | MakeExample1(length); | |
155 | ||
156 | table_ = Table::Make(schema_, columns_); | |
157 | ||
158 | ASSERT_TRUE(table_->Equals(*table_)); | |
159 | // Differing schema | |
160 | auto f0 = field("f3", int32()); | |
161 | auto f1 = field("f4", uint8()); | |
162 | auto f2 = field("f5", int16()); | |
163 | std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2}; | |
164 | auto other_schema = std::make_shared<Schema>(fields); | |
165 | auto other = Table::Make(other_schema, columns_); | |
166 | ASSERT_FALSE(table_->Equals(*other)); | |
167 | // Differing columns | |
168 | std::vector<std::shared_ptr<ChunkedArray>> other_columns = { | |
169 | std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length, 10)), | |
170 | std::make_shared<ChunkedArray>(MakeRandomArray<UInt8Array>(length, 10)), | |
171 | std::make_shared<ChunkedArray>(MakeRandomArray<Int16Array>(length, 10))}; | |
172 | ||
173 | other = Table::Make(schema_, other_columns); | |
174 | ASSERT_FALSE(table_->Equals(*other)); | |
175 | ||
176 | // Differring schema metadata | |
177 | other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"})); | |
178 | other = Table::Make(other_schema, columns_); | |
179 | ASSERT_TRUE(table_->Equals(*other)); | |
180 | ASSERT_FALSE(table_->Equals(*other, /*check_metadata=*/true)); | |
181 | } | |
182 | ||
183 | TEST_F(TestTable, FromRecordBatches) { | |
184 | const int64_t length = 10; | |
185 | MakeExample1(length); | |
186 | ||
187 | auto batch1 = RecordBatch::Make(schema_, length, arrays_); | |
188 | ||
189 | ASSERT_OK_AND_ASSIGN(auto result, Table::FromRecordBatches({batch1})); | |
190 | ||
191 | auto expected = Table::Make(schema_, columns_); | |
192 | ASSERT_TRUE(result->Equals(*expected)); | |
193 | ||
194 | std::vector<std::shared_ptr<ChunkedArray>> other_columns; | |
195 | for (int i = 0; i < schema_->num_fields(); ++i) { | |
196 | std::vector<std::shared_ptr<Array>> col_arrays = {arrays_[i], arrays_[i]}; | |
197 | other_columns.push_back(std::make_shared<ChunkedArray>(col_arrays)); | |
198 | } | |
199 | ||
200 | ASSERT_OK_AND_ASSIGN(result, Table::FromRecordBatches({batch1, batch1})); | |
201 | expected = Table::Make(schema_, other_columns); | |
202 | ASSERT_TRUE(result->Equals(*expected)); | |
203 | ||
204 | // Error states | |
205 | std::vector<std::shared_ptr<RecordBatch>> empty_batches; | |
206 | ASSERT_RAISES(Invalid, Table::FromRecordBatches(empty_batches)); | |
207 | ||
208 | auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); | |
209 | ||
210 | std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]}; | |
211 | auto batch2 = RecordBatch::Make(other_schema, length, other_arrays); | |
212 | ASSERT_RAISES(Invalid, Table::FromRecordBatches({batch1, batch2})); | |
213 | } | |
214 | ||
215 | TEST_F(TestTable, FromRecordBatchesZeroLength) { | |
216 | // ARROW-2307 | |
217 | MakeExample1(10); | |
218 | ||
219 | ASSERT_OK_AND_ASSIGN(auto result, Table::FromRecordBatches(schema_, {})); | |
220 | ||
221 | ASSERT_EQ(0, result->num_rows()); | |
222 | ASSERT_TRUE(result->schema()->Equals(*schema_)); | |
223 | } | |
224 | ||
225 | TEST_F(TestTable, CombineChunksZeroColumn) { | |
226 | // ARROW-11232 | |
227 | auto record_batch = RecordBatch::Make(schema({}), /*num_rows=*/10, | |
228 | std::vector<std::shared_ptr<Array>>{}); | |
229 | ||
230 | ASSERT_OK_AND_ASSIGN( | |
231 | auto table, | |
232 | Table::FromRecordBatches(record_batch->schema(), {record_batch, record_batch})); | |
233 | ASSERT_EQ(20, table->num_rows()); | |
234 | ||
235 | ASSERT_OK_AND_ASSIGN(auto combined, table->CombineChunks()); | |
236 | ||
237 | EXPECT_EQ(20, combined->num_rows()); | |
238 | EXPECT_TRUE(combined->Equals(*table)); | |
239 | } | |
240 | ||
241 | TEST_F(TestTable, CombineChunksZeroRow) { | |
242 | MakeExample1(10); | |
243 | ||
244 | ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches(schema_, {})); | |
245 | ASSERT_EQ(0, table->num_rows()); | |
246 | ||
247 | ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks()); | |
248 | ||
249 | EXPECT_TRUE(compacted->Equals(*table)); | |
250 | } | |
251 | ||
252 | TEST_F(TestTable, CombineChunks) { | |
253 | MakeExample1(10); | |
254 | auto batch1 = RecordBatch::Make(schema_, 10, arrays_); | |
255 | ||
256 | MakeExample1(15); | |
257 | auto batch2 = RecordBatch::Make(schema_, 15, arrays_); | |
258 | ||
259 | ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches({batch1, batch2})); | |
260 | for (int i = 0; i < table->num_columns(); ++i) { | |
261 | ASSERT_EQ(2, table->column(i)->num_chunks()); | |
262 | } | |
263 | ||
264 | ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks()); | |
265 | ||
266 | EXPECT_TRUE(compacted->Equals(*table)); | |
267 | for (int i = 0; i < compacted->num_columns(); ++i) { | |
268 | EXPECT_EQ(1, compacted->column(i)->num_chunks()); | |
269 | } | |
270 | } | |
271 | ||
272 | TEST_F(TestTable, LARGE_MEMORY_TEST(CombineChunksStringColumn)) { | |
273 | schema_ = schema({field("str", utf8())}); | |
274 | arrays_ = {nullptr}; | |
275 | ||
276 | std::string value(1 << 16, '-'); | |
277 | ||
278 | auto num_rows = kBinaryMemoryLimit / static_cast<int64_t>(value.size()); | |
279 | StringBuilder builder; | |
280 | ASSERT_OK(builder.Resize(num_rows)); | |
281 | ASSERT_OK(builder.ReserveData(value.size() * num_rows)); | |
282 | for (int i = 0; i < num_rows; ++i) builder.UnsafeAppend(value); | |
283 | ASSERT_OK(builder.Finish(&arrays_[0])); | |
284 | ||
285 | auto batch = RecordBatch::Make(schema_, num_rows, arrays_); | |
286 | ||
287 | ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches({batch, batch})); | |
288 | ASSERT_EQ(table->column(0)->num_chunks(), 2); | |
289 | ||
290 | ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks()); | |
291 | EXPECT_TRUE(compacted->Equals(*table)); | |
292 | ||
293 | // can't compact these columns any further; they contain too much character data | |
294 | ASSERT_EQ(compacted->column(0)->num_chunks(), 2); | |
295 | } | |
296 | ||
297 | TEST_F(TestTable, ConcatenateTables) { | |
298 | const int64_t length = 10; | |
299 | ||
300 | MakeExample1(length); | |
301 | auto batch1 = RecordBatch::Make(schema_, length, arrays_); | |
302 | ||
303 | // generate different data | |
304 | MakeExample1(length); | |
305 | auto batch2 = RecordBatch::Make(schema_, length, arrays_); | |
306 | ||
307 | ASSERT_OK_AND_ASSIGN(auto t1, Table::FromRecordBatches({batch1})); | |
308 | ASSERT_OK_AND_ASSIGN(auto t2, Table::FromRecordBatches({batch2})); | |
309 | ||
310 | ASSERT_OK_AND_ASSIGN(auto result, ConcatenateTables({t1, t2})); | |
311 | ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches({batch1, batch2})); | |
312 | AssertTablesEqual(*expected, *result); | |
313 | ||
314 | // Error states | |
315 | std::vector<std::shared_ptr<Table>> empty_tables; | |
316 | ASSERT_RAISES(Invalid, ConcatenateTables(empty_tables)); | |
317 | ||
318 | auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); | |
319 | ||
320 | std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]}; | |
321 | auto batch3 = RecordBatch::Make(other_schema, length, other_arrays); | |
322 | ASSERT_OK_AND_ASSIGN(auto t3, Table::FromRecordBatches({batch3})); | |
323 | ||
324 | ASSERT_RAISES(Invalid, ConcatenateTables({t1, t3})); | |
325 | } | |
326 | ||
327 | std::shared_ptr<Table> MakeTableWithOneNullFilledColumn( | |
328 | const std::string& column_name, const std::shared_ptr<DataType>& data_type, | |
329 | const int length) { | |
330 | auto array_of_nulls = *MakeArrayOfNull(data_type, length); | |
331 | return Table::Make(schema({field(column_name, data_type)}), {array_of_nulls}); | |
332 | } | |
333 | ||
334 | using TestPromoteTableToSchema = TestTable; | |
335 | ||
336 | TEST_F(TestPromoteTableToSchema, IdenticalSchema) { | |
337 | const int length = 10; | |
338 | auto metadata = | |
339 | std::shared_ptr<KeyValueMetadata>(new KeyValueMetadata({"foo"}, {"bar"})); | |
340 | MakeExample1(length); | |
341 | std::shared_ptr<Table> table = Table::Make(schema_, arrays_); | |
342 | ||
343 | ASSERT_OK_AND_ASSIGN(auto result, | |
344 | PromoteTableToSchema(table, schema_->WithMetadata(metadata))); | |
345 | ||
346 | std::shared_ptr<Table> expected = table->ReplaceSchemaMetadata(metadata); | |
347 | ||
348 | ASSERT_TRUE(result->Equals(*expected)); | |
349 | } | |
350 | ||
351 | // The promoted table's fields are ordered the same as the promote-to schema. | |
352 | TEST_F(TestPromoteTableToSchema, FieldsReorderedAfterPromotion) { | |
353 | const int length = 10; | |
354 | MakeExample1(length); | |
355 | ||
356 | std::vector<std::shared_ptr<Field>> reversed_fields(schema_->fields().crbegin(), | |
357 | schema_->fields().crend()); | |
358 | std::vector<std::shared_ptr<Array>> reversed_arrays(arrays_.crbegin(), arrays_.crend()); | |
359 | ||
360 | std::shared_ptr<Table> table = Table::Make(schema(reversed_fields), reversed_arrays); | |
361 | ||
362 | ASSERT_OK_AND_ASSIGN(auto result, PromoteTableToSchema(table, schema_)); | |
363 | ||
364 | ASSERT_TRUE(result->schema()->Equals(*schema_)); | |
365 | } | |
366 | ||
367 | TEST_F(TestPromoteTableToSchema, PromoteNullTypeField) { | |
368 | const int length = 10; | |
369 | auto metadata = | |
370 | std::shared_ptr<KeyValueMetadata>(new KeyValueMetadata({"foo"}, {"bar"})); | |
371 | auto table_with_null_column = MakeTableWithOneNullFilledColumn("field", null(), length) | |
372 | ->ReplaceSchemaMetadata(metadata); | |
373 | auto promoted_schema = schema({field("field", int32())}); | |
374 | ||
375 | ASSERT_OK_AND_ASSIGN(auto result, | |
376 | PromoteTableToSchema(table_with_null_column, promoted_schema)); | |
377 | ||
378 | ASSERT_TRUE( | |
379 | result->Equals(*MakeTableWithOneNullFilledColumn("field", int32(), length))); | |
380 | } | |
381 | ||
382 | TEST_F(TestPromoteTableToSchema, AddMissingField) { | |
383 | const int length = 10; | |
384 | auto f0 = field("f0", int32()); | |
385 | auto table = Table::Make(schema({}), std::vector<std::shared_ptr<Array>>(), length); | |
386 | auto promoted_schema = schema({field("field", int32())}); | |
387 | ||
388 | ASSERT_OK_AND_ASSIGN(auto result, PromoteTableToSchema(table, promoted_schema)); | |
389 | ||
390 | ASSERT_TRUE( | |
391 | result->Equals(*MakeTableWithOneNullFilledColumn("field", int32(), length))); | |
392 | } | |
393 | ||
394 | TEST_F(TestPromoteTableToSchema, IncompatibleTypes) { | |
395 | const int length = 10; | |
396 | auto table = MakeTableWithOneNullFilledColumn("field", int32(), length); | |
397 | ||
398 | // Invalid promotion: int32 to null. | |
399 | ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", null())}))); | |
400 | ||
401 | // Invalid promotion: int32 to uint32. | |
402 | ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", uint32())}))); | |
403 | } | |
404 | ||
405 | TEST_F(TestPromoteTableToSchema, IncompatibleNullity) { | |
406 | const int length = 10; | |
407 | auto table = MakeTableWithOneNullFilledColumn("field", int32(), length); | |
408 | ASSERT_RAISES(Invalid, | |
409 | PromoteTableToSchema( | |
410 | table, schema({field("field", uint32())->WithNullable(false)}))); | |
411 | } | |
412 | ||
413 | TEST_F(TestPromoteTableToSchema, DuplicateFieldNames) { | |
414 | const int length = 10; | |
415 | ||
416 | auto table = Table::Make( | |
417 | schema({field("field", int32()), field("field", null())}), | |
418 | {MakeRandomArray<Int32Array>(length), MakeRandomArray<NullArray>(length)}); | |
419 | ||
420 | ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", int32())}))); | |
421 | } | |
422 | ||
423 | TEST_F(TestPromoteTableToSchema, TableFieldAbsentFromSchema) { | |
424 | const int length = 10; | |
425 | ||
426 | auto table = | |
427 | Table::Make(schema({field("f0", int32())}), {MakeRandomArray<Int32Array>(length)}); | |
428 | ||
429 | std::shared_ptr<Table> result; | |
430 | ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("f1", int32())}))); | |
431 | } | |
432 | ||
433 | class ConcatenateTablesWithPromotionTest : public TestTable { | |
434 | protected: | |
435 | ConcatenateTablesOptions GetOptions() { | |
436 | ConcatenateTablesOptions options; | |
437 | options.unify_schemas = true; | |
438 | return options; | |
439 | } | |
440 | ||
441 | void MakeExample2(int length) { | |
442 | auto f0 = field("f0", int32()); | |
443 | auto f1 = field("f1", null()); | |
444 | ||
445 | std::vector<std::shared_ptr<Field>> fields = {f0, f1}; | |
446 | schema_ = std::make_shared<Schema>(fields); | |
447 | ||
448 | arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<NullArray>(length)}; | |
449 | ||
450 | columns_ = {std::make_shared<ChunkedArray>(arrays_[0]), | |
451 | std::make_shared<ChunkedArray>(arrays_[1])}; | |
452 | } | |
453 | ||
454 | void AssertTablesEqualUnorderedFields(const Table& lhs, const Table& rhs) { | |
455 | ASSERT_EQ(lhs.schema()->num_fields(), rhs.schema()->num_fields()); | |
456 | if (lhs.schema()->metadata()) { | |
457 | ASSERT_NE(nullptr, rhs.schema()->metadata()); | |
458 | ASSERT_TRUE(lhs.schema()->metadata()->Equals(*rhs.schema()->metadata())); | |
459 | } else { | |
460 | ASSERT_EQ(nullptr, rhs.schema()->metadata()); | |
461 | } | |
462 | for (int i = 0; i < lhs.schema()->num_fields(); ++i) { | |
463 | const auto& lhs_field = lhs.schema()->field(i); | |
464 | const auto& rhs_field = rhs.schema()->GetFieldByName(lhs_field->name()); | |
465 | ASSERT_NE(nullptr, rhs_field); | |
466 | ASSERT_TRUE(lhs_field->Equals(rhs_field, true)); | |
467 | const auto& lhs_column = lhs.column(i); | |
468 | const auto& rhs_column = rhs.GetColumnByName(lhs_field->name()); | |
469 | AssertChunkedEqual(*lhs_column, *rhs_column); | |
470 | } | |
471 | } | |
472 | }; | |
473 | ||
474 | TEST_F(ConcatenateTablesWithPromotionTest, Simple) { | |
475 | const int64_t length = 10; | |
476 | ||
477 | MakeExample1(length); | |
478 | auto batch1 = RecordBatch::Make(schema_, length, arrays_); | |
479 | ||
480 | ASSERT_OK_AND_ASSIGN(auto f1_nulls, MakeArrayOfNull(schema_->field(1)->type(), length)); | |
481 | ASSERT_OK_AND_ASSIGN(auto f2_nulls, MakeArrayOfNull(schema_->field(2)->type(), length)); | |
482 | ||
483 | MakeExample2(length); | |
484 | auto batch2 = RecordBatch::Make(schema_, length, arrays_); | |
485 | ||
486 | auto batch2_null_filled = | |
487 | RecordBatch::Make(batch1->schema(), length, {arrays_[0], f1_nulls, f2_nulls}); | |
488 | ||
489 | ASSERT_OK_AND_ASSIGN(auto t1, Table::FromRecordBatches({batch1})); | |
490 | ASSERT_OK_AND_ASSIGN(auto t2, Table::FromRecordBatches({batch2})); | |
491 | ASSERT_OK_AND_ASSIGN(auto t3, Table::FromRecordBatches({batch2_null_filled})); | |
492 | ||
493 | ASSERT_OK_AND_ASSIGN(auto result, ConcatenateTables({t1, t2}, GetOptions())); | |
494 | ASSERT_OK_AND_ASSIGN(auto expected, ConcatenateTables({t1, t3})); | |
495 | AssertTablesEqualUnorderedFields(*expected, *result); | |
496 | ||
497 | ASSERT_OK_AND_ASSIGN(result, ConcatenateTables({t2, t1}, GetOptions())); | |
498 | ASSERT_OK_AND_ASSIGN(expected, ConcatenateTables({t3, t1})); | |
499 | AssertTablesEqualUnorderedFields(*expected, *result); | |
500 | } | |
501 | ||
502 | TEST_F(TestTable, Slice) { | |
503 | const int64_t length = 10; | |
504 | ||
505 | MakeExample1(length); | |
506 | auto batch = RecordBatch::Make(schema_, length, arrays_); | |
507 | ||
508 | ASSERT_OK_AND_ASSIGN(auto half, Table::FromRecordBatches({batch})); | |
509 | ASSERT_OK_AND_ASSIGN(auto whole, Table::FromRecordBatches({batch, batch})); | |
510 | ASSERT_OK_AND_ASSIGN(auto three, Table::FromRecordBatches({batch, batch, batch})); | |
511 | ||
512 | AssertTablesEqual(*whole->Slice(0, length), *half); | |
513 | AssertTablesEqual(*whole->Slice(length), *half); | |
514 | AssertTablesEqual(*whole->Slice(length / 3, 2 * (length - length / 3)), | |
515 | *three->Slice(length + length / 3, 2 * (length - length / 3))); | |
516 | } | |
517 | ||
518 | TEST_F(TestTable, RemoveColumn) { | |
519 | const int64_t length = 10; | |
520 | MakeExample1(length); | |
521 | ||
522 | auto table_sp = Table::Make(schema_, columns_); | |
523 | const Table& table = *table_sp; | |
524 | ||
525 | ASSERT_OK_AND_ASSIGN(auto result, table.RemoveColumn(0)); | |
526 | ||
527 | auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(2)}); | |
528 | std::vector<std::shared_ptr<ChunkedArray>> ex_columns = {table.column(1), | |
529 | table.column(2)}; | |
530 | ||
531 | auto expected = Table::Make(ex_schema, ex_columns); | |
532 | ASSERT_TRUE(result->Equals(*expected)); | |
533 | ||
534 | ASSERT_OK_AND_ASSIGN(result, table.RemoveColumn(1)); | |
535 | ex_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); | |
536 | ex_columns = {table.column(0), table.column(2)}; | |
537 | ||
538 | expected = Table::Make(ex_schema, ex_columns); | |
539 | ASSERT_TRUE(result->Equals(*expected)); | |
540 | ||
541 | ASSERT_OK_AND_ASSIGN(result, table.RemoveColumn(2)); | |
542 | ex_schema = ::arrow::schema({schema_->field(0), schema_->field(1)}); | |
543 | ex_columns = {table.column(0), table.column(1)}; | |
544 | expected = Table::Make(ex_schema, ex_columns); | |
545 | ASSERT_TRUE(result->Equals(*expected)); | |
546 | } | |
547 | ||
548 | TEST_F(TestTable, SetColumn) { | |
549 | const int64_t length = 10; | |
550 | MakeExample1(length); | |
551 | ||
552 | auto table_sp = Table::Make(schema_, columns_); | |
553 | const Table& table = *table_sp; | |
554 | ||
555 | ASSERT_OK_AND_ASSIGN(auto result, | |
556 | table.SetColumn(0, schema_->field(1), table.column(1))); | |
557 | ||
558 | auto ex_schema = | |
559 | ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)}); | |
560 | ||
561 | auto expected = | |
562 | Table::Make(ex_schema, {table.column(1), table.column(1), table.column(2)}); | |
563 | ASSERT_TRUE(result->Equals(*expected)); | |
564 | } | |
565 | ||
566 | TEST_F(TestTable, RenameColumns) { | |
567 | MakeExample1(10); | |
568 | auto table = Table::Make(schema_, columns_); | |
569 | EXPECT_THAT(table->ColumnNames(), testing::ElementsAre("f0", "f1", "f2")); | |
570 | ||
571 | ASSERT_OK_AND_ASSIGN(auto renamed, table->RenameColumns({"zero", "one", "two"})); | |
572 | EXPECT_THAT(renamed->ColumnNames(), testing::ElementsAre("zero", "one", "two")); | |
573 | ASSERT_OK(renamed->ValidateFull()); | |
574 | ||
575 | ASSERT_RAISES(Invalid, table->RenameColumns({"hello", "world"})); | |
576 | } | |
577 | ||
578 | TEST_F(TestTable, SelectColumns) { | |
579 | MakeExample1(10); | |
580 | auto table = Table::Make(schema_, columns_); | |
581 | ||
582 | ASSERT_OK_AND_ASSIGN(auto subset, table->SelectColumns({0, 2})); | |
583 | ASSERT_OK(subset->ValidateFull()); | |
584 | ||
585 | auto expexted_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); | |
586 | auto expected = Table::Make(expexted_schema, {table->column(0), table->column(2)}); | |
587 | ASSERT_TRUE(subset->Equals(*expected)); | |
588 | ||
589 | // Out of bounds indices | |
590 | ASSERT_RAISES(Invalid, table->SelectColumns({0, 3})); | |
591 | ASSERT_RAISES(Invalid, table->SelectColumns({-1})); | |
592 | } | |
593 | ||
594 | TEST_F(TestTable, RemoveColumnEmpty) { | |
595 | // ARROW-1865 | |
596 | const int64_t length = 10; | |
597 | ||
598 | auto f0 = field("f0", int32()); | |
599 | auto schema = ::arrow::schema({f0}); | |
600 | auto a0 = MakeRandomArray<Int32Array>(length); | |
601 | ||
602 | auto table = Table::Make(schema, {std::make_shared<ChunkedArray>(a0)}); | |
603 | ||
604 | ASSERT_OK_AND_ASSIGN(auto empty, table->RemoveColumn(0)); | |
605 | ||
606 | ASSERT_EQ(table->num_rows(), empty->num_rows()); | |
607 | ||
608 | ASSERT_OK_AND_ASSIGN(auto added, empty->AddColumn(0, f0, table->column(0))); | |
609 | ASSERT_EQ(table->num_rows(), added->num_rows()); | |
610 | } | |
611 | ||
612 | TEST_F(TestTable, AddColumn) { | |
613 | const int64_t length = 10; | |
614 | MakeExample1(length); | |
615 | ||
616 | auto table_sp = Table::Make(schema_, columns_); | |
617 | const Table& table = *table_sp; | |
618 | ||
619 | auto f0 = schema_->field(0); | |
620 | ||
621 | // Some negative tests with invalid index | |
622 | ASSERT_RAISES(Invalid, table.AddColumn(10, f0, columns_[0])); | |
623 | ASSERT_RAISES(Invalid, table.AddColumn(4, f0, columns_[0])); | |
624 | ASSERT_RAISES(Invalid, table.AddColumn(-1, f0, columns_[0])); | |
625 | ||
626 | // Add column with wrong length | |
627 | auto longer_col = | |
628 | std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length + 1)); | |
629 | ASSERT_RAISES(Invalid, table.AddColumn(0, f0, longer_col)); | |
630 | ||
631 | // Add column 0 in different places | |
632 | ASSERT_OK_AND_ASSIGN(auto result, table.AddColumn(0, f0, columns_[0])); | |
633 | auto ex_schema = ::arrow::schema( | |
634 | {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); | |
635 | ||
636 | auto expected = Table::Make( | |
637 | ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); | |
638 | ASSERT_TRUE(result->Equals(*expected)); | |
639 | ||
640 | ASSERT_OK_AND_ASSIGN(result, table.AddColumn(1, f0, columns_[0])); | |
641 | ex_schema = ::arrow::schema( | |
642 | {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); | |
643 | ||
644 | expected = Table::Make( | |
645 | ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); | |
646 | ASSERT_TRUE(result->Equals(*expected)); | |
647 | ||
648 | ASSERT_OK_AND_ASSIGN(result, table.AddColumn(2, f0, columns_[0])); | |
649 | ex_schema = ::arrow::schema( | |
650 | {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)}); | |
651 | expected = Table::Make( | |
652 | ex_schema, {table.column(0), table.column(1), table.column(0), table.column(2)}); | |
653 | ASSERT_TRUE(result->Equals(*expected)); | |
654 | ||
655 | ASSERT_OK_AND_ASSIGN(result, table.AddColumn(3, f0, columns_[0])); | |
656 | ex_schema = ::arrow::schema( | |
657 | {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)}); | |
658 | expected = Table::Make( | |
659 | ex_schema, {table.column(0), table.column(1), table.column(2), table.column(0)}); | |
660 | ASSERT_TRUE(result->Equals(*expected)); | |
661 | } | |
662 | ||
663 | class TestTableBatchReader : public TestBase {}; | |
664 | ||
665 | TEST_F(TestTableBatchReader, ReadNext) { | |
666 | ArrayVector c1, c2; | |
667 | ||
668 | auto a1 = MakeRandomArray<Int32Array>(10); | |
669 | auto a2 = MakeRandomArray<Int32Array>(20); | |
670 | auto a3 = MakeRandomArray<Int32Array>(30); | |
671 | auto a4 = MakeRandomArray<Int32Array>(10); | |
672 | ||
673 | auto sch1 = arrow::schema({field("f1", int32()), field("f2", int32())}); | |
674 | ||
675 | std::vector<std::shared_ptr<ChunkedArray>> columns; | |
676 | ||
677 | std::shared_ptr<RecordBatch> batch; | |
678 | ||
679 | std::vector<std::shared_ptr<Array>> arrays_1 = {a1, a4, a2}; | |
680 | std::vector<std::shared_ptr<Array>> arrays_2 = {a2, a2}; | |
681 | columns = {std::make_shared<ChunkedArray>(arrays_1), | |
682 | std::make_shared<ChunkedArray>(arrays_2)}; | |
683 | auto t1 = Table::Make(sch1, columns); | |
684 | ||
685 | TableBatchReader i1(*t1); | |
686 | ||
687 | ASSERT_OK(i1.ReadNext(&batch)); | |
688 | ASSERT_EQ(10, batch->num_rows()); | |
689 | ||
690 | ASSERT_OK(i1.ReadNext(&batch)); | |
691 | ASSERT_EQ(10, batch->num_rows()); | |
692 | ||
693 | ASSERT_OK(i1.ReadNext(&batch)); | |
694 | ASSERT_EQ(20, batch->num_rows()); | |
695 | ||
696 | ASSERT_OK(i1.ReadNext(&batch)); | |
697 | ASSERT_EQ(nullptr, batch); | |
698 | ||
699 | arrays_1 = {a1}; | |
700 | arrays_2 = {a4}; | |
701 | columns = {std::make_shared<ChunkedArray>(arrays_1), | |
702 | std::make_shared<ChunkedArray>(arrays_2)}; | |
703 | auto t2 = Table::Make(sch1, columns); | |
704 | ||
705 | TableBatchReader i2(*t2); | |
706 | ||
707 | ASSERT_OK(i2.ReadNext(&batch)); | |
708 | ASSERT_EQ(10, batch->num_rows()); | |
709 | ||
710 | // Ensure non-sliced | |
711 | ASSERT_EQ(a1->data().get(), batch->column_data(0).get()); | |
712 | ASSERT_EQ(a4->data().get(), batch->column_data(1).get()); | |
713 | ||
714 | ASSERT_OK(i1.ReadNext(&batch)); | |
715 | ASSERT_EQ(nullptr, batch); | |
716 | } | |
717 | ||
718 | TEST_F(TestTableBatchReader, Chunksize) { | |
719 | auto a1 = MakeRandomArray<Int32Array>(10); | |
720 | auto a2 = MakeRandomArray<Int32Array>(20); | |
721 | auto a3 = MakeRandomArray<Int32Array>(10); | |
722 | ||
723 | auto sch1 = arrow::schema({field("f1", int32())}); | |
724 | ||
725 | std::vector<std::shared_ptr<Array>> arrays = {a1, a2, a3}; | |
726 | auto t1 = Table::Make(sch1, {std::make_shared<ChunkedArray>(arrays)}); | |
727 | ||
728 | TableBatchReader i1(*t1); | |
729 | ||
730 | i1.set_chunksize(15); | |
731 | ||
732 | std::shared_ptr<RecordBatch> batch; | |
733 | ASSERT_OK(i1.ReadNext(&batch)); | |
734 | ASSERT_OK(batch->ValidateFull()); | |
735 | ASSERT_EQ(10, batch->num_rows()); | |
736 | ||
737 | ASSERT_OK(i1.ReadNext(&batch)); | |
738 | ASSERT_OK(batch->ValidateFull()); | |
739 | ASSERT_EQ(15, batch->num_rows()); | |
740 | ||
741 | ASSERT_OK(i1.ReadNext(&batch)); | |
742 | ASSERT_OK(batch->ValidateFull()); | |
743 | ASSERT_EQ(5, batch->num_rows()); | |
744 | ||
745 | ASSERT_OK(i1.ReadNext(&batch)); | |
746 | ASSERT_OK(batch->ValidateFull()); | |
747 | ASSERT_EQ(10, batch->num_rows()); | |
748 | ||
749 | ASSERT_OK(i1.ReadNext(&batch)); | |
750 | ASSERT_EQ(nullptr, batch); | |
751 | } | |
752 | ||
753 | } // namespace arrow |