]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/table_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / table_test.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <memory>
20#include <vector>
21
22#include <gmock/gmock.h>
23#include <gtest/gtest.h>
24
25#include "arrow/array/array_base.h"
26#include "arrow/array/data.h"
27#include "arrow/array/util.h"
28#include "arrow/chunked_array.h"
29#include "arrow/record_batch.h"
30#include "arrow/status.h"
31#include "arrow/table.h"
32#include "arrow/testing/gtest_common.h"
33#include "arrow/testing/gtest_util.h"
34#include "arrow/type.h"
35#include "arrow/util/key_value_metadata.h"
36
37namespace arrow {
38
39class TestTable : public TestBase {
40 public:
41 void MakeExample1(int length) {
42 auto f0 = field("f0", int32());
43 auto f1 = field("f1", uint8());
44 auto f2 = field("f2", int16());
45
46 std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
47 schema_ = std::make_shared<Schema>(fields);
48
49 arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<UInt8Array>(length),
50 MakeRandomArray<Int16Array>(length)};
51
52 columns_ = {std::make_shared<ChunkedArray>(arrays_[0]),
53 std::make_shared<ChunkedArray>(arrays_[1]),
54 std::make_shared<ChunkedArray>(arrays_[2])};
55 }
56
57 protected:
58 std::shared_ptr<Table> table_;
59 std::shared_ptr<Schema> schema_;
60
61 std::vector<std::shared_ptr<Array>> arrays_;
62 std::vector<std::shared_ptr<ChunkedArray>> columns_;
63};
64
65TEST_F(TestTable, EmptySchema) {
66 auto empty_schema = ::arrow::schema({});
67 table_ = Table::Make(empty_schema, columns_);
68 ASSERT_OK(table_->ValidateFull());
69 ASSERT_EQ(0, table_->num_rows());
70 ASSERT_EQ(0, table_->num_columns());
71}
72
73TEST_F(TestTable, Ctors) {
74 const int length = 100;
75 MakeExample1(length);
76
77 table_ = Table::Make(schema_, columns_);
78 ASSERT_OK(table_->ValidateFull());
79 ASSERT_EQ(length, table_->num_rows());
80 ASSERT_EQ(3, table_->num_columns());
81
82 auto array_ctor = Table::Make(schema_, arrays_);
83 ASSERT_TRUE(table_->Equals(*array_ctor));
84
85 table_ = Table::Make(schema_, columns_, length);
86 ASSERT_OK(table_->ValidateFull());
87 ASSERT_EQ(length, table_->num_rows());
88
89 table_ = Table::Make(schema_, arrays_);
90 ASSERT_OK(table_->ValidateFull());
91 ASSERT_EQ(length, table_->num_rows());
92 ASSERT_EQ(3, table_->num_columns());
93}
94
95TEST_F(TestTable, Metadata) {
96 const int length = 100;
97 MakeExample1(length);
98
99 table_ = Table::Make(schema_, columns_);
100
101 ASSERT_TRUE(table_->schema()->Equals(*schema_));
102
103 auto col = table_->column(0);
104 ASSERT_EQ(schema_->field(0)->type(), col->type());
105}
106
107TEST_F(TestTable, InvalidColumns) {
108 // Check that columns are all the same length
109 const int length = 100;
110 MakeExample1(length);
111
112 table_ = Table::Make(schema_, columns_, length - 1);
113 ASSERT_RAISES(Invalid, table_->ValidateFull());
114
115 columns_.clear();
116
117 // Wrong number of columns
118 table_ = Table::Make(schema_, columns_, length);
119 ASSERT_RAISES(Invalid, table_->ValidateFull());
120
121 columns_ = {std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length)),
122 std::make_shared<ChunkedArray>(MakeRandomArray<UInt8Array>(length)),
123 std::make_shared<ChunkedArray>(MakeRandomArray<Int16Array>(length - 1))};
124
125 table_ = Table::Make(schema_, columns_, length);
126 ASSERT_RAISES(Invalid, table_->ValidateFull());
127}
128
129TEST_F(TestTable, AllColumnsAndFields) {
130 const int length = 100;
131 MakeExample1(length);
132 table_ = Table::Make(schema_, columns_);
133
134 auto columns = table_->columns();
135 auto fields = table_->fields();
136
137 for (int i = 0; i < table_->num_columns(); ++i) {
138 AssertChunkedEqual(*table_->column(i), *columns[i]);
139 AssertFieldEqual(*table_->field(i), *fields[i]);
140 }
141
142 // Zero length
143 std::vector<std::shared_ptr<Array>> t2_columns;
144 auto t2 = Table::Make(::arrow::schema({}), t2_columns);
145 columns = t2->columns();
146 fields = t2->fields();
147
148 ASSERT_EQ(0, columns.size());
149 ASSERT_EQ(0, fields.size());
150}
151
152TEST_F(TestTable, Equals) {
153 const int length = 100;
154 MakeExample1(length);
155
156 table_ = Table::Make(schema_, columns_);
157
158 ASSERT_TRUE(table_->Equals(*table_));
159 // Differing schema
160 auto f0 = field("f3", int32());
161 auto f1 = field("f4", uint8());
162 auto f2 = field("f5", int16());
163 std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
164 auto other_schema = std::make_shared<Schema>(fields);
165 auto other = Table::Make(other_schema, columns_);
166 ASSERT_FALSE(table_->Equals(*other));
167 // Differing columns
168 std::vector<std::shared_ptr<ChunkedArray>> other_columns = {
169 std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length, 10)),
170 std::make_shared<ChunkedArray>(MakeRandomArray<UInt8Array>(length, 10)),
171 std::make_shared<ChunkedArray>(MakeRandomArray<Int16Array>(length, 10))};
172
173 other = Table::Make(schema_, other_columns);
174 ASSERT_FALSE(table_->Equals(*other));
175
176 // Differring schema metadata
177 other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"}));
178 other = Table::Make(other_schema, columns_);
179 ASSERT_TRUE(table_->Equals(*other));
180 ASSERT_FALSE(table_->Equals(*other, /*check_metadata=*/true));
181}
182
183TEST_F(TestTable, FromRecordBatches) {
184 const int64_t length = 10;
185 MakeExample1(length);
186
187 auto batch1 = RecordBatch::Make(schema_, length, arrays_);
188
189 ASSERT_OK_AND_ASSIGN(auto result, Table::FromRecordBatches({batch1}));
190
191 auto expected = Table::Make(schema_, columns_);
192 ASSERT_TRUE(result->Equals(*expected));
193
194 std::vector<std::shared_ptr<ChunkedArray>> other_columns;
195 for (int i = 0; i < schema_->num_fields(); ++i) {
196 std::vector<std::shared_ptr<Array>> col_arrays = {arrays_[i], arrays_[i]};
197 other_columns.push_back(std::make_shared<ChunkedArray>(col_arrays));
198 }
199
200 ASSERT_OK_AND_ASSIGN(result, Table::FromRecordBatches({batch1, batch1}));
201 expected = Table::Make(schema_, other_columns);
202 ASSERT_TRUE(result->Equals(*expected));
203
204 // Error states
205 std::vector<std::shared_ptr<RecordBatch>> empty_batches;
206 ASSERT_RAISES(Invalid, Table::FromRecordBatches(empty_batches));
207
208 auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
209
210 std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]};
211 auto batch2 = RecordBatch::Make(other_schema, length, other_arrays);
212 ASSERT_RAISES(Invalid, Table::FromRecordBatches({batch1, batch2}));
213}
214
215TEST_F(TestTable, FromRecordBatchesZeroLength) {
216 // ARROW-2307
217 MakeExample1(10);
218
219 ASSERT_OK_AND_ASSIGN(auto result, Table::FromRecordBatches(schema_, {}));
220
221 ASSERT_EQ(0, result->num_rows());
222 ASSERT_TRUE(result->schema()->Equals(*schema_));
223}
224
225TEST_F(TestTable, CombineChunksZeroColumn) {
226 // ARROW-11232
227 auto record_batch = RecordBatch::Make(schema({}), /*num_rows=*/10,
228 std::vector<std::shared_ptr<Array>>{});
229
230 ASSERT_OK_AND_ASSIGN(
231 auto table,
232 Table::FromRecordBatches(record_batch->schema(), {record_batch, record_batch}));
233 ASSERT_EQ(20, table->num_rows());
234
235 ASSERT_OK_AND_ASSIGN(auto combined, table->CombineChunks());
236
237 EXPECT_EQ(20, combined->num_rows());
238 EXPECT_TRUE(combined->Equals(*table));
239}
240
241TEST_F(TestTable, CombineChunksZeroRow) {
242 MakeExample1(10);
243
244 ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches(schema_, {}));
245 ASSERT_EQ(0, table->num_rows());
246
247 ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks());
248
249 EXPECT_TRUE(compacted->Equals(*table));
250}
251
252TEST_F(TestTable, CombineChunks) {
253 MakeExample1(10);
254 auto batch1 = RecordBatch::Make(schema_, 10, arrays_);
255
256 MakeExample1(15);
257 auto batch2 = RecordBatch::Make(schema_, 15, arrays_);
258
259 ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches({batch1, batch2}));
260 for (int i = 0; i < table->num_columns(); ++i) {
261 ASSERT_EQ(2, table->column(i)->num_chunks());
262 }
263
264 ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks());
265
266 EXPECT_TRUE(compacted->Equals(*table));
267 for (int i = 0; i < compacted->num_columns(); ++i) {
268 EXPECT_EQ(1, compacted->column(i)->num_chunks());
269 }
270}
271
272TEST_F(TestTable, LARGE_MEMORY_TEST(CombineChunksStringColumn)) {
273 schema_ = schema({field("str", utf8())});
274 arrays_ = {nullptr};
275
276 std::string value(1 << 16, '-');
277
278 auto num_rows = kBinaryMemoryLimit / static_cast<int64_t>(value.size());
279 StringBuilder builder;
280 ASSERT_OK(builder.Resize(num_rows));
281 ASSERT_OK(builder.ReserveData(value.size() * num_rows));
282 for (int i = 0; i < num_rows; ++i) builder.UnsafeAppend(value);
283 ASSERT_OK(builder.Finish(&arrays_[0]));
284
285 auto batch = RecordBatch::Make(schema_, num_rows, arrays_);
286
287 ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches({batch, batch}));
288 ASSERT_EQ(table->column(0)->num_chunks(), 2);
289
290 ASSERT_OK_AND_ASSIGN(auto compacted, table->CombineChunks());
291 EXPECT_TRUE(compacted->Equals(*table));
292
293 // can't compact these columns any further; they contain too much character data
294 ASSERT_EQ(compacted->column(0)->num_chunks(), 2);
295}
296
297TEST_F(TestTable, ConcatenateTables) {
298 const int64_t length = 10;
299
300 MakeExample1(length);
301 auto batch1 = RecordBatch::Make(schema_, length, arrays_);
302
303 // generate different data
304 MakeExample1(length);
305 auto batch2 = RecordBatch::Make(schema_, length, arrays_);
306
307 ASSERT_OK_AND_ASSIGN(auto t1, Table::FromRecordBatches({batch1}));
308 ASSERT_OK_AND_ASSIGN(auto t2, Table::FromRecordBatches({batch2}));
309
310 ASSERT_OK_AND_ASSIGN(auto result, ConcatenateTables({t1, t2}));
311 ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches({batch1, batch2}));
312 AssertTablesEqual(*expected, *result);
313
314 // Error states
315 std::vector<std::shared_ptr<Table>> empty_tables;
316 ASSERT_RAISES(Invalid, ConcatenateTables(empty_tables));
317
318 auto other_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
319
320 std::vector<std::shared_ptr<Array>> other_arrays = {arrays_[0], arrays_[1]};
321 auto batch3 = RecordBatch::Make(other_schema, length, other_arrays);
322 ASSERT_OK_AND_ASSIGN(auto t3, Table::FromRecordBatches({batch3}));
323
324 ASSERT_RAISES(Invalid, ConcatenateTables({t1, t3}));
325}
326
327std::shared_ptr<Table> MakeTableWithOneNullFilledColumn(
328 const std::string& column_name, const std::shared_ptr<DataType>& data_type,
329 const int length) {
330 auto array_of_nulls = *MakeArrayOfNull(data_type, length);
331 return Table::Make(schema({field(column_name, data_type)}), {array_of_nulls});
332}
333
334using TestPromoteTableToSchema = TestTable;
335
336TEST_F(TestPromoteTableToSchema, IdenticalSchema) {
337 const int length = 10;
338 auto metadata =
339 std::shared_ptr<KeyValueMetadata>(new KeyValueMetadata({"foo"}, {"bar"}));
340 MakeExample1(length);
341 std::shared_ptr<Table> table = Table::Make(schema_, arrays_);
342
343 ASSERT_OK_AND_ASSIGN(auto result,
344 PromoteTableToSchema(table, schema_->WithMetadata(metadata)));
345
346 std::shared_ptr<Table> expected = table->ReplaceSchemaMetadata(metadata);
347
348 ASSERT_TRUE(result->Equals(*expected));
349}
350
351// The promoted table's fields are ordered the same as the promote-to schema.
352TEST_F(TestPromoteTableToSchema, FieldsReorderedAfterPromotion) {
353 const int length = 10;
354 MakeExample1(length);
355
356 std::vector<std::shared_ptr<Field>> reversed_fields(schema_->fields().crbegin(),
357 schema_->fields().crend());
358 std::vector<std::shared_ptr<Array>> reversed_arrays(arrays_.crbegin(), arrays_.crend());
359
360 std::shared_ptr<Table> table = Table::Make(schema(reversed_fields), reversed_arrays);
361
362 ASSERT_OK_AND_ASSIGN(auto result, PromoteTableToSchema(table, schema_));
363
364 ASSERT_TRUE(result->schema()->Equals(*schema_));
365}
366
367TEST_F(TestPromoteTableToSchema, PromoteNullTypeField) {
368 const int length = 10;
369 auto metadata =
370 std::shared_ptr<KeyValueMetadata>(new KeyValueMetadata({"foo"}, {"bar"}));
371 auto table_with_null_column = MakeTableWithOneNullFilledColumn("field", null(), length)
372 ->ReplaceSchemaMetadata(metadata);
373 auto promoted_schema = schema({field("field", int32())});
374
375 ASSERT_OK_AND_ASSIGN(auto result,
376 PromoteTableToSchema(table_with_null_column, promoted_schema));
377
378 ASSERT_TRUE(
379 result->Equals(*MakeTableWithOneNullFilledColumn("field", int32(), length)));
380}
381
382TEST_F(TestPromoteTableToSchema, AddMissingField) {
383 const int length = 10;
384 auto f0 = field("f0", int32());
385 auto table = Table::Make(schema({}), std::vector<std::shared_ptr<Array>>(), length);
386 auto promoted_schema = schema({field("field", int32())});
387
388 ASSERT_OK_AND_ASSIGN(auto result, PromoteTableToSchema(table, promoted_schema));
389
390 ASSERT_TRUE(
391 result->Equals(*MakeTableWithOneNullFilledColumn("field", int32(), length)));
392}
393
394TEST_F(TestPromoteTableToSchema, IncompatibleTypes) {
395 const int length = 10;
396 auto table = MakeTableWithOneNullFilledColumn("field", int32(), length);
397
398 // Invalid promotion: int32 to null.
399 ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", null())})));
400
401 // Invalid promotion: int32 to uint32.
402 ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", uint32())})));
403}
404
405TEST_F(TestPromoteTableToSchema, IncompatibleNullity) {
406 const int length = 10;
407 auto table = MakeTableWithOneNullFilledColumn("field", int32(), length);
408 ASSERT_RAISES(Invalid,
409 PromoteTableToSchema(
410 table, schema({field("field", uint32())->WithNullable(false)})));
411}
412
413TEST_F(TestPromoteTableToSchema, DuplicateFieldNames) {
414 const int length = 10;
415
416 auto table = Table::Make(
417 schema({field("field", int32()), field("field", null())}),
418 {MakeRandomArray<Int32Array>(length), MakeRandomArray<NullArray>(length)});
419
420 ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("field", int32())})));
421}
422
423TEST_F(TestPromoteTableToSchema, TableFieldAbsentFromSchema) {
424 const int length = 10;
425
426 auto table =
427 Table::Make(schema({field("f0", int32())}), {MakeRandomArray<Int32Array>(length)});
428
429 std::shared_ptr<Table> result;
430 ASSERT_RAISES(Invalid, PromoteTableToSchema(table, schema({field("f1", int32())})));
431}
432
433class ConcatenateTablesWithPromotionTest : public TestTable {
434 protected:
435 ConcatenateTablesOptions GetOptions() {
436 ConcatenateTablesOptions options;
437 options.unify_schemas = true;
438 return options;
439 }
440
441 void MakeExample2(int length) {
442 auto f0 = field("f0", int32());
443 auto f1 = field("f1", null());
444
445 std::vector<std::shared_ptr<Field>> fields = {f0, f1};
446 schema_ = std::make_shared<Schema>(fields);
447
448 arrays_ = {MakeRandomArray<Int32Array>(length), MakeRandomArray<NullArray>(length)};
449
450 columns_ = {std::make_shared<ChunkedArray>(arrays_[0]),
451 std::make_shared<ChunkedArray>(arrays_[1])};
452 }
453
454 void AssertTablesEqualUnorderedFields(const Table& lhs, const Table& rhs) {
455 ASSERT_EQ(lhs.schema()->num_fields(), rhs.schema()->num_fields());
456 if (lhs.schema()->metadata()) {
457 ASSERT_NE(nullptr, rhs.schema()->metadata());
458 ASSERT_TRUE(lhs.schema()->metadata()->Equals(*rhs.schema()->metadata()));
459 } else {
460 ASSERT_EQ(nullptr, rhs.schema()->metadata());
461 }
462 for (int i = 0; i < lhs.schema()->num_fields(); ++i) {
463 const auto& lhs_field = lhs.schema()->field(i);
464 const auto& rhs_field = rhs.schema()->GetFieldByName(lhs_field->name());
465 ASSERT_NE(nullptr, rhs_field);
466 ASSERT_TRUE(lhs_field->Equals(rhs_field, true));
467 const auto& lhs_column = lhs.column(i);
468 const auto& rhs_column = rhs.GetColumnByName(lhs_field->name());
469 AssertChunkedEqual(*lhs_column, *rhs_column);
470 }
471 }
472};
473
474TEST_F(ConcatenateTablesWithPromotionTest, Simple) {
475 const int64_t length = 10;
476
477 MakeExample1(length);
478 auto batch1 = RecordBatch::Make(schema_, length, arrays_);
479
480 ASSERT_OK_AND_ASSIGN(auto f1_nulls, MakeArrayOfNull(schema_->field(1)->type(), length));
481 ASSERT_OK_AND_ASSIGN(auto f2_nulls, MakeArrayOfNull(schema_->field(2)->type(), length));
482
483 MakeExample2(length);
484 auto batch2 = RecordBatch::Make(schema_, length, arrays_);
485
486 auto batch2_null_filled =
487 RecordBatch::Make(batch1->schema(), length, {arrays_[0], f1_nulls, f2_nulls});
488
489 ASSERT_OK_AND_ASSIGN(auto t1, Table::FromRecordBatches({batch1}));
490 ASSERT_OK_AND_ASSIGN(auto t2, Table::FromRecordBatches({batch2}));
491 ASSERT_OK_AND_ASSIGN(auto t3, Table::FromRecordBatches({batch2_null_filled}));
492
493 ASSERT_OK_AND_ASSIGN(auto result, ConcatenateTables({t1, t2}, GetOptions()));
494 ASSERT_OK_AND_ASSIGN(auto expected, ConcatenateTables({t1, t3}));
495 AssertTablesEqualUnorderedFields(*expected, *result);
496
497 ASSERT_OK_AND_ASSIGN(result, ConcatenateTables({t2, t1}, GetOptions()));
498 ASSERT_OK_AND_ASSIGN(expected, ConcatenateTables({t3, t1}));
499 AssertTablesEqualUnorderedFields(*expected, *result);
500}
501
502TEST_F(TestTable, Slice) {
503 const int64_t length = 10;
504
505 MakeExample1(length);
506 auto batch = RecordBatch::Make(schema_, length, arrays_);
507
508 ASSERT_OK_AND_ASSIGN(auto half, Table::FromRecordBatches({batch}));
509 ASSERT_OK_AND_ASSIGN(auto whole, Table::FromRecordBatches({batch, batch}));
510 ASSERT_OK_AND_ASSIGN(auto three, Table::FromRecordBatches({batch, batch, batch}));
511
512 AssertTablesEqual(*whole->Slice(0, length), *half);
513 AssertTablesEqual(*whole->Slice(length), *half);
514 AssertTablesEqual(*whole->Slice(length / 3, 2 * (length - length / 3)),
515 *three->Slice(length + length / 3, 2 * (length - length / 3)));
516}
517
518TEST_F(TestTable, RemoveColumn) {
519 const int64_t length = 10;
520 MakeExample1(length);
521
522 auto table_sp = Table::Make(schema_, columns_);
523 const Table& table = *table_sp;
524
525 ASSERT_OK_AND_ASSIGN(auto result, table.RemoveColumn(0));
526
527 auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(2)});
528 std::vector<std::shared_ptr<ChunkedArray>> ex_columns = {table.column(1),
529 table.column(2)};
530
531 auto expected = Table::Make(ex_schema, ex_columns);
532 ASSERT_TRUE(result->Equals(*expected));
533
534 ASSERT_OK_AND_ASSIGN(result, table.RemoveColumn(1));
535 ex_schema = ::arrow::schema({schema_->field(0), schema_->field(2)});
536 ex_columns = {table.column(0), table.column(2)};
537
538 expected = Table::Make(ex_schema, ex_columns);
539 ASSERT_TRUE(result->Equals(*expected));
540
541 ASSERT_OK_AND_ASSIGN(result, table.RemoveColumn(2));
542 ex_schema = ::arrow::schema({schema_->field(0), schema_->field(1)});
543 ex_columns = {table.column(0), table.column(1)};
544 expected = Table::Make(ex_schema, ex_columns);
545 ASSERT_TRUE(result->Equals(*expected));
546}
547
548TEST_F(TestTable, SetColumn) {
549 const int64_t length = 10;
550 MakeExample1(length);
551
552 auto table_sp = Table::Make(schema_, columns_);
553 const Table& table = *table_sp;
554
555 ASSERT_OK_AND_ASSIGN(auto result,
556 table.SetColumn(0, schema_->field(1), table.column(1)));
557
558 auto ex_schema =
559 ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)});
560
561 auto expected =
562 Table::Make(ex_schema, {table.column(1), table.column(1), table.column(2)});
563 ASSERT_TRUE(result->Equals(*expected));
564}
565
566TEST_F(TestTable, RenameColumns) {
567 MakeExample1(10);
568 auto table = Table::Make(schema_, columns_);
569 EXPECT_THAT(table->ColumnNames(), testing::ElementsAre("f0", "f1", "f2"));
570
571 ASSERT_OK_AND_ASSIGN(auto renamed, table->RenameColumns({"zero", "one", "two"}));
572 EXPECT_THAT(renamed->ColumnNames(), testing::ElementsAre("zero", "one", "two"));
573 ASSERT_OK(renamed->ValidateFull());
574
575 ASSERT_RAISES(Invalid, table->RenameColumns({"hello", "world"}));
576}
577
578TEST_F(TestTable, SelectColumns) {
579 MakeExample1(10);
580 auto table = Table::Make(schema_, columns_);
581
582 ASSERT_OK_AND_ASSIGN(auto subset, table->SelectColumns({0, 2}));
583 ASSERT_OK(subset->ValidateFull());
584
585 auto expexted_schema = ::arrow::schema({schema_->field(0), schema_->field(2)});
586 auto expected = Table::Make(expexted_schema, {table->column(0), table->column(2)});
587 ASSERT_TRUE(subset->Equals(*expected));
588
589 // Out of bounds indices
590 ASSERT_RAISES(Invalid, table->SelectColumns({0, 3}));
591 ASSERT_RAISES(Invalid, table->SelectColumns({-1}));
592}
593
594TEST_F(TestTable, RemoveColumnEmpty) {
595 // ARROW-1865
596 const int64_t length = 10;
597
598 auto f0 = field("f0", int32());
599 auto schema = ::arrow::schema({f0});
600 auto a0 = MakeRandomArray<Int32Array>(length);
601
602 auto table = Table::Make(schema, {std::make_shared<ChunkedArray>(a0)});
603
604 ASSERT_OK_AND_ASSIGN(auto empty, table->RemoveColumn(0));
605
606 ASSERT_EQ(table->num_rows(), empty->num_rows());
607
608 ASSERT_OK_AND_ASSIGN(auto added, empty->AddColumn(0, f0, table->column(0)));
609 ASSERT_EQ(table->num_rows(), added->num_rows());
610}
611
612TEST_F(TestTable, AddColumn) {
613 const int64_t length = 10;
614 MakeExample1(length);
615
616 auto table_sp = Table::Make(schema_, columns_);
617 const Table& table = *table_sp;
618
619 auto f0 = schema_->field(0);
620
621 // Some negative tests with invalid index
622 ASSERT_RAISES(Invalid, table.AddColumn(10, f0, columns_[0]));
623 ASSERT_RAISES(Invalid, table.AddColumn(4, f0, columns_[0]));
624 ASSERT_RAISES(Invalid, table.AddColumn(-1, f0, columns_[0]));
625
626 // Add column with wrong length
627 auto longer_col =
628 std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length + 1));
629 ASSERT_RAISES(Invalid, table.AddColumn(0, f0, longer_col));
630
631 // Add column 0 in different places
632 ASSERT_OK_AND_ASSIGN(auto result, table.AddColumn(0, f0, columns_[0]));
633 auto ex_schema = ::arrow::schema(
634 {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)});
635
636 auto expected = Table::Make(
637 ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)});
638 ASSERT_TRUE(result->Equals(*expected));
639
640 ASSERT_OK_AND_ASSIGN(result, table.AddColumn(1, f0, columns_[0]));
641 ex_schema = ::arrow::schema(
642 {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)});
643
644 expected = Table::Make(
645 ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)});
646 ASSERT_TRUE(result->Equals(*expected));
647
648 ASSERT_OK_AND_ASSIGN(result, table.AddColumn(2, f0, columns_[0]));
649 ex_schema = ::arrow::schema(
650 {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)});
651 expected = Table::Make(
652 ex_schema, {table.column(0), table.column(1), table.column(0), table.column(2)});
653 ASSERT_TRUE(result->Equals(*expected));
654
655 ASSERT_OK_AND_ASSIGN(result, table.AddColumn(3, f0, columns_[0]));
656 ex_schema = ::arrow::schema(
657 {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)});
658 expected = Table::Make(
659 ex_schema, {table.column(0), table.column(1), table.column(2), table.column(0)});
660 ASSERT_TRUE(result->Equals(*expected));
661}
662
663class TestTableBatchReader : public TestBase {};
664
665TEST_F(TestTableBatchReader, ReadNext) {
666 ArrayVector c1, c2;
667
668 auto a1 = MakeRandomArray<Int32Array>(10);
669 auto a2 = MakeRandomArray<Int32Array>(20);
670 auto a3 = MakeRandomArray<Int32Array>(30);
671 auto a4 = MakeRandomArray<Int32Array>(10);
672
673 auto sch1 = arrow::schema({field("f1", int32()), field("f2", int32())});
674
675 std::vector<std::shared_ptr<ChunkedArray>> columns;
676
677 std::shared_ptr<RecordBatch> batch;
678
679 std::vector<std::shared_ptr<Array>> arrays_1 = {a1, a4, a2};
680 std::vector<std::shared_ptr<Array>> arrays_2 = {a2, a2};
681 columns = {std::make_shared<ChunkedArray>(arrays_1),
682 std::make_shared<ChunkedArray>(arrays_2)};
683 auto t1 = Table::Make(sch1, columns);
684
685 TableBatchReader i1(*t1);
686
687 ASSERT_OK(i1.ReadNext(&batch));
688 ASSERT_EQ(10, batch->num_rows());
689
690 ASSERT_OK(i1.ReadNext(&batch));
691 ASSERT_EQ(10, batch->num_rows());
692
693 ASSERT_OK(i1.ReadNext(&batch));
694 ASSERT_EQ(20, batch->num_rows());
695
696 ASSERT_OK(i1.ReadNext(&batch));
697 ASSERT_EQ(nullptr, batch);
698
699 arrays_1 = {a1};
700 arrays_2 = {a4};
701 columns = {std::make_shared<ChunkedArray>(arrays_1),
702 std::make_shared<ChunkedArray>(arrays_2)};
703 auto t2 = Table::Make(sch1, columns);
704
705 TableBatchReader i2(*t2);
706
707 ASSERT_OK(i2.ReadNext(&batch));
708 ASSERT_EQ(10, batch->num_rows());
709
710 // Ensure non-sliced
711 ASSERT_EQ(a1->data().get(), batch->column_data(0).get());
712 ASSERT_EQ(a4->data().get(), batch->column_data(1).get());
713
714 ASSERT_OK(i1.ReadNext(&batch));
715 ASSERT_EQ(nullptr, batch);
716}
717
718TEST_F(TestTableBatchReader, Chunksize) {
719 auto a1 = MakeRandomArray<Int32Array>(10);
720 auto a2 = MakeRandomArray<Int32Array>(20);
721 auto a3 = MakeRandomArray<Int32Array>(10);
722
723 auto sch1 = arrow::schema({field("f1", int32())});
724
725 std::vector<std::shared_ptr<Array>> arrays = {a1, a2, a3};
726 auto t1 = Table::Make(sch1, {std::make_shared<ChunkedArray>(arrays)});
727
728 TableBatchReader i1(*t1);
729
730 i1.set_chunksize(15);
731
732 std::shared_ptr<RecordBatch> batch;
733 ASSERT_OK(i1.ReadNext(&batch));
734 ASSERT_OK(batch->ValidateFull());
735 ASSERT_EQ(10, batch->num_rows());
736
737 ASSERT_OK(i1.ReadNext(&batch));
738 ASSERT_OK(batch->ValidateFull());
739 ASSERT_EQ(15, batch->num_rows());
740
741 ASSERT_OK(i1.ReadNext(&batch));
742 ASSERT_OK(batch->ValidateFull());
743 ASSERT_EQ(5, batch->num_rows());
744
745 ASSERT_OK(i1.ReadNext(&batch));
746 ASSERT_OK(batch->ValidateFull());
747 ASSERT_EQ(10, batch->num_rows());
748
749 ASSERT_OK(i1.ReadNext(&batch));
750 ASSERT_EQ(nullptr, batch);
751}
752
753} // namespace arrow