]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/pretty_print_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / pretty_print_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/pretty_print.h"
19
20 #include <gtest/gtest.h>
21
22 #include <cstdint>
23 #include <cstring>
24 #include <limits>
25 #include <memory>
26 #include <sstream>
27 #include <string>
28 #include <vector>
29
30 #include "arrow/array.h"
31 #include "arrow/table.h"
32 #include "arrow/testing/gtest_util.h"
33 #include "arrow/type.h"
34 #include "arrow/util/key_value_metadata.h"
35
36 namespace arrow {
37
38 class TestPrettyPrint : public ::testing::Test {
39 public:
40 void SetUp() {}
41
42 void Print(const Array& array) {}
43
44 private:
45 std::ostringstream sink_;
46 };
47
48 template <typename T>
49 void CheckStream(const T& obj, const PrettyPrintOptions& options, const char* expected) {
50 std::ostringstream sink;
51 ASSERT_OK(PrettyPrint(obj, options, &sink));
52 std::string result = sink.str();
53 ASSERT_EQ(std::string(expected, strlen(expected)), result);
54 }
55
56 void CheckArray(const Array& arr, const PrettyPrintOptions& options, const char* expected,
57 bool check_operator = true) {
58 ARROW_SCOPED_TRACE("For datatype: ", arr.type()->ToString());
59 CheckStream(arr, options, expected);
60
61 if (options.indent == 0 && check_operator) {
62 std::stringstream ss;
63 ss << arr;
64 std::string result = std::string(expected, strlen(expected));
65 ASSERT_EQ(result, ss.str());
66 }
67 }
68
69 template <typename T>
70 void Check(const T& obj, const PrettyPrintOptions& options, const char* expected) {
71 std::string result;
72 ASSERT_OK(PrettyPrint(obj, options, &result));
73 ASSERT_EQ(std::string(expected, strlen(expected)), result);
74 }
75
76 template <typename TYPE, typename C_TYPE>
77 void CheckPrimitive(const std::shared_ptr<DataType>& type,
78 const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
79 const std::vector<C_TYPE>& values, const char* expected,
80 bool check_operator = true) {
81 std::shared_ptr<Array> array;
82 ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, &array);
83 CheckArray(*array, options, expected, check_operator);
84 }
85
86 template <typename TYPE, typename C_TYPE>
87 void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
88 const std::vector<C_TYPE>& values, const char* expected,
89 bool check_operator = true) {
90 CheckPrimitive<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), options, is_valid,
91 values, expected, check_operator);
92 }
93
// Checks pretty-printing of int32, double, string and large-string arrays
// that contain null slots (slots 2 and 4 are marked invalid).
//
// The same data is printed with indent 0 and indent 2, with a custom null
// representation ("NA"), and with options {2, 2}, for which the expected text
// replaces the middle element with "...".
// NOTE(review): the second positional option is presumably the window size
// and the third presumably the indent step -- confirm against
// PrettyPrintOptions.
94 TEST_F(TestPrettyPrint, PrimitiveType) {
95 std::vector<bool> is_valid = {true, true, false, true, false};
96
97 std::vector<int32_t> values = {0, 1, 2, 3, 4};
98 static const char* expected = R"expected([
99 0,
100 1,
101 null,
102 3,
103 null
104 ])expected";
105 CheckPrimitive<Int32Type, int32_t>({0, 10}, is_valid, values, expected);
106
// Custom null representation; the operator<< comparison is switched off for
// this call (last argument is false).
107 static const char* expected_na = R"expected([
108 0,
109 1,
110 NA,
111 3,
112 NA
113 ])expected";
114 CheckPrimitive<Int32Type, int32_t>({0, 10, 2, "NA"}, is_valid, values, expected_na,
115 false);
116
117 static const char* ex_in2 = R"expected( [
118 0,
119 1,
120 null,
121 3,
122 null
123 ])expected";
124 CheckPrimitive<Int32Type, int32_t>({2, 10}, is_valid, values, ex_in2);
125 static const char* ex_in2_w2 = R"expected( [
126 0,
127 1,
128 ...
129 3,
130 null
131 ])expected";
132 CheckPrimitive<Int32Type, int32_t>({2, 2}, is_valid, values, ex_in2_w2);
133
// Doubles with integral values are expected to print without a fraction.
134 std::vector<double> values2 = {0., 1., 2., 3., 4.};
135 static const char* ex2 = R"expected([
136 0,
137 1,
138 null,
139 3,
140 null
141 ])expected";
142 CheckPrimitive<DoubleType, double>({0, 10}, is_valid, values2, ex2);
143 static const char* ex2_in2 = R"expected( [
144 0,
145 1,
146 null,
147 3,
148 null
149 ])expected";
150 CheckPrimitive<DoubleType, double>({2, 10}, is_valid, values2, ex2_in2);
151
// Strings are expected to print surrounded by double quotes.
152 std::vector<std::string> values3 = {"foo", "bar", "", "baz", ""};
153 static const char* ex3 = R"expected([
154 "foo",
155 "bar",
156 null,
157 "baz",
158 null
159 ])expected";
160 CheckPrimitive<StringType, std::string>({0, 10}, is_valid, values3, ex3);
161 CheckPrimitive<LargeStringType, std::string>({0, 10}, is_valid, values3, ex3);
162 static const char* ex3_in2 = R"expected( [
163 "foo",
164 "bar",
165 null,
166 "baz",
167 null
168 ])expected";
169 CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
170 CheckPrimitive<LargeStringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
171 }
172
173 TEST_F(TestPrettyPrint, PrimitiveTypeNoNewlines) {
174 std::vector<bool> is_valid = {true, true, false, true, false};
175 std::vector<int32_t> values = {0, 1, 2, 3, 4};
176
177 PrettyPrintOptions options{};
178 options.skip_new_lines = true;
179 options.window = 4;
180
181 const char* expected = "[0,1,null,3,null]";
182 CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
183
184 // With ellipsis
185 is_valid.insert(is_valid.end(), 20, true);
186 is_valid.insert(is_valid.end(), {true, false, true});
187 values.insert(values.end(), 20, 99);
188 values.insert(values.end(), {44, 43, 42});
189
190 expected = "[0,1,null,3,...,99,44,null,42]";
191 CheckPrimitive<Int32Type, int32_t>(options, is_valid, values, expected, false);
192 }
193
// Checks that int8 values, including both extremes (127 and -128), are
// expected to print as decimal integers.
194 TEST_F(TestPrettyPrint, Int8) {
195 static const char* expected = R"expected([
196 0,
197 127,
198 -128
199 ])expected";
200 CheckPrimitive<Int8Type, int8_t>({0, 10}, {true, true, true}, {0, 127, -128}, expected);
201 }
202
// Checks that uint8 values 0 and 255 are expected to print as decimal
// integers.
203 TEST_F(TestPrettyPrint, UInt8) {
204 static const char* expected = R"expected([
205 0,
206 255
207 ])expected";
208 CheckPrimitive<UInt8Type, uint8_t>({0, 10}, {true, true}, {0, 255}, expected);
209 }
210
// Checks that int64 values at both extremes are expected to print in full.
// The minimum is written as `-9223372036854775807LL - 1` because the literal
// 9223372036854775808 does not fit in a signed 64-bit integer.
211 TEST_F(TestPrettyPrint, Int64) {
212 static const char* expected = R"expected([
213 0,
214 9223372036854775807,
215 -9223372036854775808
216 ])expected";
217 CheckPrimitive<Int64Type, int64_t>(
218 {0, 10}, {true, true, true}, {0, 9223372036854775807LL, -9223372036854775807LL - 1},
219 expected);
220 }
221
// Checks that large uint64 values, including the maximum
// (18446744073709551615), are expected to print as unsigned decimals.
222 TEST_F(TestPrettyPrint, UInt64) {
223 static const char* expected = R"expected([
224 0,
225 9223372036854775803,
226 18446744073709551615
227 ])expected";
228 CheckPrimitive<UInt64Type, uint64_t>(
229 {0, 10}, {true, true, true}, {0, 9223372036854775803ULL, 18446744073709551615ULL},
230 expected);
231 }
232
// Checks the textual form expected for date32, date64, timestamp, time32 and
// time64 arrays. Every sub-case shares one validity mask, so slots 2 and 4
// are expected to print as null whatever value they store.
233 TEST_F(TestPrettyPrint, DateTimeTypes) {
234 std::vector<bool> is_valid = {true, true, false, true, false};
235
// date32: values 0, 1 and 31 are expected as 1970-01-01, 1970-01-02 and
// 1970-02-01.
236 {
237 std::vector<int32_t> values = {0, 1, 2, 31, 4};
238 static const char* expected = R"expected([
239 1970-01-01,
240 1970-01-02,
241 null,
242 1970-02-01,
243 null
244 ])expected";
245 CheckPrimitive<Date32Type, int32_t>({0, 10}, is_valid, values, expected);
246 }
247
// date64: the same dates, given as multiples of ms_per_day.
248 {
249 constexpr int64_t ms_per_day = 24 * 60 * 60 * 1000;
250 std::vector<int64_t> values = {0 * ms_per_day, 1 * ms_per_day, 2 * ms_per_day,
251 31 * ms_per_day, 4 * ms_per_day};
252 static const char* expected = R"expected([
253 1970-01-01,
254 1970-01-02,
255 null,
256 1970-02-01,
257 null
258 ])expected";
259 CheckPrimitive<Date64Type, int64_t>({0, 10}, is_valid, values, expected);
260 }
261
// Microsecond timestamp whose type carries the zone name "Transylvania"; the
// expected text contains no zone marker.
262 {
263 std::vector<int64_t> values = {
264 0, 1, 2, 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))), 4};
265 static const char* expected = R"expected([
266 1970-01-01 00:00:00.000000,
267 1970-01-01 00:00:00.000001,
268 null,
269 1970-01-02 03:04:05.000678,
270 null
271 ])expected";
272 CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MICRO, "Transylvania"),
273 {0, 10}, is_valid, values, expected);
274 }
275
// time32 in seconds.
276 {
277 std::vector<int32_t> values = {1, 62, 2, 3 + 60 * (2 + 60 * 1), 4};
278 static const char* expected = R"expected([
279 00:00:01,
280 00:01:02,
281 null,
282 01:02:03,
283 null
284 ])expected";
285 CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::SECOND), {0, 10}, is_valid,
286 values, expected);
287 }
288
// time64 in nanoseconds.
289 {
290 std::vector<int64_t> values = {
291 0, 1, 2, 678 + int64_t(1000000000) * (5 + 60 * (4 + 60 * 3)), 4};
292 static const char* expected = R"expected([
293 00:00:00.000000000,
294 00:00:00.000000001,
295 null,
296 03:04:05.000000678,
297 null
298 ])expected";
299 CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::NANO), {0, 10}, is_valid, values,
300 expected);
301 }
302 }
303
// Checks the textual form expected for day-time and month-day-nano interval
// arrays: components are concatenated with unit suffixes (e.g. "1d2ms",
// "1M2d3ns"), negative components keep their sign, and invalid slots print as
// null.
304 TEST_F(TestPrettyPrint, TestIntervalTypes) {
305 std::vector<bool> is_valid = {true, true, false, true, false};
306
307 {
308 std::vector<DayTimeIntervalType::DayMilliseconds> values = {
309 {1, 2}, {-3, 4}, {}, {}, {}};
310 static const char* expected = R"expected([
311 1d2ms,
312 -3d4ms,
313 null,
314 0d0ms,
315 null
316 ])expected";
317 CheckPrimitive<DayTimeIntervalType, DayTimeIntervalType::DayMilliseconds>(
318 {0, 10}, is_valid, values, expected);
319 }
320 {
321 std::vector<MonthDayNanoIntervalType::MonthDayNanos> values = {
322 {1, 2, 3}, {-3, 4, -5}, {}, {}, {}};
323 static const char* expected = R"expected([
324 1M2d3ns,
325 -3M4d-5ns,
326 null,
327 0M0d0ns,
328 null
329 ])expected";
330 CheckPrimitive<MonthDayNanoIntervalType, MonthDayNanoIntervalType::MonthDayNanos>(
331 {0, 10}, is_valid, values, expected);
332 }
333 }
334
// Checks how date, time and timestamp arrays print values that cannot be
// displayed.
//
// Each ten-element sub-case stores four out-of-range values twice: once in
// invalid slots (expected to print as null) and once in valid slots (expected
// to print as "<value out of range: N>"). The last two slots hold the
// smallest and largest displayable values and are expected to print normally.
335 TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) {
336 // Our vendored date library allows years within [-32767, 32767],
337 // which limits the range of values which can be displayed.
338 const int32_t min_int32 = std::numeric_limits<int32_t>::min();
339 const int32_t max_int32 = std::numeric_limits<int32_t>::max();
340 const int64_t min_int64 = std::numeric_limits<int64_t>::min();
341 const int64_t max_int64 = std::numeric_limits<int64_t>::max();
342
// Displayable date bounds; the expected output below pairs them with
// -32767-01-01 and 32767-12-31.
343 const int32_t min_date32 = -12687428;
344 const int32_t max_date32 = 11248737;
345 const int64_t min_date64 = 86400000LL * min_date32;
346 const int64_t max_date64 = 86400000LL * (max_date32 + 1) - 1;
347
// Displayable time-of-day bounds per unit: from 0 up to one unit short of
// 24 hours.
348 const int32_t min_time32_seconds = 0;
349 const int32_t max_time32_seconds = 86399;
350 const int32_t min_time32_millis = 0;
351 const int32_t max_time32_millis = 86399999;
352 const int64_t min_time64_micros = 0;
353 const int64_t max_time64_micros = 86399999999LL;
354 const int64_t min_time64_nanos = 0;
355 const int64_t max_time64_nanos = 86399999999999LL;
356
// Displayable timestamp bounds; finer units are derived from the seconds
// bounds.
357 const int64_t min_timestamp_seconds = -1096193779200LL;
358 const int64_t max_timestamp_seconds = 971890963199LL;
359 const int64_t min_timestamp_millis = min_timestamp_seconds * 1000;
360 const int64_t max_timestamp_millis = max_timestamp_seconds * 1000 + 999;
361 const int64_t min_timestamp_micros = min_timestamp_millis * 1000;
362 const int64_t max_timestamp_micros = max_timestamp_millis * 1000 + 999;
363
// The first four slots are null, the remaining six are valid.
364 std::vector<bool> is_valid = {false, false, false, false, true,
365 true, true, true, true, true};
366
367 // Dates
368 {
369 std::vector<int32_t> values = {min_int32, max_int32, min_date32 - 1, max_date32 + 1,
370 min_int32, max_int32, min_date32 - 1, max_date32 + 1,
371 min_date32, max_date32};
372 static const char* expected = R"expected([
373 null,
374 null,
375 null,
376 null,
377 <value out of range: -2147483648>,
378 <value out of range: 2147483647>,
379 <value out of range: -12687429>,
380 <value out of range: 11248738>,
381 -32767-01-01,
382 32767-12-31
383 ])expected";
384 CheckPrimitive<Date32Type, int32_t>({0, 10}, is_valid, values, expected);
385 }
386 {
387 std::vector<int64_t> values = {min_int64, max_int64, min_date64 - 1, max_date64 + 1,
388 min_int64, max_int64, min_date64 - 1, max_date64 + 1,
389 min_date64, max_date64};
390 static const char* expected = R"expected([
391 null,
392 null,
393 null,
394 null,
395 <value out of range: -9223372036854775808>,
396 <value out of range: 9223372036854775807>,
397 <value out of range: -1096193779200001>,
398 <value out of range: 971890963200000>,
399 -32767-01-01,
400 32767-12-31
401 ])expected";
402 CheckPrimitive<Date64Type, int64_t>({0, 10}, is_valid, values, expected);
403 }
404
405 // Times
406 {
407 std::vector<int32_t> values = {min_int32,
408 max_int32,
409 min_time32_seconds - 1,
410 max_time32_seconds + 1,
411 min_int32,
412 max_int32,
413 min_time32_seconds - 1,
414 max_time32_seconds + 1,
415 min_time32_seconds,
416 max_time32_seconds};
417 static const char* expected = R"expected([
418 null,
419 null,
420 null,
421 null,
422 <value out of range: -2147483648>,
423 <value out of range: 2147483647>,
424 <value out of range: -1>,
425 <value out of range: 86400>,
426 00:00:00,
427 23:59:59
428 ])expected";
429 CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::SECOND), {0, 10}, is_valid,
430 values, expected);
431 }
432 {
433 std::vector<int32_t> values = {
434 min_int32, max_int32, min_time32_millis - 1, max_time32_millis + 1,
435 min_int32, max_int32, min_time32_millis - 1, max_time32_millis + 1,
436 min_time32_millis, max_time32_millis};
437 static const char* expected = R"expected([
438 null,
439 null,
440 null,
441 null,
442 <value out of range: -2147483648>,
443 <value out of range: 2147483647>,
444 <value out of range: -1>,
445 <value out of range: 86400000>,
446 00:00:00.000,
447 23:59:59.999
448 ])expected";
449 CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::MILLI), {0, 10}, is_valid,
450 values, expected);
451 }
452 {
453 std::vector<int64_t> values = {
454 min_int64, max_int64, min_time64_micros - 1, max_time64_micros + 1,
455 min_int64, max_int64, min_time64_micros - 1, max_time64_micros + 1,
456 min_time64_micros, max_time64_micros};
457 static const char* expected = R"expected([
458 null,
459 null,
460 null,
461 null,
462 <value out of range: -9223372036854775808>,
463 <value out of range: 9223372036854775807>,
464 <value out of range: -1>,
465 <value out of range: 86400000000>,
466 00:00:00.000000,
467 23:59:59.999999
468 ])expected";
469 CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::MICRO), {0, 10}, is_valid,
470 values, expected);
471 }
472 {
473 std::vector<int64_t> values = {
474 min_int64, max_int64, min_time64_nanos - 1, max_time64_nanos + 1,
475 min_int64, max_int64, min_time64_nanos - 1, max_time64_nanos + 1,
476 min_time64_nanos, max_time64_nanos};
477 static const char* expected = R"expected([
478 null,
479 null,
480 null,
481 null,
482 <value out of range: -9223372036854775808>,
483 <value out of range: 9223372036854775807>,
484 <value out of range: -1>,
485 <value out of range: 86400000000000>,
486 00:00:00.000000000,
487 23:59:59.999999999
488 ])expected";
489 CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::NANO), {0, 10}, is_valid, values,
490 expected);
491 }
492
493 // Timestamps
494 {
495 std::vector<int64_t> values = {min_int64,
496 max_int64,
497 min_timestamp_seconds - 1,
498 max_timestamp_seconds + 1,
499 min_int64,
500 max_int64,
501 min_timestamp_seconds - 1,
502 max_timestamp_seconds + 1,
503 min_timestamp_seconds,
504 max_timestamp_seconds};
505 static const char* expected = R"expected([
506 null,
507 null,
508 null,
509 null,
510 <value out of range: -9223372036854775808>,
511 <value out of range: 9223372036854775807>,
512 <value out of range: -1096193779201>,
513 <value out of range: 971890963200>,
514 -32767-01-01 00:00:00,
515 32767-12-31 23:59:59
516 ])expected";
517 CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), {0, 10}, is_valid,
518 values, expected);
519 }
520 {
521 std::vector<int64_t> values = {min_int64,
522 max_int64,
523 min_timestamp_millis - 1,
524 max_timestamp_millis + 1,
525 min_int64,
526 max_int64,
527 min_timestamp_millis - 1,
528 max_timestamp_millis + 1,
529 min_timestamp_millis,
530 max_timestamp_millis};
531 static const char* expected = R"expected([
532 null,
533 null,
534 null,
535 null,
536 <value out of range: -9223372036854775808>,
537 <value out of range: 9223372036854775807>,
538 <value out of range: -1096193779200001>,
539 <value out of range: 971890963200000>,
540 -32767-01-01 00:00:00.000,
541 32767-12-31 23:59:59.999
542 ])expected";
543 CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MILLI), {0, 10}, is_valid,
544 values, expected);
545 }
546 {
547 std::vector<int64_t> values = {min_int64,
548 max_int64,
549 min_timestamp_micros - 1,
550 max_timestamp_micros + 1,
551 min_int64,
552 max_int64,
553 min_timestamp_micros - 1,
554 max_timestamp_micros + 1,
555 min_timestamp_micros,
556 max_timestamp_micros};
557 static const char* expected = R"expected([
558 null,
559 null,
560 null,
561 null,
562 <value out of range: -9223372036854775808>,
563 <value out of range: 9223372036854775807>,
564 <value out of range: -1096193779200000001>,
565 <value out of range: 971890963200000000>,
566 -32767-01-01 00:00:00.000000,
567 32767-12-31 23:59:59.999999
568 ])expected";
569 CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MICRO), {0, 10}, is_valid,
570 values, expected);
571 }
572 // Note that while the values below are legal and correct, they used to
573 // trigger an internal signed overflow inside the vendored "date" library
574 // (https://github.com/HowardHinnant/date/issues/696).
575 {
576 std::vector<int64_t> values = {min_int64, max_int64};
577 static const char* expected = R"expected([
578 1677-09-21 00:12:43.145224192,
579 2262-04-11 23:47:16.854775807
580 ])expected";
581 CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::NANO), {0, 10},
582 {true, true}, values, expected);
583 }
584 }
585
// Checks the rendering of a one-row struct array with two int32 children and
// no nulls: a validity summary line followed by one section per child, at
// indent 0 and at indent 2. Only the stream path is checked (CheckStream).
586 TEST_F(TestPrettyPrint, StructTypeBasic) {
587 auto simple_1 = field("one", int32());
588 auto simple_2 = field("two", int32());
589 auto simple_struct = struct_({simple_1, simple_2});
590
591 auto array = ArrayFromJSON(simple_struct, "[[11, 22]]");
592
593 static const char* ex = R"expected(-- is_valid: all not null
594 -- child 0 type: int32
595 [
596 11
597 ]
598 -- child 1 type: int32
599 [
600 22
601 ])expected";
602 CheckStream(*array, {0, 10}, ex);
603
604 static const char* ex_2 = R"expected( -- is_valid: all not null
605 -- child 0 type: int32
606 [
607 11
608 ]
609 -- child 1 type: int32
610 [
611 22
612 ])expected";
613 CheckStream(*array, {2, 10}, ex_2);
614 }
615
// Checks the rendering of a struct array that has a null row and a null child
// value. The expected text lists the per-row validity explicitly, and the
// child sections show 0 at the position of the null row.
616 TEST_F(TestPrettyPrint, StructTypeAdvanced) {
617 auto simple_1 = field("one", int32());
618 auto simple_2 = field("two", int32());
619 auto simple_struct = struct_({simple_1, simple_2});
620
621 auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]");
622
623 static const char* ex = R"expected(-- is_valid:
624 [
625 true,
626 false,
627 true
628 ]
629 -- child 0 type: int32
630 [
631 11,
632 0,
633 null
634 ]
635 -- child 1 type: int32
636 [
637 22,
638 0,
639 33
640 ])expected";
641 CheckStream(*array, {0, 10}, ex);
642 }
643
644 TEST_F(TestPrettyPrint, BinaryType) {
645 std::vector<bool> is_valid = {true, true, false, true, true, true};
646 std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
647 static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n]";
648 CheckPrimitive<BinaryType, std::string>({0}, is_valid, values, ex);
649 CheckPrimitive<LargeBinaryType, std::string>({0}, is_valid, values, ex);
650 static const char* ex_in2 =
651 " [\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n ]";
652 CheckPrimitive<BinaryType, std::string>({2}, is_valid, values, ex_in2);
653 CheckPrimitive<LargeBinaryType, std::string>({2}, is_valid, values, ex_in2);
654 }
655
656 TEST_F(TestPrettyPrint, BinaryNoNewlines) {
657 std::vector<bool> is_valid = {true, true, false, true, true, true};
658 std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
659
660 PrettyPrintOptions options{};
661 options.skip_new_lines = true;
662
663 const char* expected = "[666F6F,626172,null,62617A,,FF]";
664 CheckPrimitive<BinaryType, std::string>(options, is_valid, values, expected, false);
665
666 // With ellipsis
667 options.window = 2;
668 expected = "[666F6F,626172,...,,FF]";
669 CheckPrimitive<BinaryType, std::string>(options, is_valid, values, expected, false);
670 }
671
// Checks the rendering of list<int64> and large_list<int64> arrays that
// contain a list holding a null, an empty list and a null list.
//
// Three expectations are used for both types: indent 0 (ex), indent 2 (ex_2),
// and options {0, 1} (ex_3), where the middle lists are replaced by "...".
// The last case only checks the stream path (CheckStream).
672 TEST_F(TestPrettyPrint, ListType) {
673 auto list_type = list(int64());
674
675 static const char* ex = R"expected([
676 [
677 null
678 ],
679 [],
680 null,
681 [
682 4,
683 6,
684 7
685 ],
686 [
687 2,
688 3
689 ]
690 ])expected";
691 static const char* ex_2 = R"expected( [
692 [
693 null
694 ],
695 [],
696 null,
697 [
698 4,
699 6,
700 7
701 ],
702 [
703 2,
704 3
705 ]
706 ])expected";
707 static const char* ex_3 = R"expected([
708 [
709 null
710 ],
711 ...
712 [
713 2,
714 3
715 ]
716 ])expected";
717
718 auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
719 CheckArray(*array, {0, 10}, ex);
720 CheckArray(*array, {2, 10}, ex_2);
721 CheckStream(*array, {0, 1}, ex_3);
722
// Same data and expectations, now with the large list type.
723 list_type = large_list(int64());
724 array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
725 CheckArray(*array, {0, 10}, ex);
726 CheckArray(*array, {2, 10}, ex_2);
727 CheckStream(*array, {0, 1}, ex_3);
728 }
729
730 TEST_F(TestPrettyPrint, ListTypeNoNewlines) {
731 auto list_type = list(int64());
732 auto empty_array = ArrayFromJSON(list_type, "[]");
733 auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]");
734
735 PrettyPrintOptions options{};
736 options.skip_new_lines = true;
737 options.null_rep = "NA";
738 CheckArray(*empty_array, options, "[]", false);
739 CheckArray(*array, options, "[[NA],[],NA,[4,5,6,7,8],[2,3]]", false);
740
741 options.window = 2;
742 CheckArray(*empty_array, options, "[]", false);
743 CheckArray(*array, options, "[[NA],[],...,[4,5,...,7,8],[2,3]]", false);
744 }
745
// Checks the rendering of a map<utf8, int64> array holding a two-entry map, a
// null map, a one-entry map and an empty map. Each non-null map is expected
// as a "keys:" section followed by a "values:" section; the null map prints
// as null.
746 TEST_F(TestPrettyPrint, MapType) {
747 auto map_type = map(utf8(), int64());
748 auto array = ArrayFromJSON(map_type, R"([
749 [["joe", 0], ["mark", null]],
750 null,
751 [["cap", 8]],
752 []
753 ])");
754
755 static const char* ex = R"expected([
756 keys:
757 [
758 "joe",
759 "mark"
760 ]
761 values:
762 [
763 0,
764 null
765 ],
766 null,
767 keys:
768 [
769 "cap"
770 ]
771 values:
772 [
773 8
774 ],
775 keys:
776 []
777 values:
778 []
779 ])expected";
780 CheckArray(*array, {0, 10}, ex);
781 }
782
// Checks the rendering of a fixed_size_list<int32>[3] array with null values
// and one null list, first in full ({0, 10}) and then with options {0, 1},
// where both the outer array and each printed inner list are expected to be
// abbreviated with "...".
783 TEST_F(TestPrettyPrint, FixedSizeListType) {
784 auto list_type = fixed_size_list(int32(), 3);
785 auto array = ArrayFromJSON(list_type,
786 "[[null, 0, 1], [2, 3, null], null, [4, 6, 7], [8, 9, 5]]");
787
788 CheckArray(*array, {0, 10}, R"expected([
789 [
790 null,
791 0,
792 1
793 ],
794 [
795 2,
796 3,
797 null
798 ],
799 null,
800 [
801 4,
802 6,
803 7
804 ],
805 [
806 8,
807 9,
808 5
809 ]
810 ])expected");
811 CheckStream(*array, {0, 1}, R"expected([
812 [
813 null,
814 ...
815 1
816 ],
817 ...
818 [
819 8,
820 ...
821 5
822 ]
823 ])expected");
824 }
825
826 TEST_F(TestPrettyPrint, FixedSizeBinaryType) {
827 std::vector<bool> is_valid = {true, true, false, true, false};
828
829 auto type = fixed_size_binary(3);
830 auto array = ArrayFromJSON(type, "[\"foo\", \"bar\", null, \"baz\"]");
831
832 static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A\n]";
833 CheckArray(*array, {0, 10}, ex);
834 static const char* ex_2 = " [\n 666F6F,\n ...\n 62617A\n ]";
835 CheckArray(*array, {2, 1}, ex_2);
836 }
837
838 TEST_F(TestPrettyPrint, DecimalTypes) {
839 int32_t p = 19;
840 int32_t s = 4;
841
842 for (auto type : {decimal128(p, s), decimal256(p, s)}) {
843 auto array = ArrayFromJSON(type, "[\"123.4567\", \"456.7891\", null]");
844
845 static const char* ex = "[\n 123.4567,\n 456.7891,\n null\n]";
846 CheckArray(*array, {0}, ex);
847 }
848 }
849
// Checks the rendering of a dictionary array (int16 indices into a string
// dictionary): the expected text has a "-- dictionary:" section listing the
// dictionary values followed by a "-- indices:" section in which the invalid
// index prints as null.
850 TEST_F(TestPrettyPrint, DictionaryType) {
851 std::vector<bool> is_valid = {true, true, false, true, true, true};
852
853 std::shared_ptr<Array> dict;
854 std::vector<std::string> dict_values = {"foo", "bar", "baz"};
855 ArrayFromVector<StringType, std::string>(dict_values, &dict);
856 std::shared_ptr<DataType> dict_type = dictionary(int16(), utf8());
857
// The -1 index sits in the slot that is_valid marks as null.
858 std::shared_ptr<Array> indices;
859 std::vector<int16_t> indices_values = {1, 2, -1, 0, 2, 0};
860 ArrayFromVector<Int16Type, int16_t>(is_valid, indices_values, &indices);
861 auto arr = std::make_shared<DictionaryArray>(dict_type, indices, dict);
862
863 static const char* expected = R"expected(
864 -- dictionary:
865 [
866 "foo",
867 "bar",
868 "baz"
869 ]
870 -- indices:
871 [
872 1,
873 2,
874 null,
875 0,
876 2,
877 0
878 ])expected";
879
880 CheckArray(*arr, {0}, expected);
881 }
882
// Checks the rendering of chunked int32 arrays: each chunk is expected as a
// nested bracketed list inside an outer pair of brackets, for one chunk and
// for two identical chunks.
883 TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) {
884 auto array = ArrayFromJSON(int32(), "[0, 1, null, 3, null]");
885 ChunkedArray chunked_array(array);
886
887 static const char* expected = R"expected([
888 [
889 0,
890 1,
891 null,
892 3,
893 null
894 ]
895 ])expected";
896 CheckStream(chunked_array, {0}, expected);
897
// Two chunks that both refer to the same underlying array.
898 ChunkedArray chunked_array_2({array, array});
899
900 static const char* expected_2 = R"expected([
901 [
902 0,
903 1,
904 null,
905 3,
906 null
907 ],
908 [
909 0,
910 1,
911 null,
912 3,
913 null
914 ]
915 ])expected";
916
917 CheckStream(chunked_array_2, {0}, expected_2);
918 }
919
// Checks the rendering of a one-column table: the expected text is the schema
// line, a "----" separator, the column name and then the column's chunks. The
// expected string ends with a trailing newline.
920 TEST_F(TestPrettyPrint, TablePrimitive) {
921 std::shared_ptr<Field> int_field = field("column", int32());
922 auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]");
923 auto column = std::make_shared<ChunkedArray>(ArrayVector({array}));
924 std::shared_ptr<Schema> table_schema = schema({int_field});
925 std::shared_ptr<Table> table = Table::Make(table_schema, {column});
926
927 static const char* expected = R"expected(column: int32
928 ----
929 column:
930 [
931 [
932 0,
933 1,
934 null,
935 3,
936 null
937 ]
938 ]
939 )expected";
940 CheckStream(*table, {0}, expected);
941 }
942
// Checks the rendering of a schema whose fields use a dictionary type
// directly, inside a list and inside a struct. Nested fields are expected on
// their own "child N, ..." lines below the parent field.
//
// NOTE(review): `is_valid` is never read, and `dict` is built but not used by
// anything else in this test -- they look like leftovers.
943 TEST_F(TestPrettyPrint, SchemaWithDictionary) {
944 std::vector<bool> is_valid = {true, true, false, true, true, true};
945
946 std::shared_ptr<Array> dict;
947 std::vector<std::string> dict_values = {"foo", "bar", "baz"};
948 ArrayFromVector<StringType, std::string>(dict_values, &dict);
949
950 auto simple = field("one", int32());
951 auto simple_dict = field("two", dictionary(int16(), utf8()));
952 auto list_of_dict = field("three", list(simple_dict));
953 auto struct_with_dict = field("four", struct_({simple, simple_dict}));
954
955 auto sch = schema({simple, simple_dict, list_of_dict, struct_with_dict});
956
957 static const char* expected = R"expected(one: int32
958 two: dictionary<values=string, indices=int16, ordered=0>
959 three: list<two: dictionary<values=string, indices=int16, ordered=0>>
960 child 0, two: dictionary<values=string, indices=int16, ordered=0>
961 four: struct<one: int32, two: dictionary<values=string, indices=int16, ordered=0>>
962 child 0, one: int32
963 child 1, two: dictionary<values=string, indices=int16, ordered=0>)expected";
964
965 PrettyPrintOptions options;
966 Check(*sch, options, expected);
967 }
968
// Checks that non-nullable fields are expected to be rendered with a
// "not null" suffix, both for top-level fields and for the item field of a
// list.
969 TEST_F(TestPrettyPrint, SchemaWithNotNull) {
970 auto simple = field("one", int32());
971 auto non_null = field("two", int32(), false);
972 auto list_simple = field("three", list(int32()));
973 auto list_non_null = field("four", list(int32()), false);
974 auto list_non_null2 = field("five", list(field("item", int32(), false)));
975
976 auto sch = schema({simple, non_null, list_simple, list_non_null, list_non_null2});
977
978 static const char* expected = R"expected(one: int32
979 two: int32 not null
980 three: list<item: int32>
981 child 0, item: int32
982 four: list<item: int32> not null
983 child 0, item: int32
984 five: list<item: int32 not null>
985 child 0, item: int32 not null)expected";
986
987 PrettyPrintOptions options;
988 Check(*sch, options, expected);
989 }
990
// Checks the rendering of field-level and schema-level key/value metadata.
//
// Four cases are covered: a long value printed with default-constructed
// options (expected to be cut off and followed by "' + 737"), the same value
// with truncate_metadata = false (expected in full), a value described as
// exactly fitting, and a long key with truncation switched on.
991 TEST_F(TestPrettyPrint, SchemaWithMetadata) {
992 // ARROW-7063
993 auto metadata1 = key_value_metadata({"foo1"}, {"bar1"});
994 auto metadata2 = key_value_metadata({"foo2"}, {"bar2"});
995 auto metadata3 = key_value_metadata(
996 {"foo3", "lorem"},
997 {"bar3",
998 R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
999 turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
1000 eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
1001 litora torquent per conubia nostra, per inceptos himenaeos. Praesent
1002 faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
1003 turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
1004 dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
1005 pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
1006 sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
1007 sapien. Quisque pretium vestibulum urna eu vehicula.)"});
1008 auto my_schema = schema(
1009 {field("one", int32(), true, metadata1), field("two", utf8(), false, metadata2)},
1010 metadata3);
1011
// Default-constructed options: the long value is expected to be truncated.
1012 PrettyPrintOptions options;
1013 static const char* expected = R"(one: int32
1014 -- field metadata --
1015 foo1: 'bar1'
1016 two: string not null
1017 -- field metadata --
1018 foo2: 'bar2'
1019 -- schema metadata --
1020 foo3: 'bar3'
1021 lorem: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla ac' + 737)";
1022 Check(*my_schema, options, expected);
1023
1024 static const char* expected_verbose = R"(one: int32
1025 -- field metadata --
1026 foo1: 'bar1'
1027 two: string not null
1028 -- field metadata --
1029 foo2: 'bar2'
1030 -- schema metadata --
1031 foo3: 'bar3'
1032 lorem: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
1033 turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
1034 eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
1035 litora torquent per conubia nostra, per inceptos himenaeos. Praesent
1036 faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
1037 turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
1038 dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
1039 pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
1040 sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
1041 sapien. Quisque pretium vestibulum urna eu vehicula.')";
// Truncation switched off: the full value is expected.
1042 options.truncate_metadata = false;
1043 Check(*my_schema, options, expected_verbose);
1044
1045 // Metadata that exactly fits
1046 auto metadata4 =
1047 key_value_metadata({"key"}, {("valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1048 "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")});
1049 my_schema = schema({field("f0", int32())}, metadata4);
1050 static const char* expected_fits = R"(f0: int32
1051 -- schema metadata --
1052 key: 'valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')";
// NOTE(review): truncate_metadata was already set to false above, so this
// check appears not to exercise the truncation path -- confirm whether `true`
// was intended here.
1053 options.truncate_metadata = false;
1054 Check(*my_schema, options, expected_fits);
1055
1056 // A large key
1057 auto metadata5 = key_value_metadata({"0123456789012345678901234567890123456789"},
1058 {("valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1059 "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")});
1060 my_schema = schema({field("f0", int32())}, metadata5);
1061 static const char* expected_big_key = R"(f0: int32
1062 -- schema metadata --
1063 0123456789012345678901234567890123456789: 'valuexxxxxxxxxxxxxxxxxxxxxxxxx' + 40)";
1064 options.truncate_metadata = true;
1065 Check(*my_schema, options, expected_big_key);
1066 }
1067
// Checks the rendering of a two-field schema when the options are constructed
// with an indent of 4.
// NOTE(review): presumably every field line is expected to carry that indent;
// the leading whitespace of the expected text should be confirmed against the
// upstream file.
1068 TEST_F(TestPrettyPrint, SchemaIndentation) {
1069 // ARROW-6159
1070 auto simple = field("one", int32());
1071 auto non_null = field("two", int32(), false);
1072 auto sch = schema({simple, non_null});
1073
1074 static const char* expected = R"expected( one: int32
1075 two: int32 not null)expected";
1076
1077 PrettyPrintOptions options(/*indent=*/4);
1078 Check(*sch, options, expected);
1079 }
1080
1081 } // namespace arrow