1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "arrow/pretty_print.h"
27 #include <sstream> // IWYU pragma: keep
29 #include <type_traits>
32 #include "arrow/array.h"
33 #include "arrow/chunked_array.h"
34 #include "arrow/record_batch.h"
35 #include "arrow/status.h"
36 #include "arrow/table.h"
37 #include "arrow/type.h"
38 #include "arrow/type_traits.h"
39 #include "arrow/util/checked_cast.h"
40 #include "arrow/util/formatting.h"
41 #include "arrow/util/int_util_internal.h"
42 #include "arrow/util/key_value_metadata.h"
43 #include "arrow/util/string.h"
44 #include "arrow/util/string_view.h"
45 #include "arrow/vendored/datetime.h"
46 #include "arrow/visitor_inline.h"
50 using internal::checked_cast
;
51 using internal::StringFormatter
;
57 PrettyPrinter(const PrettyPrintOptions
& options
, std::ostream
* sink
)
58 : options_(options
), indent_(options
.indent
), sink_(sink
) {}
// Low-level output helpers; all of them are defined out of line further down.

// Streams `data` to the sink unchanged.
60 inline void Write(util::string_view data
);
// NOTE(review): only the signature of the definition is visible here;
// presumably writes the current indentation before `data` -- confirm.
61 inline void WriteIndented(util::string_view data
);
// Consults options_.skip_new_lines before doing anything else; the rest of the
// definition is not visible here.
62 inline void Newline();
// Also starts by checking options_.skip_new_lines; the rest of the definition
// is not visible here.
64 inline void IndentAfterNewline();
// Begins the textual rendering of `array`. For a non-empty array the visible
// code raises indent_ by options_.indent_size.
65 void OpenArray(const Array
& array
);
// Counterpart of OpenArray: for a non-empty array, lowers indent_ by
// options_.indent_size again.
66 void CloseArray(const Array
& array
);
67 void Flush() { (*sink_
) << std::flush
; }
69 PrettyPrintOptions
ChildOptions() const {
70 PrettyPrintOptions child_options
= options_
;
71 child_options
.indent
= indent_
;
// Formatting options. Held by reference (bound in the constructor's
// initializer list), so the referenced object must outlive this printer.
76 const PrettyPrintOptions
& options_
;
81 void PrettyPrinter::OpenArray(const Array
& array
) {
82 if (!options_
.skip_new_lines
) {
86 if (array
.length() > 0) {
88 indent_
+= options_
.indent_size
;
92 void PrettyPrinter::CloseArray(const Array
& array
) {
93 if (array
.length() > 0) {
94 indent_
-= options_
.indent_size
;
95 if (!options_
.skip_new_lines
) {
102 void PrettyPrinter::Write(util::string_view data
) { (*sink_
) << data
; }
104 void PrettyPrinter::WriteIndented(util::string_view data
) {
109 void PrettyPrinter::Newline() {
110 if (options_
.skip_new_lines
) {
116 void PrettyPrinter::Indent() {
117 for (int i
= 0; i
< indent_
; ++i
) {
122 void PrettyPrinter::IndentAfterNewline() {
123 if (options_
.skip_new_lines
) {
129 class ArrayPrinter
: public PrettyPrinter
{
131 ArrayPrinter(const PrettyPrintOptions
& options
, std::ostream
* sink
)
132 : PrettyPrinter(options
, sink
) {}
135 template <typename FormatFunction
>
136 Status
WriteValues(const Array
& array
, FormatFunction
&& func
,
137 bool indent_non_null_values
= true) {
138 // `indent_non_null_values` should be false if `FormatFunction` applies
139 // indentation itself.
140 for (int64_t i
= 0; i
< array
.length(); ++i
) {
141 const bool is_last
= (i
== array
.length() - 1);
142 if ((i
>= options_
.window
) && (i
< (array
.length() - options_
.window
))) {
143 IndentAfterNewline();
145 if (!is_last
&& options_
.skip_new_lines
) {
148 i
= array
.length() - options_
.window
- 1;
149 } else if (array
.IsNull(i
)) {
150 IndentAfterNewline();
151 (*sink_
) << options_
.null_rep
;
156 if (indent_non_null_values
) {
157 IndentAfterNewline();
159 RETURN_NOT_OK(func(i
));
169 template <typename ArrayType
, typename Formatter
>
170 Status
WritePrimitiveValues(const ArrayType
& array
, Formatter
* formatter
) {
171 auto appender
= [&](util::string_view v
) { (*sink_
) << v
; };
172 auto format_func
= [&](int64_t i
) {
173 (*formatter
)(array
.GetView(i
), appender
);
176 return WriteValues(array
, std::move(format_func
));
// Convenience overload: constructs a StringFormatter<T> from the array's type
// (T defaults to ArrayType::TypeClass) and delegates to the overload that
// takes an explicit formatter pointer.
179 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
180 Status
WritePrimitiveValues(const ArrayType
& array
) {
181 StringFormatter
<T
> formatter
{array
.type()};
182 return WritePrimitiveValues(array
, &formatter
);
// Writes the "-- is_valid:" section for `array` (defined out of line).
// From the visible parts of that definition: when null_count() > 0, a
// BooleanArray built over the null bitmap is pretty-printed one indent level
// deeper; the other path writes " all not null".
185 Status
WriteValidityBitmap(const Array
& array
);
187 Status
PrintChildren(const std::vector
<std::shared_ptr
<Array
>>& fields
, int64_t offset
,
189 for (size_t i
= 0; i
< fields
.size(); ++i
) {
192 std::stringstream ss
;
193 ss
<< "-- child " << i
<< " type: " << fields
[i
]->type()->ToString() << "\n";
196 std::shared_ptr
<Array
> field
= fields
[i
];
198 field
= field
->Slice(offset
, length
);
200 RETURN_NOT_OK(PrettyPrint(*field
, indent_
+ options_
.indent_size
, sink_
));
206 // WriteDataValues(): generic function to write values from an array
// Overload enabled through enable_if_has_c_type<T, Status> (T defaults to
// ArrayType::TypeClass); simply delegates to WritePrimitiveValues.
209 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
210 enable_if_has_c_type
<T
, Status
> WriteDataValues(const ArrayType
& array
) {
211 return WritePrimitiveValues(array
);
// Half-float overload. Values are pushed through a StringFormatter<Int16Type>
// rather than a floating-point formatter (see the XXX note below), then
// written via the formatter-taking WritePrimitiveValues overload.
214 Status
WriteDataValues(const HalfFloatArray
& array
) {
215 // XXX do not know how to format half floats yet
216 StringFormatter
<Int16Type
> formatter
{array
.type()};
217 return WritePrimitiveValues(array
, &formatter
);
220 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
221 enable_if_string_like
<T
, Status
> WriteDataValues(const ArrayType
& array
) {
222 return WriteValues(array
, [&](int64_t i
) {
223 (*sink_
) << "\"" << array
.GetView(i
) << "\"";
228 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
229 enable_if_t
<is_binary_like_type
<T
>::value
&& !is_decimal_type
<T
>::value
, Status
>
230 WriteDataValues(const ArrayType
& array
) {
231 return WriteValues(array
, [&](int64_t i
) {
232 (*sink_
) << HexEncode(array
.GetView(i
));
237 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
238 enable_if_decimal
<T
, Status
> WriteDataValues(const ArrayType
& array
) {
239 return WriteValues(array
, [&](int64_t i
) {
240 (*sink_
) << array
.FormatValue(i
);
245 template <typename ArrayType
, typename T
= typename
ArrayType::TypeClass
>
246 enable_if_list_like
<T
, Status
> WriteDataValues(const ArrayType
& array
) {
247 const auto values
= array
.values();
248 const auto child_options
= ChildOptions();
249 ArrayPrinter
values_printer(child_options
, sink_
);
254 // XXX this could be much faster if ArrayPrinter allowed specifying start and
256 return values_printer
.Print(
257 *values
->Slice(array
.value_offset(i
), array
.value_length(i
)));
259 /*indent_non_null_values=*/false);
262 Status
WriteDataValues(const MapArray
& array
) {
263 const auto keys
= array
.keys();
264 const auto items
= array
.items();
265 const auto child_options
= ChildOptions();
266 ArrayPrinter
values_printer(child_options
, sink_
);
274 RETURN_NOT_OK(values_printer
.Print(
275 *keys
->Slice(array
.value_offset(i
), array
.value_length(i
))));
277 IndentAfterNewline();
278 (*sink_
) << "values:";
280 RETURN_NOT_OK(values_printer
.Print(
281 *items
->Slice(array
.value_offset(i
), array
.value_length(i
))));
284 /*indent_non_null_values=*/false);
288 template <typename T
>
289 enable_if_t
<std::is_base_of
<PrimitiveArray
, T
>::value
||
290 std::is_base_of
<FixedSizeBinaryArray
, T
>::value
||
291 std::is_base_of
<BinaryArray
, T
>::value
||
292 std::is_base_of
<LargeBinaryArray
, T
>::value
||
293 std::is_base_of
<ListArray
, T
>::value
||
294 std::is_base_of
<LargeListArray
, T
>::value
||
295 std::is_base_of
<MapArray
, T
>::value
||
296 std::is_base_of
<FixedSizeListArray
, T
>::value
,
298 Visit(const T
& array
) {
299 Status st
= array
.Validate();
301 (*sink_
) << "<Invalid array: " << st
.message() << ">";
306 if (array
.length() > 0) {
307 RETURN_NOT_OK(WriteDataValues(array
));
313 Status
Visit(const NullArray
& array
) {
314 (*sink_
) << array
.length() << " nulls";
318 Status
Visit(const ExtensionArray
& array
) { return Print(*array
.storage()); }
// Struct arrays: first write the validity-bitmap section, then gather every
// field of the struct into a vector and hand it to PrintChildren with offset 0
// and the struct's own length. Any non-OK Status from WriteValidityBitmap is
// returned immediately via RETURN_NOT_OK.
320 Status
Visit(const StructArray
& array
) {
321 RETURN_NOT_OK(WriteValidityBitmap(array
));
322 std::vector
<std::shared_ptr
<Array
>> children
;
323 children
.reserve(array
.num_fields());
324 for (int i
= 0; i
< array
.num_fields(); ++i
) {
325 children
.emplace_back(array
.field(i
));
327 return PrintChildren(children
, 0, array
.length());
330 Status
Visit(const UnionArray
& array
) {
331 RETURN_NOT_OK(WriteValidityBitmap(array
));
335 Write("-- type_ids: ");
336 UInt8Array
type_codes(array
.length(), array
.type_codes(), nullptr, 0, array
.offset());
337 RETURN_NOT_OK(PrettyPrint(type_codes
, indent_
+ options_
.indent_size
, sink_
));
339 if (array
.mode() == UnionMode::DENSE
) {
342 Write("-- value_offsets: ");
343 Int32Array
value_offsets(
344 array
.length(), checked_cast
<const DenseUnionArray
&>(array
).value_offsets(),
345 nullptr, 0, array
.offset());
346 RETURN_NOT_OK(PrettyPrint(value_offsets
, indent_
+ options_
.indent_size
, sink_
));
349 // Print the children without any offset, because the type ids are absolute
350 std::vector
<std::shared_ptr
<Array
>> children
;
351 children
.reserve(array
.num_fields());
352 for (int i
= 0; i
< array
.num_fields(); ++i
) {
353 children
.emplace_back(array
.field(i
));
355 return PrintChildren(children
, 0, array
.length() + array
.offset());
358 Status
Visit(const DictionaryArray
& array
) {
361 Write("-- dictionary:\n");
363 PrettyPrint(*array
.dictionary(), indent_
+ options_
.indent_size
, sink_
));
367 Write("-- indices:\n");
368 return PrettyPrint(*array
.indices(), indent_
+ options_
.indent_size
, sink_
);
371 Status
Print(const Array
& array
) {
372 RETURN_NOT_OK(VisitArrayInline(array
, this));
378 Status
ArrayPrinter::WriteValidityBitmap(const Array
& array
) {
380 Write("-- is_valid:");
382 if (array
.null_count() > 0) {
385 BooleanArray
is_valid(array
.length(), array
.null_bitmap(), nullptr, 0,
387 return PrettyPrint(is_valid
, indent_
+ options_
.indent_size
, sink_
);
389 Write(" all not null");
// Pretty-print `arr` to `sink` using default-constructed PrettyPrintOptions in
// which only `indent` is overridden. Returns the Status produced by
// ArrayPrinter::Print.
396 Status
PrettyPrint(const Array
& arr
, int indent
, std::ostream
* sink
) {
397 PrettyPrintOptions options
;
398 options
.indent
= indent
;
399 ArrayPrinter
printer(options
, sink
);
400 return printer
.Print(arr
);
// Pretty-print `arr` to `sink` with caller-supplied `options`. Returns the
// Status produced by ArrayPrinter::Print.
403 Status
PrettyPrint(const Array
& arr
, const PrettyPrintOptions
& options
,
404 std::ostream
* sink
) {
405 ArrayPrinter
printer(options
, sink
);
406 return printer
.Print(arr
);
409 Status
PrettyPrint(const Array
& arr
, const PrettyPrintOptions
& options
,
410 std::string
* result
) {
411 std::ostringstream sink
;
412 RETURN_NOT_OK(PrettyPrint(arr
, options
, &sink
));
413 *result
= sink
.str();
417 Status
PrettyPrint(const ChunkedArray
& chunked_arr
, const PrettyPrintOptions
& options
,
418 std::ostream
* sink
) {
419 int num_chunks
= chunked_arr
.num_chunks();
420 int indent
= options
.indent
;
421 int window
= options
.window
;
423 for (int i
= 0; i
< indent
; ++i
) {
427 if (!options
.skip_new_lines
) {
430 bool skip_comma
= true;
431 for (int i
= 0; i
< num_chunks
; ++i
) {
436 if (!options
.skip_new_lines
) {
440 if ((i
>= window
) && (i
< (num_chunks
- window
))) {
441 for (int i
= 0; i
< indent
; ++i
) {
445 if (!options
.skip_new_lines
) {
448 i
= num_chunks
- window
- 1;
451 PrettyPrintOptions chunk_options
= options
;
452 chunk_options
.indent
+= options
.indent_size
;
453 ArrayPrinter
printer(chunk_options
, sink
);
454 RETURN_NOT_OK(printer
.Print(*chunked_arr
.chunk(i
)));
457 if (!options
.skip_new_lines
) {
461 for (int i
= 0; i
< indent
; ++i
) {
469 Status
PrettyPrint(const ChunkedArray
& chunked_arr
, const PrettyPrintOptions
& options
,
470 std::string
* result
) {
471 std::ostringstream sink
;
472 RETURN_NOT_OK(PrettyPrint(chunked_arr
, options
, &sink
));
473 *result
= sink
.str();
477 Status
PrettyPrint(const RecordBatch
& batch
, int indent
, std::ostream
* sink
) {
478 for (int i
= 0; i
< batch
.num_columns(); ++i
) {
479 const std::string
& name
= batch
.column_name(i
);
480 (*sink
) << name
<< ": ";
481 RETURN_NOT_OK(PrettyPrint(*batch
.column(i
), indent
+ 2, sink
));
484 (*sink
) << std::flush
;
488 Status
PrettyPrint(const RecordBatch
& batch
, const PrettyPrintOptions
& options
,
489 std::ostream
* sink
) {
490 for (int i
= 0; i
< batch
.num_columns(); ++i
) {
491 const std::string
& name
= batch
.column_name(i
);
492 PrettyPrintOptions column_options
= options
;
493 column_options
.indent
+= 2;
495 (*sink
) << name
<< ": ";
496 RETURN_NOT_OK(PrettyPrint(*batch
.column(i
), column_options
, sink
));
499 (*sink
) << std::flush
;
503 Status
PrettyPrint(const Table
& table
, const PrettyPrintOptions
& options
,
504 std::ostream
* sink
) {
505 RETURN_NOT_OK(PrettyPrint(*table
.schema(), options
, sink
));
509 PrettyPrintOptions column_options
= options
;
510 column_options
.indent
+= 2;
511 for (int i
= 0; i
< table
.num_columns(); ++i
) {
512 for (int j
= 0; j
< options
.indent
; ++j
) {
515 (*sink
) << table
.schema()->field(i
)->name() << ":\n";
516 RETURN_NOT_OK(PrettyPrint(*table
.column(i
), column_options
, sink
));
519 (*sink
) << std::flush
;
// Debugging aid: pretty-prints `arr` to std::cerr at the given `indent` and
// returns the Status from PrettyPrint.
523 Status
DebugPrint(const Array
& arr
, int indent
) {
524 return PrettyPrint(arr
, indent
, &std::cerr
);
529 class SchemaPrinter
: public PrettyPrinter
{
531 SchemaPrinter(const Schema
& schema
, const PrettyPrintOptions
& options
,
533 : PrettyPrinter(options
, sink
), schema_(schema
) {}
// Writes type.ToString(); for each child field it then writes an indented
// "child <i>, " label followed by PrintField() for that child (per the visible
// parts of the out-of-line definition).
// NOTE(review): how `nullable` affects the output is not visible here.
535 Status
PrintType(const DataType
& type
, bool nullable
);
// Prints one field: calls PrintType() with the field's type and nullability,
// and, when options_.show_field_metadata is set and the field has metadata,
// prints it under "-- field metadata --" one indent level deeper.
536 Status
PrintField(const Field
& field
);
538 void PrintVerboseMetadata(const KeyValueMetadata
& metadata
) {
539 for (int64_t i
= 0; i
< metadata
.size(); ++i
) {
542 Write(metadata
.key(i
) + ": '" + metadata
.value(i
) + "'");
546 void PrintTruncatedMetadata(const KeyValueMetadata
& metadata
) {
547 for (int64_t i
= 0; i
< metadata
.size(); ++i
) {
550 size_t size
= metadata
.value(i
).size();
551 size_t truncated_size
= std::max
<size_t>(10, 70 - metadata
.key(i
).size() - indent_
);
552 if (size
<= truncated_size
) {
553 Write(metadata
.key(i
) + ": '" + metadata
.value(i
) + "'");
557 Write(metadata
.key(i
) + ": '" + metadata
.value(i
).substr(0, truncated_size
) +
558 "' + " + std::to_string(size
- truncated_size
));
562 void PrintMetadata(const std::string
& metadata_type
, const KeyValueMetadata
& metadata
) {
563 if (metadata
.size() > 0) {
566 Write(metadata_type
);
567 if (options_
.truncate_metadata
) {
568 PrintTruncatedMetadata(metadata
);
570 PrintVerboseMetadata(metadata
);
576 for (int i
= 0; i
< schema_
.num_fields(); ++i
) {
583 RETURN_NOT_OK(PrintField(*schema_
.field(i
)));
586 if (options_
.show_schema_metadata
&& schema_
.metadata() != nullptr) {
587 PrintMetadata("-- schema metadata --", *schema_
.metadata());
// Schema being printed. Held by reference (bound in the constructor's
// initializer list), so it must outlive this printer.
594 const Schema
& schema_
;
597 Status
SchemaPrinter::PrintType(const DataType
& type
, bool nullable
) {
598 Write(type
.ToString());
602 for (int i
= 0; i
< type
.num_fields(); ++i
) {
606 std::stringstream ss
;
607 ss
<< "child " << i
<< ", ";
609 indent_
+= options_
.indent_size
;
610 WriteIndented(ss
.str());
611 RETURN_NOT_OK(PrintField(*type
.field(i
)));
612 indent_
-= options_
.indent_size
;
617 Status
SchemaPrinter::PrintField(const Field
& field
) {
620 RETURN_NOT_OK(PrintType(*field
.type(), field
.nullable()));
622 if (options_
.show_field_metadata
&& field
.metadata() != nullptr) {
623 indent_
+= options_
.indent_size
;
624 PrintMetadata("-- field metadata --", *field
.metadata());
625 indent_
-= options_
.indent_size
;
// Pretty-print `schema` to `sink` by running a temporary SchemaPrinter built
// from `schema`, `options` and `sink`. Returns the Status from its Print().
632 Status
PrettyPrint(const Schema
& schema
, const PrettyPrintOptions
& options
,
633 std::ostream
* sink
) {
634 SchemaPrinter
printer(schema
, options
, sink
);
635 return printer
.Print();
638 Status
PrettyPrint(const Schema
& schema
, const PrettyPrintOptions
& options
,
639 std::string
* result
) {
640 std::ostringstream sink
;
641 RETURN_NOT_OK(PrettyPrint(schema
, options
, &sink
));
642 *result
= sink
.str();