]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/pretty_print.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / pretty_print.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/pretty_print.h"
19
20 #include <algorithm>
21 #include <chrono>
22 #include <cstddef>
23 #include <cstdint>
24 #include <iostream>
25 #include <limits>
26 #include <memory>
27 #include <sstream> // IWYU pragma: keep
28 #include <string>
29 #include <type_traits>
30 #include <vector>
31
32 #include "arrow/array.h"
33 #include "arrow/chunked_array.h"
34 #include "arrow/record_batch.h"
35 #include "arrow/status.h"
36 #include "arrow/table.h"
37 #include "arrow/type.h"
38 #include "arrow/type_traits.h"
39 #include "arrow/util/checked_cast.h"
40 #include "arrow/util/formatting.h"
41 #include "arrow/util/int_util_internal.h"
42 #include "arrow/util/key_value_metadata.h"
43 #include "arrow/util/string.h"
44 #include "arrow/util/string_view.h"
45 #include "arrow/vendored/datetime.h"
46 #include "arrow/visitor_inline.h"
47
48 namespace arrow {
49
50 using internal::checked_cast;
51 using internal::StringFormatter;
52
53 namespace {
54
55 class PrettyPrinter {
56 public:
57 PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink)
58 : options_(options), indent_(options.indent), sink_(sink) {}
59
60 inline void Write(util::string_view data);
61 inline void WriteIndented(util::string_view data);
62 inline void Newline();
63 inline void Indent();
64 inline void IndentAfterNewline();
65 void OpenArray(const Array& array);
66 void CloseArray(const Array& array);
67 void Flush() { (*sink_) << std::flush; }
68
69 PrettyPrintOptions ChildOptions() const {
70 PrettyPrintOptions child_options = options_;
71 child_options.indent = indent_;
72 return child_options;
73 }
74
75 protected:
76 const PrettyPrintOptions& options_;
77 int indent_;
78 std::ostream* sink_;
79 };
80
81 void PrettyPrinter::OpenArray(const Array& array) {
82 if (!options_.skip_new_lines) {
83 Indent();
84 }
85 (*sink_) << "[";
86 if (array.length() > 0) {
87 Newline();
88 indent_ += options_.indent_size;
89 }
90 }
91
92 void PrettyPrinter::CloseArray(const Array& array) {
93 if (array.length() > 0) {
94 indent_ -= options_.indent_size;
95 if (!options_.skip_new_lines) {
96 Indent();
97 }
98 }
99 (*sink_) << "]";
100 }
101
102 void PrettyPrinter::Write(util::string_view data) { (*sink_) << data; }
103
104 void PrettyPrinter::WriteIndented(util::string_view data) {
105 Indent();
106 Write(data);
107 }
108
109 void PrettyPrinter::Newline() {
110 if (options_.skip_new_lines) {
111 return;
112 }
113 (*sink_) << "\n";
114 }
115
116 void PrettyPrinter::Indent() {
117 for (int i = 0; i < indent_; ++i) {
118 (*sink_) << " ";
119 }
120 }
121
122 void PrettyPrinter::IndentAfterNewline() {
123 if (options_.skip_new_lines) {
124 return;
125 }
126 Indent();
127 }
128
129 class ArrayPrinter : public PrettyPrinter {
130 public:
131 ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink)
132 : PrettyPrinter(options, sink) {}
133
134 private:
135 template <typename FormatFunction>
136 Status WriteValues(const Array& array, FormatFunction&& func,
137 bool indent_non_null_values = true) {
138 // `indent_non_null_values` should be false if `FormatFunction` applies
139 // indentation itself.
140 for (int64_t i = 0; i < array.length(); ++i) {
141 const bool is_last = (i == array.length() - 1);
142 if ((i >= options_.window) && (i < (array.length() - options_.window))) {
143 IndentAfterNewline();
144 (*sink_) << "...";
145 if (!is_last && options_.skip_new_lines) {
146 (*sink_) << ",";
147 }
148 i = array.length() - options_.window - 1;
149 } else if (array.IsNull(i)) {
150 IndentAfterNewline();
151 (*sink_) << options_.null_rep;
152 if (!is_last) {
153 (*sink_) << ",";
154 }
155 } else {
156 if (indent_non_null_values) {
157 IndentAfterNewline();
158 }
159 RETURN_NOT_OK(func(i));
160 if (!is_last) {
161 (*sink_) << ",";
162 }
163 }
164 Newline();
165 }
166 return Status::OK();
167 }
168
169 template <typename ArrayType, typename Formatter>
170 Status WritePrimitiveValues(const ArrayType& array, Formatter* formatter) {
171 auto appender = [&](util::string_view v) { (*sink_) << v; };
172 auto format_func = [&](int64_t i) {
173 (*formatter)(array.GetView(i), appender);
174 return Status::OK();
175 };
176 return WriteValues(array, std::move(format_func));
177 }
178
179 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
180 Status WritePrimitiveValues(const ArrayType& array) {
181 StringFormatter<T> formatter{array.type()};
182 return WritePrimitiveValues(array, &formatter);
183 }
184
185 Status WriteValidityBitmap(const Array& array);
186
187 Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
188 int64_t length) {
189 for (size_t i = 0; i < fields.size(); ++i) {
190 Newline();
191 Indent();
192 std::stringstream ss;
193 ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
194 Write(ss.str());
195
196 std::shared_ptr<Array> field = fields[i];
197 if (offset != 0) {
198 field = field->Slice(offset, length);
199 }
200 RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
201 }
202 return Status::OK();
203 }
204
205 //
206 // WriteDataValues(): generic function to write values from an array
207 //
208
209 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
210 enable_if_has_c_type<T, Status> WriteDataValues(const ArrayType& array) {
211 return WritePrimitiveValues(array);
212 }
213
214 Status WriteDataValues(const HalfFloatArray& array) {
215 // XXX do not know how to format half floats yet
216 StringFormatter<Int16Type> formatter{array.type()};
217 return WritePrimitiveValues(array, &formatter);
218 }
219
220 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
221 enable_if_string_like<T, Status> WriteDataValues(const ArrayType& array) {
222 return WriteValues(array, [&](int64_t i) {
223 (*sink_) << "\"" << array.GetView(i) << "\"";
224 return Status::OK();
225 });
226 }
227
228 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
229 enable_if_t<is_binary_like_type<T>::value && !is_decimal_type<T>::value, Status>
230 WriteDataValues(const ArrayType& array) {
231 return WriteValues(array, [&](int64_t i) {
232 (*sink_) << HexEncode(array.GetView(i));
233 return Status::OK();
234 });
235 }
236
237 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
238 enable_if_decimal<T, Status> WriteDataValues(const ArrayType& array) {
239 return WriteValues(array, [&](int64_t i) {
240 (*sink_) << array.FormatValue(i);
241 return Status::OK();
242 });
243 }
244
245 template <typename ArrayType, typename T = typename ArrayType::TypeClass>
246 enable_if_list_like<T, Status> WriteDataValues(const ArrayType& array) {
247 const auto values = array.values();
248 const auto child_options = ChildOptions();
249 ArrayPrinter values_printer(child_options, sink_);
250
251 return WriteValues(
252 array,
253 [&](int64_t i) {
254 // XXX this could be much faster if ArrayPrinter allowed specifying start and
255 // stop endpoints.
256 return values_printer.Print(
257 *values->Slice(array.value_offset(i), array.value_length(i)));
258 },
259 /*indent_non_null_values=*/false);
260 }
261
262 Status WriteDataValues(const MapArray& array) {
263 const auto keys = array.keys();
264 const auto items = array.items();
265 const auto child_options = ChildOptions();
266 ArrayPrinter values_printer(child_options, sink_);
267
268 return WriteValues(
269 array,
270 [&](int64_t i) {
271 Indent();
272 (*sink_) << "keys:";
273 Newline();
274 RETURN_NOT_OK(values_printer.Print(
275 *keys->Slice(array.value_offset(i), array.value_length(i))));
276 Newline();
277 IndentAfterNewline();
278 (*sink_) << "values:";
279 Newline();
280 RETURN_NOT_OK(values_printer.Print(
281 *items->Slice(array.value_offset(i), array.value_length(i))));
282 return Status::OK();
283 },
284 /*indent_non_null_values=*/false);
285 }
286
287 public:
288 template <typename T>
289 enable_if_t<std::is_base_of<PrimitiveArray, T>::value ||
290 std::is_base_of<FixedSizeBinaryArray, T>::value ||
291 std::is_base_of<BinaryArray, T>::value ||
292 std::is_base_of<LargeBinaryArray, T>::value ||
293 std::is_base_of<ListArray, T>::value ||
294 std::is_base_of<LargeListArray, T>::value ||
295 std::is_base_of<MapArray, T>::value ||
296 std::is_base_of<FixedSizeListArray, T>::value,
297 Status>
298 Visit(const T& array) {
299 Status st = array.Validate();
300 if (!st.ok()) {
301 (*sink_) << "<Invalid array: " << st.message() << ">";
302 return Status::OK();
303 }
304
305 OpenArray(array);
306 if (array.length() > 0) {
307 RETURN_NOT_OK(WriteDataValues(array));
308 }
309 CloseArray(array);
310 return Status::OK();
311 }
312
313 Status Visit(const NullArray& array) {
314 (*sink_) << array.length() << " nulls";
315 return Status::OK();
316 }
317
318 Status Visit(const ExtensionArray& array) { return Print(*array.storage()); }
319
320 Status Visit(const StructArray& array) {
321 RETURN_NOT_OK(WriteValidityBitmap(array));
322 std::vector<std::shared_ptr<Array>> children;
323 children.reserve(array.num_fields());
324 for (int i = 0; i < array.num_fields(); ++i) {
325 children.emplace_back(array.field(i));
326 }
327 return PrintChildren(children, 0, array.length());
328 }
329
330 Status Visit(const UnionArray& array) {
331 RETURN_NOT_OK(WriteValidityBitmap(array));
332
333 Newline();
334 Indent();
335 Write("-- type_ids: ");
336 UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
337 RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
338
339 if (array.mode() == UnionMode::DENSE) {
340 Newline();
341 Indent();
342 Write("-- value_offsets: ");
343 Int32Array value_offsets(
344 array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
345 nullptr, 0, array.offset());
346 RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
347 }
348
349 // Print the children without any offset, because the type ids are absolute
350 std::vector<std::shared_ptr<Array>> children;
351 children.reserve(array.num_fields());
352 for (int i = 0; i < array.num_fields(); ++i) {
353 children.emplace_back(array.field(i));
354 }
355 return PrintChildren(children, 0, array.length() + array.offset());
356 }
357
358 Status Visit(const DictionaryArray& array) {
359 Newline();
360 Indent();
361 Write("-- dictionary:\n");
362 RETURN_NOT_OK(
363 PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
364
365 Newline();
366 Indent();
367 Write("-- indices:\n");
368 return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
369 }
370
371 Status Print(const Array& array) {
372 RETURN_NOT_OK(VisitArrayInline(array, this));
373 Flush();
374 return Status::OK();
375 }
376 };
377
378 Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
379 Indent();
380 Write("-- is_valid:");
381
382 if (array.null_count() > 0) {
383 Newline();
384 Indent();
385 BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
386 array.offset());
387 return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
388 } else {
389 Write(" all not null");
390 return Status::OK();
391 }
392 }
393
394 } // namespace
395
396 Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
397 PrettyPrintOptions options;
398 options.indent = indent;
399 ArrayPrinter printer(options, sink);
400 return printer.Print(arr);
401 }
402
403 Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
404 std::ostream* sink) {
405 ArrayPrinter printer(options, sink);
406 return printer.Print(arr);
407 }
408
409 Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
410 std::string* result) {
411 std::ostringstream sink;
412 RETURN_NOT_OK(PrettyPrint(arr, options, &sink));
413 *result = sink.str();
414 return Status::OK();
415 }
416
417 Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
418 std::ostream* sink) {
419 int num_chunks = chunked_arr.num_chunks();
420 int indent = options.indent;
421 int window = options.window;
422
423 for (int i = 0; i < indent; ++i) {
424 (*sink) << " ";
425 }
426 (*sink) << "[";
427 if (!options.skip_new_lines) {
428 *sink << "\n";
429 }
430 bool skip_comma = true;
431 for (int i = 0; i < num_chunks; ++i) {
432 if (skip_comma) {
433 skip_comma = false;
434 } else {
435 (*sink) << ",";
436 if (!options.skip_new_lines) {
437 *sink << "\n";
438 }
439 }
440 if ((i >= window) && (i < (num_chunks - window))) {
441 for (int i = 0; i < indent; ++i) {
442 (*sink) << " ";
443 }
444 (*sink) << "...";
445 if (!options.skip_new_lines) {
446 *sink << "\n";
447 }
448 i = num_chunks - window - 1;
449 skip_comma = true;
450 } else {
451 PrettyPrintOptions chunk_options = options;
452 chunk_options.indent += options.indent_size;
453 ArrayPrinter printer(chunk_options, sink);
454 RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
455 }
456 }
457 if (!options.skip_new_lines) {
458 *sink << "\n";
459 }
460
461 for (int i = 0; i < indent; ++i) {
462 (*sink) << " ";
463 }
464 (*sink) << "]";
465
466 return Status::OK();
467 }
468
469 Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
470 std::string* result) {
471 std::ostringstream sink;
472 RETURN_NOT_OK(PrettyPrint(chunked_arr, options, &sink));
473 *result = sink.str();
474 return Status::OK();
475 }
476
477 Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
478 for (int i = 0; i < batch.num_columns(); ++i) {
479 const std::string& name = batch.column_name(i);
480 (*sink) << name << ": ";
481 RETURN_NOT_OK(PrettyPrint(*batch.column(i), indent + 2, sink));
482 (*sink) << "\n";
483 }
484 (*sink) << std::flush;
485 return Status::OK();
486 }
487
488 Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options,
489 std::ostream* sink) {
490 for (int i = 0; i < batch.num_columns(); ++i) {
491 const std::string& name = batch.column_name(i);
492 PrettyPrintOptions column_options = options;
493 column_options.indent += 2;
494
495 (*sink) << name << ": ";
496 RETURN_NOT_OK(PrettyPrint(*batch.column(i), column_options, sink));
497 (*sink) << "\n";
498 }
499 (*sink) << std::flush;
500 return Status::OK();
501 }
502
503 Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
504 std::ostream* sink) {
505 RETURN_NOT_OK(PrettyPrint(*table.schema(), options, sink));
506 (*sink) << "\n";
507 (*sink) << "----\n";
508
509 PrettyPrintOptions column_options = options;
510 column_options.indent += 2;
511 for (int i = 0; i < table.num_columns(); ++i) {
512 for (int j = 0; j < options.indent; ++j) {
513 (*sink) << " ";
514 }
515 (*sink) << table.schema()->field(i)->name() << ":\n";
516 RETURN_NOT_OK(PrettyPrint(*table.column(i), column_options, sink));
517 (*sink) << "\n";
518 }
519 (*sink) << std::flush;
520 return Status::OK();
521 }
522
523 Status DebugPrint(const Array& arr, int indent) {
524 return PrettyPrint(arr, indent, &std::cerr);
525 }
526
527 namespace {
528
529 class SchemaPrinter : public PrettyPrinter {
530 public:
531 SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options,
532 std::ostream* sink)
533 : PrettyPrinter(options, sink), schema_(schema) {}
534
535 Status PrintType(const DataType& type, bool nullable);
536 Status PrintField(const Field& field);
537
538 void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
539 for (int64_t i = 0; i < metadata.size(); ++i) {
540 Newline();
541 Indent();
542 Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
543 }
544 }
545
546 void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
547 for (int64_t i = 0; i < metadata.size(); ++i) {
548 Newline();
549 Indent();
550 size_t size = metadata.value(i).size();
551 size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
552 if (size <= truncated_size) {
553 Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
554 continue;
555 }
556
557 Write(metadata.key(i) + ": '" + metadata.value(i).substr(0, truncated_size) +
558 "' + " + std::to_string(size - truncated_size));
559 }
560 }
561
562 void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
563 if (metadata.size() > 0) {
564 Newline();
565 Indent();
566 Write(metadata_type);
567 if (options_.truncate_metadata) {
568 PrintTruncatedMetadata(metadata);
569 } else {
570 PrintVerboseMetadata(metadata);
571 }
572 }
573 }
574
575 Status Print() {
576 for (int i = 0; i < schema_.num_fields(); ++i) {
577 if (i > 0) {
578 Newline();
579 Indent();
580 } else {
581 Indent();
582 }
583 RETURN_NOT_OK(PrintField(*schema_.field(i)));
584 }
585
586 if (options_.show_schema_metadata && schema_.metadata() != nullptr) {
587 PrintMetadata("-- schema metadata --", *schema_.metadata());
588 }
589 Flush();
590 return Status::OK();
591 }
592
593 private:
594 const Schema& schema_;
595 };
596
597 Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
598 Write(type.ToString());
599 if (!nullable) {
600 Write(" not null");
601 }
602 for (int i = 0; i < type.num_fields(); ++i) {
603 Newline();
604 Indent();
605
606 std::stringstream ss;
607 ss << "child " << i << ", ";
608
609 indent_ += options_.indent_size;
610 WriteIndented(ss.str());
611 RETURN_NOT_OK(PrintField(*type.field(i)));
612 indent_ -= options_.indent_size;
613 }
614 return Status::OK();
615 }
616
617 Status SchemaPrinter::PrintField(const Field& field) {
618 Write(field.name());
619 Write(": ");
620 RETURN_NOT_OK(PrintType(*field.type(), field.nullable()));
621
622 if (options_.show_field_metadata && field.metadata() != nullptr) {
623 indent_ += options_.indent_size;
624 PrintMetadata("-- field metadata --", *field.metadata());
625 indent_ -= options_.indent_size;
626 }
627 return Status::OK();
628 }
629
630 } // namespace
631
632 Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
633 std::ostream* sink) {
634 SchemaPrinter printer(schema, options, sink);
635 return printer.Print();
636 }
637
638 Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
639 std::string* result) {
640 std::ostringstream sink;
641 RETURN_NOT_OK(PrettyPrint(schema, options, &sink));
642 *result = sink.str();
643 return Status::OK();
644 }
645
646 } // namespace arrow