1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
8 #include "tools/sst_dump_tool_imp.h"
10 #ifndef __STDC_FORMAT_MACROS
11 #define __STDC_FORMAT_MACROS
20 #include "db/memtable.h"
21 #include "db/write_batch_internal.h"
22 #include "options/cf_options.h"
23 #include "rocksdb/db.h"
24 #include "rocksdb/env.h"
25 #include "rocksdb/iterator.h"
26 #include "rocksdb/slice_transform.h"
27 #include "rocksdb/status.h"
28 #include "rocksdb/table_properties.h"
29 #include "rocksdb/utilities/ldb_cmd.h"
30 #include "table/block.h"
31 #include "table/block_based_table_builder.h"
32 #include "table/block_based_table_factory.h"
33 #include "table/block_builder.h"
34 #include "table/format.h"
35 #include "table/meta_blocks.h"
36 #include "table/plain_table_factory.h"
37 #include "table/table_reader.h"
38 #include "util/compression.h"
39 #include "util/random.h"
41 #include "port/port.h"
45 using std::dynamic_pointer_cast
;
47 SstFileReader::SstFileReader(const std::string
& file_path
,
50 :file_name_(file_path
), read_num_(0), verify_checksum_(verify_checksum
),
51 output_hex_(output_hex
), ioptions_(options_
),
52 internal_comparator_(BytewiseComparator()) {
53 fprintf(stdout
, "Process %s\n", file_path
.c_str());
54 init_result_
= GetTableReader(file_name_
);
57 extern const uint64_t kBlockBasedTableMagicNumber
;
58 extern const uint64_t kLegacyBlockBasedTableMagicNumber
;
59 extern const uint64_t kPlainTableMagicNumber
;
60 extern const uint64_t kLegacyPlainTableMagicNumber
;
62 const char* testFileName
= "test_file_name";
64 Status
SstFileReader::GetTableReader(const std::string
& file_path
) {
65 // Warning about 'magic_number' being uninitialized shows up only in UBsan
66 // builds. Though access is guarded by 's.ok()' checks, fix the issue to
67 // avoid any warnings.
68 uint64_t magic_number
= Footer::kInvalidTableMagicNumber
;
70 // read table magic number
73 unique_ptr
<RandomAccessFile
> file
;
75 Status s
= options_
.env
->NewRandomAccessFile(file_path
, &file
, soptions_
);
77 s
= options_
.env
->GetFileSize(file_path
, &file_size
);
80 file_
.reset(new RandomAccessFileReader(std::move(file
)));
83 s
= ReadFooterFromFile(file_
.get(), file_size
, &footer
);
86 magic_number
= footer
.table_magic_number();
90 if (magic_number
== kPlainTableMagicNumber
||
91 magic_number
== kLegacyPlainTableMagicNumber
) {
92 soptions_
.use_mmap_reads
= true;
93 options_
.env
->NewRandomAccessFile(file_path
, &file
, soptions_
);
94 file_
.reset(new RandomAccessFileReader(std::move(file
)));
96 options_
.comparator
= &internal_comparator_
;
97 // For old sst format, ReadTableProperties might fail but file can be read
98 if (ReadTableProperties(magic_number
, file_
.get(), file_size
).ok()) {
99 SetTableOptionsByMagicNumber(magic_number
);
101 SetOldTableOptions();
106 s
= NewTableReader(ioptions_
, soptions_
, internal_comparator_
, file_size
,
112 Status
SstFileReader::NewTableReader(
113 const ImmutableCFOptions
& ioptions
, const EnvOptions
& soptions
,
114 const InternalKeyComparator
& internal_comparator
, uint64_t file_size
,
115 unique_ptr
<TableReader
>* table_reader
) {
116 // We need to turn off pre-fetching of index and filter nodes for
118 shared_ptr
<BlockBasedTableFactory
> block_table_factory
=
119 dynamic_pointer_cast
<BlockBasedTableFactory
>(options_
.table_factory
);
121 if (block_table_factory
) {
122 return block_table_factory
->NewTableReader(
123 TableReaderOptions(ioptions_
, soptions_
, internal_comparator_
,
124 /*skip_filters=*/false),
125 std::move(file_
), file_size
, &table_reader_
, /*enable_prefetch=*/false);
128 assert(!block_table_factory
);
130 // For all other factory implementations
131 return options_
.table_factory
->NewTableReader(
132 TableReaderOptions(ioptions_
, soptions_
, internal_comparator_
),
133 std::move(file_
), file_size
, &table_reader_
);
136 Status
SstFileReader::DumpTable(const std::string
& out_filename
) {
137 unique_ptr
<WritableFile
> out_file
;
138 Env
* env
= Env::Default();
139 env
->NewWritableFile(out_filename
, &out_file
, soptions_
);
140 Status s
= table_reader_
->DumpTable(out_file
.get());
145 uint64_t SstFileReader::CalculateCompressedTableSize(
146 const TableBuilderOptions
& tb_options
, size_t block_size
) {
147 unique_ptr
<WritableFile
> out_file
;
148 unique_ptr
<Env
> env(NewMemEnv(Env::Default()));
149 env
->NewWritableFile(testFileName
, &out_file
, soptions_
);
150 unique_ptr
<WritableFileWriter
> dest_writer
;
151 dest_writer
.reset(new WritableFileWriter(std::move(out_file
), soptions_
));
152 BlockBasedTableOptions table_options
;
153 table_options
.block_size
= block_size
;
154 BlockBasedTableFactory
block_based_tf(table_options
);
155 unique_ptr
<TableBuilder
> table_builder
;
156 table_builder
.reset(block_based_tf
.NewTableBuilder(
158 TablePropertiesCollectorFactory::Context::kUnknownColumnFamily
,
160 unique_ptr
<InternalIterator
> iter(table_reader_
->NewIterator(ReadOptions()));
161 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
162 if (!iter
->status().ok()) {
163 fputs(iter
->status().ToString().c_str(), stderr
);
166 table_builder
->Add(iter
->key(), iter
->value());
168 Status s
= table_builder
->Finish();
170 fputs(s
.ToString().c_str(), stderr
);
173 uint64_t size
= table_builder
->FileSize();
174 env
->DeleteFile(testFileName
);
178 int SstFileReader::ShowAllCompressionSizes(size_t block_size
) {
179 ReadOptions read_options
;
181 const ImmutableCFOptions
imoptions(opts
);
182 rocksdb::InternalKeyComparator
ikc(opts
.comparator
);
183 std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
> >
184 block_based_table_factories
;
186 fprintf(stdout
, "Block Size: %" ROCKSDB_PRIszt
"\n", block_size
);
188 std::pair
<CompressionType
, const char*> compressions
[] = {
189 {CompressionType::kNoCompression
, "kNoCompression"},
190 {CompressionType::kSnappyCompression
, "kSnappyCompression"},
191 {CompressionType::kZlibCompression
, "kZlibCompression"},
192 {CompressionType::kBZip2Compression
, "kBZip2Compression"},
193 {CompressionType::kLZ4Compression
, "kLZ4Compression"},
194 {CompressionType::kLZ4HCCompression
, "kLZ4HCCompression"},
195 {CompressionType::kXpressCompression
, "kXpressCompression"},
196 {CompressionType::kZSTD
, "kZSTD"}};
198 for (auto& i
: compressions
) {
199 if (CompressionTypeSupported(i
.first
)) {
200 CompressionOptions compress_opt
;
201 std::string column_family_name
;
202 int unknown_level
= -1;
203 TableBuilderOptions
tb_opts(imoptions
, ikc
, &block_based_table_factories
,
204 i
.first
, compress_opt
,
205 nullptr /* compression_dict */,
206 false /* skip_filters */, column_family_name
,
208 uint64_t file_size
= CalculateCompressedTableSize(tb_opts
, block_size
);
209 fprintf(stdout
, "Compression: %s", i
.second
);
210 fprintf(stdout
, " Size: %" PRIu64
"\n", file_size
);
212 fprintf(stdout
, "Unsupported compression type: %s.\n", i
.second
);
218 Status
SstFileReader::ReadTableProperties(uint64_t table_magic_number
,
219 RandomAccessFileReader
* file
,
220 uint64_t file_size
) {
221 TableProperties
* table_properties
= nullptr;
222 Status s
= rocksdb::ReadTableProperties(file
, file_size
, table_magic_number
,
223 ioptions_
, &table_properties
);
225 table_properties_
.reset(table_properties
);
227 fprintf(stdout
, "Not able to read table properties\n");
232 Status
SstFileReader::SetTableOptionsByMagicNumber(
233 uint64_t table_magic_number
) {
234 assert(table_properties_
);
235 if (table_magic_number
== kBlockBasedTableMagicNumber
||
236 table_magic_number
== kLegacyBlockBasedTableMagicNumber
) {
237 options_
.table_factory
= std::make_shared
<BlockBasedTableFactory
>();
238 fprintf(stdout
, "Sst file format: block-based\n");
239 auto& props
= table_properties_
->user_collected_properties
;
240 auto pos
= props
.find(BlockBasedTablePropertyNames::kIndexType
);
241 if (pos
!= props
.end()) {
242 auto index_type_on_file
= static_cast<BlockBasedTableOptions::IndexType
>(
243 DecodeFixed32(pos
->second
.c_str()));
244 if (index_type_on_file
==
245 BlockBasedTableOptions::IndexType::kHashSearch
) {
246 options_
.prefix_extractor
.reset(NewNoopTransform());
249 } else if (table_magic_number
== kPlainTableMagicNumber
||
250 table_magic_number
== kLegacyPlainTableMagicNumber
) {
251 options_
.allow_mmap_reads
= true;
253 PlainTableOptions plain_table_options
;
254 plain_table_options
.user_key_len
= kPlainTableVariableLength
;
255 plain_table_options
.bloom_bits_per_key
= 0;
256 plain_table_options
.hash_table_ratio
= 0;
257 plain_table_options
.index_sparseness
= 1;
258 plain_table_options
.huge_page_tlb_size
= 0;
259 plain_table_options
.encoding_type
= kPlain
;
260 plain_table_options
.full_scan_mode
= true;
262 options_
.table_factory
.reset(NewPlainTableFactory(plain_table_options
));
263 fprintf(stdout
, "Sst file format: plain table\n");
265 char error_msg_buffer
[80];
266 snprintf(error_msg_buffer
, sizeof(error_msg_buffer
) - 1,
267 "Unsupported table magic number --- %lx",
268 (long)table_magic_number
);
269 return Status::InvalidArgument(error_msg_buffer
);
275 Status
SstFileReader::SetOldTableOptions() {
276 assert(table_properties_
== nullptr);
277 options_
.table_factory
= std::make_shared
<BlockBasedTableFactory
>();
278 fprintf(stdout
, "Sst file format: block-based(old version)\n");
283 Status
SstFileReader::ReadSequential(bool print_kv
, uint64_t read_num
,
284 bool has_from
, const std::string
& from_key
,
285 bool has_to
, const std::string
& to_key
,
286 bool use_from_as_prefix
) {
287 if (!table_reader_
) {
291 InternalIterator
* iter
=
292 table_reader_
->NewIterator(ReadOptions(verify_checksum_
, false));
296 ikey
.SetMaxPossibleForUserKey(from_key
);
297 iter
->Seek(ikey
.Encode());
301 for (; iter
->Valid(); iter
->Next()) {
302 Slice key
= iter
->key();
303 Slice value
= iter
->value();
305 if (read_num
> 0 && i
> read_num
)
308 ParsedInternalKey ikey
;
309 if (!ParseInternalKey(key
, &ikey
)) {
310 std::cerr
<< "Internal Key ["
311 << key
.ToString(true /* in hex*/)
312 << "] parse error!\n";
316 // the key returned is not prefixed with our 'from' key
317 if (use_from_as_prefix
&& !ikey
.user_key
.starts_with(from_key
)) {
321 // If end marker was specified, we stop before it
322 if (has_to
&& BytewiseComparator()->Compare(ikey
.user_key
, to_key
) >= 0) {
327 fprintf(stdout
, "%s => %s\n",
328 ikey
.DebugString(output_hex_
).c_str(),
329 value
.ToString(output_hex_
).c_str());
335 Status ret
= iter
->status();
340 Status
SstFileReader::ReadTableProperties(
341 std::shared_ptr
<const TableProperties
>* table_properties
) {
342 if (!table_reader_
) {
346 *table_properties
= table_reader_
->GetTableProperties();
354 R
"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw]
355 --file=<data_dir_OR_sst_file>
356 Path to SST file or directory containing SST files
358 --command=check|scan|raw
359 check: Iterate over entries in files but dont print anything except if an error is encounterd (default command)
360 scan: Iterate over entries in files and print them to screen
361 raw: Dump all the table contents to <file_name>_dump.txt
364 Can be combined with scan command to print the keys and values in Hex
367 Key to start reading from when executing check|scan
370 Key to stop reading at when executing check|scan
373 Returns all keys with this prefix when executing check|scan
374 Cannot be used in conjunction with --from
377 Maximum number of entries to read when executing check|scan
380 Verify file checksum when executing check|scan
383 Can be combined with --from and --to to indicate that these values are encoded in Hex
386 Print table properties after iterating over the file
388 --show_compression_sizes
389 Independent command that will recreate the SST file using 16K block size with different
390 compressions and report the size of the file using such compression
392 --set_block_size=<block_size>
393 Can be combined with --show_compression_sizes to set the block size that will be used
394 when trying different compression algorithms
396 --parse_internal_key=<0xKEY>
397 Convenience option to parse an internal key on the command line. Dumps the
398 internal key in hex format {'key' @ SN: type}
404 int SSTDumpTool::Run(int argc
, char** argv
) {
405 const char* dir_or_file
= nullptr;
406 uint64_t read_num
= -1;
411 bool verify_checksum
= false;
412 bool output_hex
= false;
413 bool input_key_hex
= false;
414 bool has_from
= false;
416 bool use_from_as_prefix
= false;
417 bool show_properties
= false;
418 bool show_compression_sizes
= false;
419 bool show_summary
= false;
420 bool set_block_size
= false;
421 std::string from_key
;
423 std::string block_size_str
;
425 uint64_t total_num_files
= 0;
426 uint64_t total_num_data_blocks
= 0;
427 uint64_t total_data_block_size
= 0;
428 uint64_t total_index_block_size
= 0;
429 uint64_t total_filter_block_size
= 0;
430 for (int i
= 1; i
< argc
; i
++) {
431 if (strncmp(argv
[i
], "--file=", 7) == 0) {
432 dir_or_file
= argv
[i
] + 7;
433 } else if (strcmp(argv
[i
], "--output_hex") == 0) {
435 } else if (strcmp(argv
[i
], "--input_key_hex") == 0) {
436 input_key_hex
= true;
437 } else if (sscanf(argv
[i
],
439 (unsigned long*)&n
, &junk
) == 1) {
441 } else if (strcmp(argv
[i
], "--verify_checksum") == 0) {
442 verify_checksum
= true;
443 } else if (strncmp(argv
[i
], "--command=", 10) == 0) {
444 command
= argv
[i
] + 10;
445 } else if (strncmp(argv
[i
], "--from=", 7) == 0) {
446 from_key
= argv
[i
] + 7;
448 } else if (strncmp(argv
[i
], "--to=", 5) == 0) {
449 to_key
= argv
[i
] + 5;
451 } else if (strncmp(argv
[i
], "--prefix=", 9) == 0) {
452 from_key
= argv
[i
] + 9;
453 use_from_as_prefix
= true;
454 } else if (strcmp(argv
[i
], "--show_properties") == 0) {
455 show_properties
= true;
456 } else if (strcmp(argv
[i
], "--show_compression_sizes") == 0) {
457 show_compression_sizes
= true;
458 } else if (strcmp(argv
[i
], "--show_summary") == 0) {
460 } else if (strncmp(argv
[i
], "--set_block_size=", 17) == 0) {
461 set_block_size
= true;
462 block_size_str
= argv
[i
] + 17;
463 std::istringstream
iss(block_size_str
);
465 fprintf(stderr
, "block size must be numeric");
469 } else if (strncmp(argv
[i
], "--parse_internal_key=", 21) == 0) {
470 std::string
in_key(argv
[i
] + 21);
472 in_key
= rocksdb::LDBCommand::HexToString(in_key
);
474 std::cerr
<< "ERROR: Invalid key input '"
476 << "' Use 0x{hex representation of internal rocksdb key}" << std::endl
;
479 Slice sl_key
= rocksdb::Slice(in_key
);
480 ParsedInternalKey ikey
;
482 if (!ParseInternalKey(sl_key
, &ikey
)) {
483 std::cerr
<< "Internal Key [" << sl_key
.ToString(true /* in hex*/)
484 << "] parse error!\n";
487 fprintf(stdout
, "key=%s\n", ikey
.DebugString(true).c_str());
490 fprintf(stderr
, "Unrecognized argument '%s'\n\n", argv
[i
]);
496 if (use_from_as_prefix
&& has_from
) {
497 fprintf(stderr
, "Cannot specify --prefix and --from\n\n");
502 if (has_from
|| use_from_as_prefix
) {
503 from_key
= rocksdb::LDBCommand::HexToString(from_key
);
506 to_key
= rocksdb::LDBCommand::HexToString(to_key
);
510 if (dir_or_file
== nullptr) {
511 fprintf(stderr
, "file or directory must be specified.\n\n");
516 std::vector
<std::string
> filenames
;
517 rocksdb::Env
* env
= rocksdb::Env::Default();
518 rocksdb::Status st
= env
->GetChildren(dir_or_file
, &filenames
);
522 filenames
.push_back(dir_or_file
);
526 fprintf(stdout
, "from [%s] to [%s]\n",
527 rocksdb::Slice(from_key
).ToString(true).c_str(),
528 rocksdb::Slice(to_key
).ToString(true).c_str());
530 uint64_t total_read
= 0;
531 for (size_t i
= 0; i
< filenames
.size(); i
++) {
532 std::string filename
= filenames
.at(i
);
533 if (filename
.length() <= 4 ||
534 filename
.rfind(".sst") != filename
.length() - 4) {
539 filename
= std::string(dir_or_file
) + "/" + filename
;
542 rocksdb::SstFileReader
reader(filename
, verify_checksum
,
544 if (!reader
.getStatus().ok()) {
545 fprintf(stderr
, "%s: %s\n", filename
.c_str(),
546 reader
.getStatus().ToString().c_str());
550 if (show_compression_sizes
) {
551 if (set_block_size
) {
552 reader
.ShowAllCompressionSizes(block_size
);
554 reader
.ShowAllCompressionSizes(16384);
559 if (command
== "raw") {
560 std::string out_filename
= filename
.substr(0, filename
.length() - 4);
561 out_filename
.append("_dump.txt");
563 st
= reader
.DumpTable(out_filename
);
565 fprintf(stderr
, "%s: %s\n", filename
.c_str(), st
.ToString().c_str());
568 fprintf(stdout
, "raw dump written to file %s\n", &out_filename
[0]);
573 // scan all files in the given file path.
574 if (command
== "" || command
== "scan" || command
== "check") {
575 st
= reader
.ReadSequential(
576 command
== "scan", read_num
> 0 ? (read_num
- total_read
) : read_num
,
577 has_from
|| use_from_as_prefix
, from_key
, has_to
, to_key
,
580 fprintf(stderr
, "%s: %s\n", filename
.c_str(),
581 st
.ToString().c_str());
583 total_read
+= reader
.GetReadNumber();
584 if (read_num
> 0 && total_read
> read_num
) {
589 if (show_properties
|| show_summary
) {
590 const rocksdb::TableProperties
* table_properties
;
592 std::shared_ptr
<const rocksdb::TableProperties
>
593 table_properties_from_reader
;
594 st
= reader
.ReadTableProperties(&table_properties_from_reader
);
596 fprintf(stderr
, "%s: %s\n", filename
.c_str(), st
.ToString().c_str());
597 fprintf(stderr
, "Try to use initial table properties\n");
598 table_properties
= reader
.GetInitTableProperties();
600 table_properties
= table_properties_from_reader
.get();
602 if (table_properties
!= nullptr) {
603 if (show_properties
) {
605 "Table Properties:\n"
606 "------------------------------\n"
608 table_properties
->ToString("\n ", ": ").c_str());
609 fprintf(stdout
, "# deleted keys: %" PRIu64
"\n",
610 rocksdb::GetDeletedKeys(
611 table_properties
->user_collected_properties
));
613 bool property_present
;
614 uint64_t merge_operands
= rocksdb::GetMergeOperands(
615 table_properties
->user_collected_properties
, &property_present
);
616 if (property_present
) {
617 fprintf(stdout
, " # merge operands: %" PRIu64
"\n",
620 fprintf(stdout
, " # merge operands: UNKNOWN\n");
623 total_num_files
+= 1;
624 total_num_data_blocks
+= table_properties
->num_data_blocks
;
625 total_data_block_size
+= table_properties
->data_size
;
626 total_index_block_size
+= table_properties
->index_size
;
627 total_filter_block_size
+= table_properties
->filter_size
;
629 if (show_properties
) {
631 "Raw user collected properties\n"
632 "------------------------------\n");
633 for (const auto& kv
: table_properties
->user_collected_properties
) {
634 std::string prop_name
= kv
.first
;
635 std::string prop_val
= Slice(kv
.second
).ToString(true);
636 fprintf(stdout
, " # %s: 0x%s\n", prop_name
.c_str(),
643 fprintf(stdout
, "total number of files: %" PRIu64
"\n", total_num_files
);
644 fprintf(stdout
, "total number of data blocks: %" PRIu64
"\n",
645 total_num_data_blocks
);
646 fprintf(stdout
, "total data block size: %" PRIu64
"\n",
647 total_data_block_size
);
648 fprintf(stdout
, "total index block size: %" PRIu64
"\n",
649 total_index_block_size
);
650 fprintf(stdout
, "total filter block size: %" PRIu64
"\n",
651 total_filter_block_size
);
655 } // namespace rocksdb
657 #endif // ROCKSDB_LITE