1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2012 Facebook.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file.
12 #include "utilities/checkpoint/checkpoint_impl.h"
18 #include <unordered_set>
21 #include "db/wal_manager.h"
22 #include "file/file_util.h"
23 #include "file/filename.h"
24 #include "logging/logging.h"
25 #include "port/port.h"
26 #include "rocksdb/db.h"
27 #include "rocksdb/env.h"
28 #include "rocksdb/metadata.h"
29 #include "rocksdb/options.h"
30 #include "rocksdb/transaction_log.h"
31 #include "rocksdb/types.h"
32 #include "rocksdb/utilities/checkpoint.h"
33 #include "test_util/sync_point.h"
34 #include "util/cast_util.h"
35 #include "util/file_checksum_helper.h"
37 namespace ROCKSDB_NAMESPACE
{
// Factory for checkpoint objects: allocates a CheckpointImpl bound to `db`
// and stores the raw pointer in `*checkpoint_ptr`.
//
// `checkpoint_ptr` is dereferenced unconditionally, so it must be non-null.
// NOTE(review): the object is created with a plain `new`, so presumably the
// caller owns (and must delete) the result -- confirm against the public
// header. The return statement is not visible in this excerpt.
39 Status
Checkpoint::Create(DB
* db
, Checkpoint
** checkpoint_ptr
) {
40 *checkpoint_ptr
= new CheckpointImpl(db
);
// Base-class fallback for CreateCheckpoint. All three arguments are ignored
// (their names are commented out in the signature) and the call always
// yields Status::NotSupported with an empty message.
44 Status
Checkpoint::CreateCheckpoint(const std::string
& /*checkpoint_dir*/,
45 uint64_t /*log_size_for_flush*/,
46 uint64_t* /*sequence_number_ptr*/) {
47 return Status::NotSupported("");
// Best-effort removal of a staging directory.
//
// Visible steps: probe `full_private_path` with FileExists, list its
// children, delete each child as a file, then delete the directory itself.
// Every resulting Status is written to `info_log` and then overwritten by the
// next step; the function is void, so no failure is reported to the caller.
//
// NOTE(review): the declaration of `info_log` and any guards around the
// steps below (e.g. an early exit when the path does not exist) are on lines
// not visible in this excerpt.
50 void CheckpointImpl::CleanStagingDirectory(const std::string
& full_private_path
,
52 std::vector
<std::string
> subchildren
;
// Probe for the directory; the outcome is logged below.
53 Status s
= db_
->GetEnv()->FileExists(full_private_path
);
57 ROCKS_LOG_INFO(info_log
, "File exists %s -- %s", full_private_path
.c_str(),
58 s
.ToString().c_str());
// Collect the names of the entries inside the staging directory.
59 s
= db_
->GetEnv()->GetChildren(full_private_path
, &subchildren
);
// Each child is removed with DeleteFile (not DeleteDir); the per-file status
// is only logged.
61 for (auto& subchild
: subchildren
) {
62 std::string subchild_path
= full_private_path
+ "/" + subchild
;
63 s
= db_
->GetEnv()->DeleteFile(subchild_path
);
64 ROCKS_LOG_INFO(info_log
, "Delete file %s -- %s", subchild_path
.c_str(),
65 s
.ToString().c_str());
68 // finally delete the private dir
69 s
= db_
->GetEnv()->DeleteDir(full_private_path
);
70 ROCKS_LOG_INFO(info_log
, "Delete dir %s -- %s", full_private_path
.c_str(),
71 s
.ToString().c_str());
// Base-class fallback for ExportColumnFamily. All three arguments are
// ignored (their names are commented out in the signature) and the call
// always yields Status::NotSupported with an empty message.
74 Status
Checkpoint::ExportColumnFamily(
75 ColumnFamilyHandle
* /*handle*/, const std::string
& /*export_dir*/,
76 ExportImportFilesMetaData
** /*metadata*/) {
77 return Status::NotSupported("");
80 // Builds an openable snapshot of RocksDB
//
// Parameters:
//   checkpoint_dir      - destination directory; an InvalidArgument status
//                         with the message "Directory exists" is returned
//                         from the FileExists check below.
//   log_size_for_flush  - forwarded unchanged to CreateCustomCheckpoint.
//   sequence_number_ptr - optional output; written only when non-null.
//
// Visible flow: files are linked/copied/created inside a staging directory
// named `<checkpoint_dir without trailing slashes>.tmp`, the staging
// directory is renamed to `checkpoint_dir`, and the renamed directory is
// fsynced. On the failure path the staging directory is cleaned up.
//
// NOTE(review): several guards, `return` statements and closing braces of
// this function are on lines not visible in this excerpt; the comments below
// describe only the statements that are shown.
81 Status
CheckpointImpl::CreateCheckpoint(const std::string
& checkpoint_dir
,
82 uint64_t log_size_for_flush
,
83 uint64_t* sequence_number_ptr
) {
// Local copy of the DB options; used below for info_log and use_fsync.
84 DBOptions db_options
= db_
->GetDBOptions();
// Probe the destination. Any status other than NotFound is asserted to be
// an IOError.
86 Status s
= db_
->GetEnv()->FileExists(checkpoint_dir
);
88 return Status::InvalidArgument("Directory exists");
89 } else if (!s
.IsNotFound()) {
90 assert(s
.IsIOError());
96 "Started the snapshot process -- creating snapshot in directory %s",
97 checkpoint_dir
.c_str());
// Index of the last character that is not '/', used to strip trailing
// slashes when deriving the staging directory name.
99 size_t final_nonslash_idx
= checkpoint_dir
.find_last_not_of('/');
100 if (final_nonslash_idx
== std::string::npos
) {
101 // npos means it's only slashes or empty. Non-empty means it's the root
102 // directory, but it shouldn't be because we verified above the directory
// (the end of the sentence above is on a line not visible in this excerpt)
104 assert(checkpoint_dir
.empty());
105 return Status::InvalidArgument("invalid checkpoint directory name");
// Staging directory: the destination without trailing slashes, plus ".tmp".
108 std::string full_private_path
=
109 checkpoint_dir
.substr(0, final_nonslash_idx
+ 1) + ".tmp";
110 ROCKS_LOG_INFO(db_options
.info_log
,
111 "Snapshot process -- using temporary directory %s",
112 full_private_path
.c_str());
// Clear the staging path before (re)creating it.
113 CleanStagingDirectory(full_private_path
, db_options
.info_log
.get());
114 // create snapshot directory
115 s
= db_
->GetEnv()->CreateDir(full_private_path
);
116 uint64_t sequence_number
= 0;
118 // disable file deletions (the call below is DisableFileDeletions)
119 s
= db_
->DisableFileDeletions();
// Remember whether disabling succeeded so that deletions are re-enabled
// further down only in that case.
120 const bool disabled_file_deletions
= s
.ok();
// A NotSupported result from DisableFileDeletions does not stop the
// checkpoint; only other errors skip the file transfer.
122 if (s
.ok() || s
.IsNotSupported()) {
123 s
= CreateCustomCheckpoint(
// Callback 1: hard-link `src_dirname/fname` into the staging directory.
124 [&](const std::string
& src_dirname
, const std::string
& fname
,
126 ROCKS_LOG_INFO(db_options
.info_log
, "Hard Linking %s",
128 return db_
->GetFileSystem()->LinkFile(
129 src_dirname
+ "/" + fname
, full_private_path
+ "/" + fname
,
130 IOOptions(), nullptr);
131 } /* link_file_cb */,
// Callback 2: copy `src_dirname/fname` into the staging directory, passing
// `size_limit_bytes` and `temperature` through to CopyFile. The checksum
// arguments are unused here (their names are commented out).
132 [&](const std::string
& src_dirname
, const std::string
& fname
,
133 uint64_t size_limit_bytes
, FileType
,
134 const std::string
& /* checksum_func_name */,
135 const std::string
& /* checksum_val */,
136 const Temperature temperature
) {
137 ROCKS_LOG_INFO(db_options
.info_log
, "Copying %s", fname
.c_str());
138 return CopyFile(db_
->GetFileSystem(), src_dirname
+ "/" + fname
,
139 full_private_path
+ "/" + fname
, size_limit_bytes
,
140 db_options
.use_fsync
, nullptr, temperature
);
141 } /* copy_file_cb */,
// Callback 3: create `fname` in the staging directory from in-memory
// `contents`.
142 [&](const std::string
& fname
, const std::string
& contents
, FileType
) {
143 ROCKS_LOG_INFO(db_options
.info_log
, "Creating %s", fname
.c_str());
144 return CreateFile(db_
->GetFileSystem(),
145 full_private_path
+ "/" + fname
, contents
,
146 db_options
.use_fsync
);
147 } /* create_file_cb */,
148 &sequence_number
, log_size_for_flush
);
150 // we copied all the files, enable file deletions
151 if (disabled_file_deletions
) {
152 Status ss
= db_
->EnableFileDeletions(false);
// The result of re-enabling deletions is not propagated; it is explicitly
// marked as intentionally unchecked.
154 ss
.PermitUncheckedError();
160 // move tmp private backup to real snapshot directory
161 s
= db_
->GetEnv()->RenameFile(full_private_path
, checkpoint_dir
);
// Open the renamed directory and fsync it, tagging the sync with the
// kDirRenamed reason.
164 std::unique_ptr
<FSDirectory
> checkpoint_directory
;
165 s
= db_
->GetFileSystem()->NewDirectory(checkpoint_dir
, IOOptions(),
166 &checkpoint_directory
, nullptr);
167 if (s
.ok() && checkpoint_directory
!= nullptr) {
168 s
= checkpoint_directory
->FsyncWithDirOptions(
169 IOOptions(), nullptr,
170 DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed
));
// The sequence number is an optional output.
175 if (sequence_number_ptr
!= nullptr) {
176 *sequence_number_ptr
= sequence_number
;
178 // here we know that we succeeded and installed the new snapshot
179 ROCKS_LOG_INFO(db_options
.info_log
, "Snapshot DONE. All is good");
180 ROCKS_LOG_INFO(db_options
.info_log
, "Snapshot sequence number: %" PRIu64
,
183 // clean all the files we might have created
184 ROCKS_LOG_INFO(db_options
.info_log
, "Snapshot failed -- %s",
185 s
.ToString().c_str());
186 CleanStagingDirectory(full_private_path
, db_options
.info_log
.get());
// Drives a checkpoint through three caller-supplied callbacks.
//
// The body obtains the list of live files from
// DB::GetLiveFilesStorageInfo and, per file, either creates it from
// in-memory replacement contents (`create_file_cb`), hard-links it
// (`link_file_cb`) or copies it (`copy_file_cb`).
//
// Visible parameters:
//   three std::function callbacks - the signatures are shown below; the
//                              parameter names `link_file_cb`,
//                              `copy_file_cb` and `create_file_cb` are taken
//                              from their uses in the body (the declaring
//                              lines are not visible in this excerpt).
//   sequence_number          - output; set to the DB's latest sequence
//                              number before the file list is gathered.
//   log_size_for_flush       - stored into opts.wal_size_for_flush.
//   get_live_table_checksum  - stored into opts.include_checksum_info.
//
// NOTE(review): the declaration of `same_fs`, several guards, else-branches
// and the final return are on lines not visible here.
191 Status
CheckpointImpl::CreateCustomCheckpoint(
// Link callback signature: (source directory, file name, file type).
192 std::function
<Status(const std::string
& src_dirname
,
193 const std::string
& src_fname
, FileType type
)>
// Copy callback signature: additionally receives a byte limit, the checksum
// function name and value, and the file temperature.
196 Status(const std::string
& src_dirname
, const std::string
& src_fname
,
197 uint64_t size_limit_bytes
, FileType type
,
198 const std::string
& checksum_func_name
,
199 const std::string
& checksum_val
, const Temperature temperature
)>
// Create callback signature: begins with (file name, contents); the rest of
// the signature is on a line not visible here.
201 std::function
<Status(const std::string
& fname
, const std::string
& contents
,
204 uint64_t* sequence_number
, uint64_t log_size_for_flush
,
205 bool get_live_table_checksum
) {
// Capture the sequence number before querying the live-file list.
206 *sequence_number
= db_
->GetLatestSequenceNumber();
208 LiveFilesStorageInfoOptions opts
;
209 opts
.include_checksum_info
= get_live_table_checksum
;
210 opts
.wal_size_for_flush
= log_size_for_flush
;
212 std::vector
<LiveFileStorageInfo
> infos
;
214 Status s
= db_
->GetLiveFilesStorageInfo(opts
, &infos
);
220 // Verify that every non-WAL file lives in the same directory
221 // (db_paths / cf_paths not supported)
222 std::unordered_set
<std::string
> dirs
;
223 for (auto& info
: infos
) {
224 if (info
.file_type
!= kWalFile
) {
225 dirs
.insert(info
.directory
);
// More than one distinct directory among non-WAL files is rejected.
228 if (dirs
.size() > 1) {
229 return Status::NotSupported(
230 "db_paths / cf_paths not supported for Checkpoint nor BackupEngine");
// Transfer each live file using one of the three callbacks.
235 for (auto& info
: infos
) {
// Case 1: the file content is supplied in memory.
237 if (!info
.replacement_contents
.empty()) {
238 // Currently should only be used for CURRENT file.
239 assert(info
.file_type
== kCurrentFile
);
// The recorded size must match the replacement contents.
241 if (info
.size
!= info
.replacement_contents
.size()) {
242 s
= Status::Corruption("Inconsistent size metadata for " +
243 info
.relative_filename
);
245 s
= create_file_cb(info
.relative_filename
, info
.replacement_contents
,
// Case 2: hard-link when `same_fs` is set and the file does not have to be
// trimmed to a size.
249 if (same_fs
&& !info
.trim_to_size
) {
250 s
= link_file_cb(info
.directory
, info
.relative_filename
,
// NOTE(review): the body of this NotSupported branch is not visible;
// presumably it switches to copying -- confirm.
252 if (s
.IsNotSupported()) {
// Case 3: copy when linking is not in use or the file must be trimmed.
258 if (!same_fs
|| info
.trim_to_size
) {
// The checksum function name is expected to be empty exactly when checksum
// info was not requested.
259 assert(info
.file_checksum_func_name
.empty() ==
260 !opts
.include_checksum_info
);
261 // no assertion on file_checksum because empty is used for both "not
262 // set" and "unknown"
// With checksum info: pass the recorded checksum name and value through.
263 if (opts
.include_checksum_info
) {
264 s
= copy_file_cb(info
.directory
, info
.relative_filename
, info
.size
,
265 info
.file_type
, info
.file_checksum_func_name
,
266 info
.file_checksum
, info
.temperature
);
// Without checksum info: pass the "unknown" placeholder constants instead.
268 s
= copy_file_cb(info
.directory
, info
.relative_filename
, info
.size
,
269 info
.file_type
, kUnknownFileChecksumFuncName
,
270 kUnknownFileChecksum
, info
.temperature
);
282 // Exports all live SST files of a specified Column Family onto export_dir,
283 // returning SST files information in metadata.
//
// Parameters:
//   handle     - column family to export; it is flushed before its file list
//                is read.
//   export_dir - destination directory; an InvalidArgument status with the
//                message "Specified export_dir exists" is returned from the
//                FileExists check below.
//   metadata   - output; asserted to be non-null and to point at a null
//                pointer. On success it receives a heap-allocated
//                ExportImportFilesMetaData.
//
// Visible flow: files are linked or copied into
// `<export_dir without trailing slashes>.tmp`, that directory is renamed to
// `export_dir` and fsynced, and the metadata is filled in. On failure the
// children of whichever directory currently exists are deleted.
//
// NOTE(review): many guards, returns and closing braces are on lines not
// visible in this excerpt; comments describe only the statements shown.
284 Status
CheckpointImpl::ExportColumnFamily(
285 ColumnFamilyHandle
* handle
, const std::string
& export_dir
,
286 ExportImportFilesMetaData
** metadata
) {
287 auto cfh
= static_cast_with_check
<ColumnFamilyHandleImpl
>(handle
);
// Column family name, used as the "[%s]" prefix in the log messages below.
288 const auto cf_name
= cfh
->GetName();
289 const auto db_options
= db_
->GetDBOptions();
291 assert(metadata
!= nullptr);
292 assert(*metadata
== nullptr);
// Probe the destination. Any status other than NotFound is asserted to be
// an IOError.
293 auto s
= db_
->GetEnv()->FileExists(export_dir
);
295 return Status::InvalidArgument("Specified export_dir exists");
296 } else if (!s
.IsNotFound()) {
297 assert(s
.IsIOError());
// npos means export_dir is empty or consists only of '/' characters.
301 const auto final_nonslash_idx
= export_dir
.find_last_not_of('/');
302 if (final_nonslash_idx
== std::string::npos
) {
303 return Status::InvalidArgument("Specified export_dir invalid");
305 ROCKS_LOG_INFO(db_options
.info_log
,
306 "[%s] export column family onto export directory %s",
307 cf_name
.c_str(), export_dir
.c_str());
309 // Create a temporary export directory.
310 const auto tmp_export_dir
=
311 export_dir
.substr(0, final_nonslash_idx
+ 1) + ".tmp";
312 s
= db_
->GetEnv()->CreateDir(tmp_export_dir
);
// Flush the column family with default FlushOptions before reading its
// file list.
315 s
= db_
->Flush(ROCKSDB_NAMESPACE::FlushOptions(), handle
);
318 ColumnFamilyMetaData db_metadata
;
320 // Export live sst files with file deletions disabled.
321 s
= db_
->DisableFileDeletions();
323 db_
->GetColumnFamilyMetaData(handle
, &db_metadata
);
325 s
= ExportFilesInMetaData(
326 db_options
, db_metadata
,
// Link callback. Paths are joined without a separator;
// ExportFilesInMetaData asserts that each file name begins with '/'.
327 [&](const std::string
& src_dirname
, const std::string
& fname
) {
328 ROCKS_LOG_INFO(db_options
.info_log
, "[%s] HardLinking %s",
329 cf_name
.c_str(), fname
.c_str());
330 return db_
->GetEnv()->LinkFile(src_dirname
+ fname
,
331 tmp_export_dir
+ fname
);
// Copy callback: a size argument of 0 and Temperature::kUnknown are passed
// to CopyFile.
333 [&](const std::string
& src_dirname
, const std::string
& fname
) {
334 ROCKS_LOG_INFO(db_options
.info_log
, "[%s] Copying %s",
335 cf_name
.c_str(), fname
.c_str());
336 return CopyFile(db_
->GetFileSystem(), src_dirname
+ fname
,
337 tmp_export_dir
+ fname
, 0, db_options
.use_fsync
,
338 nullptr, Temperature::kUnknown
);
// Re-enable file deletions (non-forced) now that the files are in place.
341 const auto enable_status
= db_
->EnableFileDeletions(false /*force*/);
// Tracks which directory the failure path has to clean up.
348 auto moved_to_user_specified_dir
= false;
350 // Move temporary export directory to the actual export directory.
351 s
= db_
->GetEnv()->RenameFile(tmp_export_dir
, export_dir
);
355 // Fsync export directory.
356 moved_to_user_specified_dir
= true;
357 std::unique_ptr
<FSDirectory
> dir_ptr
;
358 s
= db_
->GetFileSystem()->NewDirectory(export_dir
, IOOptions(), &dir_ptr
,
361 assert(dir_ptr
!= nullptr);
362 s
= dir_ptr
->FsyncWithDirOptions(
363 IOOptions(), nullptr,
364 DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed
));
369 // Export of files succeeded. Fill in the metadata information.
// The result is allocated with a raw `new` and handed out via `*metadata`.
370 auto result_metadata
= new ExportImportFilesMetaData();
371 result_metadata
->db_comparator_name
= handle
->GetComparator()->Name();
// One LiveFileMetaData entry is appended per file of every level.
372 for (const auto& level_metadata
: db_metadata
.levels
) {
373 for (const auto& file_metadata
: level_metadata
.files
) {
374 LiveFileMetaData live_file_metadata
;
375 live_file_metadata
.size
= file_metadata
.size
;
// NOTE(review): `file_metadata` is bound as `const auto&`, so the std::move
// calls on its members below produce const rvalues; presumably these
// assignments copy rather than move -- confirm.
376 live_file_metadata
.name
= std::move(file_metadata
.name
);
377 live_file_metadata
.file_number
= file_metadata
.file_number
;
// The exported entry records export_dir as its db_path.
378 live_file_metadata
.db_path
= export_dir
;
379 live_file_metadata
.smallest_seqno
= file_metadata
.smallest_seqno
;
380 live_file_metadata
.largest_seqno
= file_metadata
.largest_seqno
;
381 live_file_metadata
.smallestkey
= std::move(file_metadata
.smallestkey
);
382 live_file_metadata
.largestkey
= std::move(file_metadata
.largestkey
);
383 live_file_metadata
.oldest_blob_file_number
=
384 file_metadata
.oldest_blob_file_number
;
385 live_file_metadata
.level
= level_metadata
.level
;
386 result_metadata
->files
.push_back(live_file_metadata
);
388 *metadata
= result_metadata
;
390 ROCKS_LOG_INFO(db_options
.info_log
, "[%s] Export succeeded.",
393 // Failure: Clean up all the files/directories created.
394 ROCKS_LOG_INFO(db_options
.info_log
, "[%s] Export failed. %s",
395 cf_name
.c_str(), s
.ToString().c_str());
396 std::vector
<std::string
> subchildren
;
// Clean the final directory if the rename already happened, otherwise the
// temporary one.
397 const auto cleanup_dir
=
398 moved_to_user_specified_dir
? export_dir
: tmp_export_dir
;
// The status returned by GetChildren is not captured here.
399 db_
->GetEnv()->GetChildren(cleanup_dir
, &subchildren
);
400 for (const auto& subchild
: subchildren
) {
401 const auto subchild_path
= cleanup_dir
+ "/" + subchild
;
402 const auto status
= db_
->GetEnv()->DeleteFile(subchild_path
);
404 ROCKS_LOG_WARN(db_options
.info_log
, "Failed to cleanup file %s: %s",
405 subchild_path
.c_str(), status
.ToString().c_str());
// Finally remove the directory itself.
408 const auto status
= db_
->GetEnv()->DeleteDir(cleanup_dir
);
410 ROCKS_LOG_WARN(db_options
.info_log
, "Failed to cleanup dir %s: %s",
411 cleanup_dir
.c_str(), status
.ToString().c_str());
// Walks every file of every level in `metadata` and transfers it through
// the supplied callbacks: hard-linking first, then copying once linking has
// been reported as NotSupported.
//
// Parameters:
//   db_options - used here only for its info_log.
//   metadata   - column family metadata whose `levels[].files[]` are
//                exported.
//   two std::function callbacks taking (source directory, file name); they
//   are invoked in the body as `link_file_cb` and `copy_file_cb` (the
//   declaring lines are not visible in this excerpt).
//
// Both callbacks receive db_->GetName() as the source directory.
//
// NOTE(review): the declarations of `s`, `number` and `type`, the update of
// `num_files`, several guards and the final return are on lines not visible
// here.
417 Status
CheckpointImpl::ExportFilesInMetaData(
418 const DBOptions
& db_options
, const ColumnFamilyMetaData
& metadata
,
419 std::function
<Status(const std::string
& src_dirname
,
420 const std::string
& src_fname
)>
422 std::function
<Status(const std::string
& src_dirname
,
423 const std::string
& src_fname
)>
// Start by attempting hard links; cleared below if linking is unsupported.
426 auto hardlink_file
= true;
428 // Copy/hard link files in metadata.
429 size_t num_files
= 0;
430 for (const auto& level_metadata
: metadata
.levels
) {
431 for (const auto& file_metadata
: level_metadata
.files
) {
// Split the file name into its number and type.
434 const auto ok
= ParseFileName(file_metadata
.name
, &number
, &type
);
436 s
= Status::Corruption("Could not parse file name");
440 // We should only get sst files here.
441 assert(type
== kTableFile
);
// Names are expected to start with '/', and sizes to be non-zero.
442 assert(file_metadata
.size
> 0 && file_metadata
.name
[0] == '/');
443 const auto src_fname
= file_metadata
.name
;
447 s
= link_file_cb(db_
->GetName(), src_fname
);
// The fallback decision is made only when num_files == 1.
448 if (num_files
== 1 && s
.IsNotSupported()) {
449 // Fallback to copy if link failed due to cross-device directories.
450 hardlink_file
= false;
454 if (!hardlink_file
) {
455 s
= copy_file_cb(db_
->GetName(), src_fname
);
462 ROCKS_LOG_INFO(db_options
.info_log
, "Number of table files %" ROCKSDB_PRIszt
,
467 } // namespace ROCKSDB_NAMESPACE
469 #endif // ROCKSDB_LITE