]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/file/delete_scheduler.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / rocksdb / file / delete_scheduler.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#ifndef ROCKSDB_LITE
7
f67539c2 8#include "file/delete_scheduler.h"
7c673cae
FG
9
10#include <thread>
11#include <vector>
12
f67539c2
TL
13#include "file/sst_file_manager_impl.h"
14#include "logging/logging.h"
7c673cae
FG
15#include "port/port.h"
16#include "rocksdb/env.h"
f67539c2 17#include "test_util/sync_point.h"
7c673cae 18#include "util/mutexlock.h"
7c673cae 19
f67539c2 20namespace ROCKSDB_NAMESPACE {
7c673cae 21
f67539c2
TL
22DeleteScheduler::DeleteScheduler(Env* env, FileSystem* fs,
23 int64_t rate_bytes_per_sec, Logger* info_log,
11fdf7f2
TL
24 SstFileManagerImpl* sst_file_manager,
25 double max_trash_db_ratio,
26 uint64_t bytes_max_delete_chunk)
7c673cae 27 : env_(env),
f67539c2 28 fs_(fs),
11fdf7f2 29 total_trash_size_(0),
7c673cae
FG
30 rate_bytes_per_sec_(rate_bytes_per_sec),
31 pending_files_(0),
11fdf7f2 32 bytes_max_delete_chunk_(bytes_max_delete_chunk),
7c673cae
FG
33 closing_(false),
34 cv_(&mu_),
35 info_log_(info_log),
11fdf7f2
TL
36 sst_file_manager_(sst_file_manager),
37 max_trash_db_ratio_(max_trash_db_ratio) {
38 assert(sst_file_manager != nullptr);
39 assert(max_trash_db_ratio >= 0);
7c673cae
FG
40 bg_thread_.reset(
41 new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this));
42}
43
44DeleteScheduler::~DeleteScheduler() {
45 {
46 InstrumentedMutexLock l(&mu_);
47 closing_ = true;
48 cv_.SignalAll();
49 }
50 if (bg_thread_) {
51 bg_thread_->join();
52 }
53}
54
11fdf7f2 55Status DeleteScheduler::DeleteFile(const std::string& file_path,
494da23a
TL
56 const std::string& dir_to_sync,
57 const bool force_bg) {
7c673cae 58 Status s;
494da23a 59 if (rate_bytes_per_sec_.load() <= 0 || (!force_bg &&
11fdf7f2 60 total_trash_size_.load() >
494da23a 61 sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) {
11fdf7f2
TL
62 // Rate limiting is disabled or trash size makes up more than
63 // max_trash_db_ratio_ (default 25%) of the total DB size
7c673cae 64 TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
f67539c2 65 s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
11fdf7f2 66 if (s.ok()) {
7c673cae
FG
67 sst_file_manager_->OnDeleteFile(file_path);
68 }
69 return s;
70 }
71
72 // Move file to trash
11fdf7f2
TL
73 std::string trash_file;
74 s = MarkAsTrash(file_path, &trash_file);
75
7c673cae 76 if (!s.ok()) {
f67539c2
TL
77 ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s",
78 file_path.c_str(), s.ToString().c_str());
79 s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
11fdf7f2 80 if (s.ok()) {
7c673cae
FG
81 sst_file_manager_->OnDeleteFile(file_path);
82 }
83 return s;
84 }
85
11fdf7f2
TL
86 // Update the total trash size
87 uint64_t trash_file_size = 0;
f67539c2 88 fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
11fdf7f2
TL
89 total_trash_size_.fetch_add(trash_file_size);
90
7c673cae
FG
91 // Add file to delete queue
92 {
93 InstrumentedMutexLock l(&mu_);
11fdf7f2 94 queue_.emplace(trash_file, dir_to_sync);
7c673cae
FG
95 pending_files_++;
96 if (pending_files_ == 1) {
97 cv_.SignalAll();
98 }
99 }
100 return s;
101}
102
103std::map<std::string, Status> DeleteScheduler::GetBackgroundErrors() {
104 InstrumentedMutexLock l(&mu_);
105 return bg_errors_;
106}
107
11fdf7f2
TL
108const std::string DeleteScheduler::kTrashExtension = ".trash";
109bool DeleteScheduler::IsTrashFile(const std::string& file_path) {
110 return (file_path.size() >= kTrashExtension.size() &&
111 file_path.rfind(kTrashExtension) ==
112 file_path.size() - kTrashExtension.size());
113}
114
115Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
116 const std::string& path) {
7c673cae 117 Status s;
11fdf7f2
TL
118 // Check if there are any files marked as trash in this path
119 std::vector<std::string> files_in_path;
120 s = env->GetChildren(path, &files_in_path);
121 if (!s.ok()) {
122 return s;
123 }
124 for (const std::string& current_file : files_in_path) {
125 if (!DeleteScheduler::IsTrashFile(current_file)) {
126 // not a trash file, skip
127 continue;
128 }
129
130 Status file_delete;
131 std::string trash_file = path + "/" + current_file;
132 if (sfm) {
133 // We have an SstFileManager that will schedule the file delete
134 sfm->OnAddFile(trash_file);
135 file_delete = sfm->ScheduleFileDeletion(trash_file, path);
136 } else {
137 // Delete the file immediately
138 file_delete = env->DeleteFile(trash_file);
139 }
140
141 if (s.ok() && !file_delete.ok()) {
142 s = file_delete;
143 }
144 }
145
146 return s;
147}
148
149Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
150 std::string* trash_file) {
151 // Sanity check of the path
7c673cae
FG
152 size_t idx = file_path.rfind("/");
153 if (idx == std::string::npos || idx == file_path.size() - 1) {
154 return Status::InvalidArgument("file_path is corrupted");
155 }
7c673cae 156
11fdf7f2
TL
157 Status s;
158 if (DeleteScheduler::IsTrashFile(file_path)) {
159 // This is already a trash file
160 *trash_file = file_path;
7c673cae
FG
161 return s;
162 }
163
11fdf7f2 164 *trash_file = file_path + kTrashExtension;
7c673cae
FG
165 // TODO(tec) : Implement Env::RenameFileIfNotExist and remove
166 // file_move_mu mutex.
11fdf7f2 167 int cnt = 0;
7c673cae
FG
168 InstrumentedMutexLock l(&file_move_mu_);
169 while (true) {
f67539c2 170 s = fs_->FileExists(*trash_file, IOOptions(), nullptr);
7c673cae
FG
171 if (s.IsNotFound()) {
172 // We found a path for our file in trash
f67539c2 173 s = fs_->RenameFile(file_path, *trash_file, IOOptions(), nullptr);
7c673cae
FG
174 break;
175 } else if (s.ok()) {
176 // Name conflict, generate new random suffix
11fdf7f2 177 *trash_file = file_path + std::to_string(cnt) + kTrashExtension;
7c673cae
FG
178 } else {
179 // Error during FileExists call, we cannot continue
180 break;
181 }
11fdf7f2 182 cnt++;
7c673cae 183 }
11fdf7f2
TL
184 if (s.ok()) {
185 sst_file_manager_->OnMoveFile(file_path, *trash_file);
7c673cae
FG
186 }
187 return s;
188}
189
190void DeleteScheduler::BackgroundEmptyTrash() {
191 TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash");
192
193 while (true) {
194 InstrumentedMutexLock l(&mu_);
195 while (queue_.empty() && !closing_) {
196 cv_.Wait();
197 }
198
199 if (closing_) {
200 return;
201 }
202
203 // Delete all files in queue_
204 uint64_t start_time = env_->NowMicros();
205 uint64_t total_deleted_bytes = 0;
206 int64_t current_delete_rate = rate_bytes_per_sec_.load();
207 while (!queue_.empty() && !closing_) {
208 if (current_delete_rate != rate_bytes_per_sec_.load()) {
209 // User changed the delete rate
210 current_delete_rate = rate_bytes_per_sec_.load();
211 start_time = env_->NowMicros();
212 total_deleted_bytes = 0;
213 }
214
215 // Get new file to delete
11fdf7f2
TL
216 const FileAndDir& fad = queue_.front();
217 std::string path_in_trash = fad.fname;
7c673cae
FG
218
219 // We dont need to hold the lock while deleting the file
220 mu_.Unlock();
221 uint64_t deleted_bytes = 0;
11fdf7f2 222 bool is_complete = true;
7c673cae 223 // Delete file from trash and update total_penlty value
11fdf7f2
TL
224 Status s =
225 DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete);
7c673cae
FG
226 total_deleted_bytes += deleted_bytes;
227 mu_.Lock();
11fdf7f2
TL
228 if (is_complete) {
229 queue_.pop();
230 }
7c673cae
FG
231
232 if (!s.ok()) {
233 bg_errors_[path_in_trash] = s;
234 }
235
236 // Apply penlty if necessary
237 uint64_t total_penlty;
238 if (current_delete_rate > 0) {
239 // rate limiting is enabled
240 total_penlty =
241 ((total_deleted_bytes * kMicrosInSecond) / current_delete_rate);
242 while (!closing_ && !cv_.TimedWait(start_time + total_penlty)) {}
243 } else {
244 // rate limiting is disabled
245 total_penlty = 0;
246 }
247 TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
248 &total_penlty);
249
11fdf7f2
TL
250 if (is_complete) {
251 pending_files_--;
252 }
7c673cae
FG
253 if (pending_files_ == 0) {
254 // Unblock WaitForEmptyTrash since there are no more files waiting
255 // to be deleted
256 cv_.SignalAll();
257 }
258 }
259 }
260}
261
262Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
11fdf7f2
TL
263 const std::string& dir_to_sync,
264 uint64_t* deleted_bytes,
265 bool* is_complete) {
7c673cae 266 uint64_t file_size;
f67539c2 267 Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
11fdf7f2
TL
268 *is_complete = true;
269 TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
7c673cae 270 if (s.ok()) {
11fdf7f2
TL
271 bool need_full_delete = true;
272 if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
273 uint64_t num_hard_links = 2;
274 // We don't have to worry aobut data race between linking a new
275 // file after the number of file link check and ftruncte because
276 // the file is now in trash and no hardlink is supposed to create
277 // to trash files by RocksDB.
f67539c2
TL
278 Status my_status = fs_->NumFileLinks(path_in_trash, IOOptions(),
279 &num_hard_links, nullptr);
11fdf7f2
TL
280 if (my_status.ok()) {
281 if (num_hard_links == 1) {
f67539c2
TL
282 std::unique_ptr<FSWritableFile> wf;
283 my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(),
284 &wf, nullptr);
11fdf7f2 285 if (my_status.ok()) {
f67539c2
TL
286 my_status = wf->Truncate(file_size - bytes_max_delete_chunk_,
287 IOOptions(), nullptr);
11fdf7f2
TL
288 if (my_status.ok()) {
289 TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync");
f67539c2 290 my_status = wf->Fsync(IOOptions(), nullptr);
11fdf7f2
TL
291 }
292 }
293 if (my_status.ok()) {
294 *deleted_bytes = bytes_max_delete_chunk_;
295 need_full_delete = false;
296 *is_complete = false;
297 } else {
298 ROCKS_LOG_WARN(info_log_,
299 "Failed to partially delete %s from trash -- %s",
300 path_in_trash.c_str(), my_status.ToString().c_str());
301 }
302 } else {
303 ROCKS_LOG_INFO(info_log_,
304 "Cannot delete %s slowly through ftruncate from trash "
305 "as it has other links",
306 path_in_trash.c_str());
307 }
308 } else if (!num_link_error_printed_) {
309 ROCKS_LOG_INFO(
310 info_log_,
311 "Cannot delete files slowly through ftruncate from trash "
312 "as Env::NumFileLinks() returns error: %s",
313 my_status.ToString().c_str());
314 num_link_error_printed_ = true;
315 }
316 }
7c673cae 317
11fdf7f2 318 if (need_full_delete) {
f67539c2 319 s = fs_->DeleteFile(path_in_trash, IOOptions(), nullptr);
11fdf7f2 320 if (!dir_to_sync.empty()) {
f67539c2 321 std::unique_ptr<FSDirectory> dir_obj;
11fdf7f2 322 if (s.ok()) {
f67539c2 323 s = fs_->NewDirectory(dir_to_sync, IOOptions(), &dir_obj, nullptr);
11fdf7f2
TL
324 }
325 if (s.ok()) {
f67539c2 326 s = dir_obj->Fsync(IOOptions(), nullptr);
11fdf7f2
TL
327 TEST_SYNC_POINT_CALLBACK(
328 "DeleteScheduler::DeleteTrashFile::AfterSyncDir",
329 reinterpret_cast<void*>(const_cast<std::string*>(&dir_to_sync)));
330 }
331 }
332 *deleted_bytes = file_size;
333 sst_file_manager_->OnDeleteFile(path_in_trash);
334 }
335 }
7c673cae
FG
336 if (!s.ok()) {
337 // Error while getting file size or while deleting
338 ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s",
339 path_in_trash.c_str(), s.ToString().c_str());
340 *deleted_bytes = 0;
341 } else {
11fdf7f2 342 total_trash_size_.fetch_sub(*deleted_bytes);
7c673cae
FG
343 }
344
345 return s;
346}
347
348void DeleteScheduler::WaitForEmptyTrash() {
349 InstrumentedMutexLock l(&mu_);
350 while (pending_files_ > 0 && !closing_) {
351 cv_.Wait();
352 }
353}
354
f67539c2 355} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
356
357#endif // ROCKSDB_LITE