]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #ifndef ROCKSDB_LITE | |
7 | ||
f67539c2 | 8 | #include "file/delete_scheduler.h" |
7c673cae | 9 | |
20effc67 | 10 | #include <cinttypes> |
7c673cae FG |
11 | #include <thread> |
12 | #include <vector> | |
13 | ||
f67539c2 TL |
14 | #include "file/sst_file_manager_impl.h" |
15 | #include "logging/logging.h" | |
7c673cae FG |
16 | #include "port/port.h" |
17 | #include "rocksdb/env.h" | |
1e59de90 TL |
18 | #include "rocksdb/file_system.h" |
19 | #include "rocksdb/system_clock.h" | |
f67539c2 | 20 | #include "test_util/sync_point.h" |
7c673cae | 21 | #include "util/mutexlock.h" |
7c673cae | 22 | |
f67539c2 | 23 | namespace ROCKSDB_NAMESPACE { |
7c673cae | 24 | |
1e59de90 | 25 | DeleteScheduler::DeleteScheduler(SystemClock* clock, FileSystem* fs, |
f67539c2 | 26 | int64_t rate_bytes_per_sec, Logger* info_log, |
11fdf7f2 TL |
27 | SstFileManagerImpl* sst_file_manager, |
28 | double max_trash_db_ratio, | |
29 | uint64_t bytes_max_delete_chunk) | |
1e59de90 | 30 | : clock_(clock), |
f67539c2 | 31 | fs_(fs), |
11fdf7f2 | 32 | total_trash_size_(0), |
7c673cae FG |
33 | rate_bytes_per_sec_(rate_bytes_per_sec), |
34 | pending_files_(0), | |
11fdf7f2 | 35 | bytes_max_delete_chunk_(bytes_max_delete_chunk), |
7c673cae FG |
36 | closing_(false), |
37 | cv_(&mu_), | |
20effc67 | 38 | bg_thread_(nullptr), |
7c673cae | 39 | info_log_(info_log), |
11fdf7f2 TL |
40 | sst_file_manager_(sst_file_manager), |
41 | max_trash_db_ratio_(max_trash_db_ratio) { | |
42 | assert(sst_file_manager != nullptr); | |
43 | assert(max_trash_db_ratio >= 0); | |
20effc67 | 44 | MaybeCreateBackgroundThread(); |
7c673cae FG |
45 | } |
46 | ||
47 | DeleteScheduler::~DeleteScheduler() { | |
48 | { | |
49 | InstrumentedMutexLock l(&mu_); | |
50 | closing_ = true; | |
51 | cv_.SignalAll(); | |
52 | } | |
53 | if (bg_thread_) { | |
54 | bg_thread_->join(); | |
55 | } | |
1e59de90 TL |
56 | for (const auto& it : bg_errors_) { |
57 | it.second.PermitUncheckedError(); | |
58 | } | |
7c673cae FG |
59 | } |
60 | ||
11fdf7f2 | 61 | Status DeleteScheduler::DeleteFile(const std::string& file_path, |
494da23a TL |
62 | const std::string& dir_to_sync, |
63 | const bool force_bg) { | |
1e59de90 TL |
64 | if (rate_bytes_per_sec_.load() <= 0 || |
65 | (!force_bg && | |
66 | total_trash_size_.load() > | |
67 | sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) { | |
11fdf7f2 TL |
68 | // Rate limiting is disabled or trash size makes up more than |
69 | // max_trash_db_ratio_ (default 25%) of the total DB size | |
7c673cae | 70 | TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); |
1e59de90 | 71 | Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr); |
11fdf7f2 | 72 | if (s.ok()) { |
20effc67 TL |
73 | s = sst_file_manager_->OnDeleteFile(file_path); |
74 | ROCKS_LOG_INFO(info_log_, | |
75 | "Deleted file %s immediately, rate_bytes_per_sec %" PRIi64 | |
76 | ", total_trash_size %" PRIu64 " max_trash_db_ratio %lf", | |
77 | file_path.c_str(), rate_bytes_per_sec_.load(), | |
78 | total_trash_size_.load(), max_trash_db_ratio_.load()); | |
79 | InstrumentedMutexLock l(&mu_); | |
80 | RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); | |
7c673cae FG |
81 | } |
82 | return s; | |
83 | } | |
84 | ||
85 | // Move file to trash | |
11fdf7f2 | 86 | std::string trash_file; |
1e59de90 | 87 | Status s = MarkAsTrash(file_path, &trash_file); |
20effc67 TL |
88 | ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(), |
89 | s.ToString().c_str()); | |
11fdf7f2 | 90 | |
7c673cae | 91 | if (!s.ok()) { |
f67539c2 TL |
92 | ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s", |
93 | file_path.c_str(), s.ToString().c_str()); | |
94 | s = fs_->DeleteFile(file_path, IOOptions(), nullptr); | |
11fdf7f2 | 95 | if (s.ok()) { |
20effc67 TL |
96 | s = sst_file_manager_->OnDeleteFile(file_path); |
97 | ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately", | |
98 | trash_file.c_str()); | |
99 | InstrumentedMutexLock l(&mu_); | |
100 | RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); | |
7c673cae FG |
101 | } |
102 | return s; | |
103 | } | |
104 | ||
11fdf7f2 TL |
105 | // Update the total trash size |
106 | uint64_t trash_file_size = 0; | |
1e59de90 TL |
107 | IOStatus io_s = |
108 | fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr); | |
109 | if (io_s.ok()) { | |
110 | total_trash_size_.fetch_add(trash_file_size); | |
111 | } | |
112 | //**TODO: What should we do if we failed to | |
113 | // get the file size? | |
11fdf7f2 | 114 | |
7c673cae FG |
115 | // Add file to delete queue |
116 | { | |
117 | InstrumentedMutexLock l(&mu_); | |
20effc67 | 118 | RecordTick(stats_.get(), FILES_MARKED_TRASH); |
11fdf7f2 | 119 | queue_.emplace(trash_file, dir_to_sync); |
7c673cae FG |
120 | pending_files_++; |
121 | if (pending_files_ == 1) { | |
122 | cv_.SignalAll(); | |
123 | } | |
124 | } | |
125 | return s; | |
126 | } | |
127 | ||
128 | std::map<std::string, Status> DeleteScheduler::GetBackgroundErrors() { | |
129 | InstrumentedMutexLock l(&mu_); | |
130 | return bg_errors_; | |
131 | } | |
132 | ||
11fdf7f2 TL |
133 | const std::string DeleteScheduler::kTrashExtension = ".trash"; |
134 | bool DeleteScheduler::IsTrashFile(const std::string& file_path) { | |
135 | return (file_path.size() >= kTrashExtension.size() && | |
136 | file_path.rfind(kTrashExtension) == | |
137 | file_path.size() - kTrashExtension.size()); | |
138 | } | |
139 | ||
140 | Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm, | |
141 | const std::string& path) { | |
7c673cae | 142 | Status s; |
11fdf7f2 TL |
143 | // Check if there are any files marked as trash in this path |
144 | std::vector<std::string> files_in_path; | |
1e59de90 TL |
145 | const auto& fs = env->GetFileSystem(); |
146 | IOOptions io_opts; | |
147 | io_opts.do_not_recurse = true; | |
148 | s = fs->GetChildren(path, io_opts, &files_in_path, | |
149 | /*IODebugContext*=*/nullptr); | |
11fdf7f2 TL |
150 | if (!s.ok()) { |
151 | return s; | |
152 | } | |
153 | for (const std::string& current_file : files_in_path) { | |
154 | if (!DeleteScheduler::IsTrashFile(current_file)) { | |
155 | // not a trash file, skip | |
156 | continue; | |
157 | } | |
158 | ||
159 | Status file_delete; | |
160 | std::string trash_file = path + "/" + current_file; | |
161 | if (sfm) { | |
162 | // We have an SstFileManager that will schedule the file delete | |
20effc67 | 163 | s = sfm->OnAddFile(trash_file); |
11fdf7f2 TL |
164 | file_delete = sfm->ScheduleFileDeletion(trash_file, path); |
165 | } else { | |
166 | // Delete the file immediately | |
167 | file_delete = env->DeleteFile(trash_file); | |
168 | } | |
169 | ||
170 | if (s.ok() && !file_delete.ok()) { | |
171 | s = file_delete; | |
172 | } | |
173 | } | |
174 | ||
175 | return s; | |
176 | } | |
177 | ||
178 | Status DeleteScheduler::MarkAsTrash(const std::string& file_path, | |
179 | std::string* trash_file) { | |
180 | // Sanity check of the path | |
7c673cae FG |
181 | size_t idx = file_path.rfind("/"); |
182 | if (idx == std::string::npos || idx == file_path.size() - 1) { | |
183 | return Status::InvalidArgument("file_path is corrupted"); | |
184 | } | |
7c673cae | 185 | |
11fdf7f2 TL |
186 | if (DeleteScheduler::IsTrashFile(file_path)) { |
187 | // This is already a trash file | |
188 | *trash_file = file_path; | |
1e59de90 | 189 | return Status::OK(); |
7c673cae FG |
190 | } |
191 | ||
11fdf7f2 | 192 | *trash_file = file_path + kTrashExtension; |
7c673cae FG |
193 | // TODO(tec) : Implement Env::RenameFileIfNotExist and remove |
194 | // file_move_mu mutex. | |
11fdf7f2 | 195 | int cnt = 0; |
1e59de90 | 196 | Status s; |
7c673cae FG |
197 | InstrumentedMutexLock l(&file_move_mu_); |
198 | while (true) { | |
f67539c2 | 199 | s = fs_->FileExists(*trash_file, IOOptions(), nullptr); |
7c673cae FG |
200 | if (s.IsNotFound()) { |
201 | // We found a path for our file in trash | |
f67539c2 | 202 | s = fs_->RenameFile(file_path, *trash_file, IOOptions(), nullptr); |
7c673cae FG |
203 | break; |
204 | } else if (s.ok()) { | |
205 | // Name conflict, generate new random suffix | |
11fdf7f2 | 206 | *trash_file = file_path + std::to_string(cnt) + kTrashExtension; |
7c673cae FG |
207 | } else { |
208 | // Error during FileExists call, we cannot continue | |
209 | break; | |
210 | } | |
11fdf7f2 | 211 | cnt++; |
7c673cae | 212 | } |
11fdf7f2 | 213 | if (s.ok()) { |
1e59de90 | 214 | s = sst_file_manager_->OnMoveFile(file_path, *trash_file); |
7c673cae FG |
215 | } |
216 | return s; | |
217 | } | |
218 | ||
219 | void DeleteScheduler::BackgroundEmptyTrash() { | |
220 | TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash"); | |
221 | ||
222 | while (true) { | |
223 | InstrumentedMutexLock l(&mu_); | |
224 | while (queue_.empty() && !closing_) { | |
225 | cv_.Wait(); | |
226 | } | |
227 | ||
228 | if (closing_) { | |
229 | return; | |
230 | } | |
231 | ||
232 | // Delete all files in queue_ | |
1e59de90 | 233 | uint64_t start_time = clock_->NowMicros(); |
7c673cae FG |
234 | uint64_t total_deleted_bytes = 0; |
235 | int64_t current_delete_rate = rate_bytes_per_sec_.load(); | |
236 | while (!queue_.empty() && !closing_) { | |
237 | if (current_delete_rate != rate_bytes_per_sec_.load()) { | |
238 | // User changed the delete rate | |
239 | current_delete_rate = rate_bytes_per_sec_.load(); | |
1e59de90 | 240 | start_time = clock_->NowMicros(); |
7c673cae | 241 | total_deleted_bytes = 0; |
20effc67 TL |
242 | ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64, |
243 | current_delete_rate); | |
7c673cae FG |
244 | } |
245 | ||
246 | // Get new file to delete | |
11fdf7f2 TL |
247 | const FileAndDir& fad = queue_.front(); |
248 | std::string path_in_trash = fad.fname; | |
7c673cae | 249 | |
20effc67 | 250 | // We don't need to hold the lock while deleting the file |
7c673cae FG |
251 | mu_.Unlock(); |
252 | uint64_t deleted_bytes = 0; | |
11fdf7f2 | 253 | bool is_complete = true; |
7c673cae | 254 | // Delete file from trash and update total_penlty value |
11fdf7f2 TL |
255 | Status s = |
256 | DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete); | |
7c673cae FG |
257 | total_deleted_bytes += deleted_bytes; |
258 | mu_.Lock(); | |
11fdf7f2 TL |
259 | if (is_complete) { |
260 | queue_.pop(); | |
261 | } | |
7c673cae FG |
262 | |
263 | if (!s.ok()) { | |
264 | bg_errors_[path_in_trash] = s; | |
265 | } | |
266 | ||
20effc67 TL |
267 | // Apply penalty if necessary |
268 | uint64_t total_penalty; | |
7c673cae FG |
269 | if (current_delete_rate > 0) { |
270 | // rate limiting is enabled | |
20effc67 | 271 | total_penalty = |
7c673cae | 272 | ((total_deleted_bytes * kMicrosInSecond) / current_delete_rate); |
20effc67 TL |
273 | ROCKS_LOG_INFO(info_log_, |
274 | "Rate limiting is enabled with penalty %" PRIu64 | |
275 | " after deleting file %s", | |
276 | total_penalty, path_in_trash.c_str()); | |
277 | while (!closing_ && !cv_.TimedWait(start_time + total_penalty)) { | |
278 | } | |
7c673cae FG |
279 | } else { |
280 | // rate limiting is disabled | |
20effc67 TL |
281 | total_penalty = 0; |
282 | ROCKS_LOG_INFO(info_log_, | |
283 | "Rate limiting is disabled after deleting file %s", | |
284 | path_in_trash.c_str()); | |
7c673cae FG |
285 | } |
286 | TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", | |
20effc67 | 287 | &total_penalty); |
7c673cae | 288 | |
11fdf7f2 TL |
289 | if (is_complete) { |
290 | pending_files_--; | |
291 | } | |
7c673cae FG |
292 | if (pending_files_ == 0) { |
293 | // Unblock WaitForEmptyTrash since there are no more files waiting | |
294 | // to be deleted | |
295 | cv_.SignalAll(); | |
296 | } | |
297 | } | |
298 | } | |
299 | } | |
300 | ||
301 | Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, | |
11fdf7f2 TL |
302 | const std::string& dir_to_sync, |
303 | uint64_t* deleted_bytes, | |
304 | bool* is_complete) { | |
7c673cae | 305 | uint64_t file_size; |
f67539c2 | 306 | Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr); |
11fdf7f2 TL |
307 | *is_complete = true; |
308 | TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); | |
7c673cae | 309 | if (s.ok()) { |
11fdf7f2 TL |
310 | bool need_full_delete = true; |
311 | if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) { | |
312 | uint64_t num_hard_links = 2; | |
313 | // We don't have to worry aobut data race between linking a new | |
314 | // file after the number of file link check and ftruncte because | |
315 | // the file is now in trash and no hardlink is supposed to create | |
316 | // to trash files by RocksDB. | |
f67539c2 TL |
317 | Status my_status = fs_->NumFileLinks(path_in_trash, IOOptions(), |
318 | &num_hard_links, nullptr); | |
11fdf7f2 TL |
319 | if (my_status.ok()) { |
320 | if (num_hard_links == 1) { | |
f67539c2 | 321 | std::unique_ptr<FSWritableFile> wf; |
1e59de90 TL |
322 | my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(), &wf, |
323 | nullptr); | |
11fdf7f2 | 324 | if (my_status.ok()) { |
f67539c2 TL |
325 | my_status = wf->Truncate(file_size - bytes_max_delete_chunk_, |
326 | IOOptions(), nullptr); | |
11fdf7f2 TL |
327 | if (my_status.ok()) { |
328 | TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync"); | |
f67539c2 | 329 | my_status = wf->Fsync(IOOptions(), nullptr); |
11fdf7f2 TL |
330 | } |
331 | } | |
332 | if (my_status.ok()) { | |
333 | *deleted_bytes = bytes_max_delete_chunk_; | |
334 | need_full_delete = false; | |
335 | *is_complete = false; | |
336 | } else { | |
337 | ROCKS_LOG_WARN(info_log_, | |
338 | "Failed to partially delete %s from trash -- %s", | |
339 | path_in_trash.c_str(), my_status.ToString().c_str()); | |
340 | } | |
341 | } else { | |
342 | ROCKS_LOG_INFO(info_log_, | |
343 | "Cannot delete %s slowly through ftruncate from trash " | |
344 | "as it has other links", | |
345 | path_in_trash.c_str()); | |
346 | } | |
347 | } else if (!num_link_error_printed_) { | |
348 | ROCKS_LOG_INFO( | |
349 | info_log_, | |
350 | "Cannot delete files slowly through ftruncate from trash " | |
351 | "as Env::NumFileLinks() returns error: %s", | |
352 | my_status.ToString().c_str()); | |
353 | num_link_error_printed_ = true; | |
354 | } | |
355 | } | |
7c673cae | 356 | |
11fdf7f2 | 357 | if (need_full_delete) { |
f67539c2 | 358 | s = fs_->DeleteFile(path_in_trash, IOOptions(), nullptr); |
11fdf7f2 | 359 | if (!dir_to_sync.empty()) { |
f67539c2 | 360 | std::unique_ptr<FSDirectory> dir_obj; |
11fdf7f2 | 361 | if (s.ok()) { |
f67539c2 | 362 | s = fs_->NewDirectory(dir_to_sync, IOOptions(), &dir_obj, nullptr); |
11fdf7f2 TL |
363 | } |
364 | if (s.ok()) { | |
1e59de90 TL |
365 | s = dir_obj->FsyncWithDirOptions( |
366 | IOOptions(), nullptr, | |
367 | DirFsyncOptions(DirFsyncOptions::FsyncReason::kFileDeleted)); | |
11fdf7f2 TL |
368 | TEST_SYNC_POINT_CALLBACK( |
369 | "DeleteScheduler::DeleteTrashFile::AfterSyncDir", | |
370 | reinterpret_cast<void*>(const_cast<std::string*>(&dir_to_sync))); | |
371 | } | |
372 | } | |
20effc67 TL |
373 | if (s.ok()) { |
374 | *deleted_bytes = file_size; | |
375 | s = sst_file_manager_->OnDeleteFile(path_in_trash); | |
376 | } | |
11fdf7f2 TL |
377 | } |
378 | } | |
7c673cae FG |
379 | if (!s.ok()) { |
380 | // Error while getting file size or while deleting | |
381 | ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", | |
382 | path_in_trash.c_str(), s.ToString().c_str()); | |
383 | *deleted_bytes = 0; | |
384 | } else { | |
11fdf7f2 | 385 | total_trash_size_.fetch_sub(*deleted_bytes); |
7c673cae FG |
386 | } |
387 | ||
388 | return s; | |
389 | } | |
390 | ||
391 | void DeleteScheduler::WaitForEmptyTrash() { | |
392 | InstrumentedMutexLock l(&mu_); | |
393 | while (pending_files_ > 0 && !closing_) { | |
394 | cv_.Wait(); | |
395 | } | |
396 | } | |
397 | ||
20effc67 TL |
398 | void DeleteScheduler::MaybeCreateBackgroundThread() { |
399 | if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) { | |
400 | bg_thread_.reset( | |
401 | new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); | |
402 | ROCKS_LOG_INFO(info_log_, | |
403 | "Created background thread for deletion scheduler with " | |
404 | "rate_bytes_per_sec: %" PRIi64, | |
405 | rate_bytes_per_sec_.load()); | |
406 | } | |
407 | } | |
408 | ||
f67539c2 | 409 | } // namespace ROCKSDB_NAMESPACE |
7c673cae FG |
410 | |
411 | #endif // ROCKSDB_LITE |