]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #ifndef ROCKSDB_LITE | |
7 | ||
f67539c2 | 8 | #include "file/delete_scheduler.h" |
7c673cae FG |
9 | |
10 | #include <thread> | |
11 | #include <vector> | |
12 | ||
f67539c2 TL |
13 | #include "file/sst_file_manager_impl.h" |
14 | #include "logging/logging.h" | |
7c673cae FG |
15 | #include "port/port.h" |
16 | #include "rocksdb/env.h" | |
f67539c2 | 17 | #include "test_util/sync_point.h" |
7c673cae | 18 | #include "util/mutexlock.h" |
7c673cae | 19 | |
f67539c2 | 20 | namespace ROCKSDB_NAMESPACE { |
7c673cae | 21 | |
f67539c2 TL |
22 | DeleteScheduler::DeleteScheduler(Env* env, FileSystem* fs, |
23 | int64_t rate_bytes_per_sec, Logger* info_log, | |
11fdf7f2 TL |
24 | SstFileManagerImpl* sst_file_manager, |
25 | double max_trash_db_ratio, | |
26 | uint64_t bytes_max_delete_chunk) | |
7c673cae | 27 | : env_(env), |
f67539c2 | 28 | fs_(fs), |
11fdf7f2 | 29 | total_trash_size_(0), |
7c673cae FG |
30 | rate_bytes_per_sec_(rate_bytes_per_sec), |
31 | pending_files_(0), | |
11fdf7f2 | 32 | bytes_max_delete_chunk_(bytes_max_delete_chunk), |
7c673cae FG |
33 | closing_(false), |
34 | cv_(&mu_), | |
35 | info_log_(info_log), | |
11fdf7f2 TL |
36 | sst_file_manager_(sst_file_manager), |
37 | max_trash_db_ratio_(max_trash_db_ratio) { | |
38 | assert(sst_file_manager != nullptr); | |
39 | assert(max_trash_db_ratio >= 0); | |
7c673cae FG |
40 | bg_thread_.reset( |
41 | new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); | |
42 | } | |
43 | ||
44 | DeleteScheduler::~DeleteScheduler() { | |
45 | { | |
46 | InstrumentedMutexLock l(&mu_); | |
47 | closing_ = true; | |
48 | cv_.SignalAll(); | |
49 | } | |
50 | if (bg_thread_) { | |
51 | bg_thread_->join(); | |
52 | } | |
53 | } | |
54 | ||
11fdf7f2 | 55 | Status DeleteScheduler::DeleteFile(const std::string& file_path, |
494da23a TL |
56 | const std::string& dir_to_sync, |
57 | const bool force_bg) { | |
7c673cae | 58 | Status s; |
494da23a | 59 | if (rate_bytes_per_sec_.load() <= 0 || (!force_bg && |
11fdf7f2 | 60 | total_trash_size_.load() > |
494da23a | 61 | sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) { |
11fdf7f2 TL |
62 | // Rate limiting is disabled or trash size makes up more than |
63 | // max_trash_db_ratio_ (default 25%) of the total DB size | |
7c673cae | 64 | TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); |
f67539c2 | 65 | s = fs_->DeleteFile(file_path, IOOptions(), nullptr); |
11fdf7f2 | 66 | if (s.ok()) { |
7c673cae FG |
67 | sst_file_manager_->OnDeleteFile(file_path); |
68 | } | |
69 | return s; | |
70 | } | |
71 | ||
72 | // Move file to trash | |
11fdf7f2 TL |
73 | std::string trash_file; |
74 | s = MarkAsTrash(file_path, &trash_file); | |
75 | ||
7c673cae | 76 | if (!s.ok()) { |
f67539c2 TL |
77 | ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s", |
78 | file_path.c_str(), s.ToString().c_str()); | |
79 | s = fs_->DeleteFile(file_path, IOOptions(), nullptr); | |
11fdf7f2 | 80 | if (s.ok()) { |
7c673cae FG |
81 | sst_file_manager_->OnDeleteFile(file_path); |
82 | } | |
83 | return s; | |
84 | } | |
85 | ||
11fdf7f2 TL |
86 | // Update the total trash size |
87 | uint64_t trash_file_size = 0; | |
f67539c2 | 88 | fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr); |
11fdf7f2 TL |
89 | total_trash_size_.fetch_add(trash_file_size); |
90 | ||
7c673cae FG |
91 | // Add file to delete queue |
92 | { | |
93 | InstrumentedMutexLock l(&mu_); | |
11fdf7f2 | 94 | queue_.emplace(trash_file, dir_to_sync); |
7c673cae FG |
95 | pending_files_++; |
96 | if (pending_files_ == 1) { | |
97 | cv_.SignalAll(); | |
98 | } | |
99 | } | |
100 | return s; | |
101 | } | |
102 | ||
103 | std::map<std::string, Status> DeleteScheduler::GetBackgroundErrors() { | |
104 | InstrumentedMutexLock l(&mu_); | |
105 | return bg_errors_; | |
106 | } | |
107 | ||
11fdf7f2 TL |
108 | const std::string DeleteScheduler::kTrashExtension = ".trash"; |
109 | bool DeleteScheduler::IsTrashFile(const std::string& file_path) { | |
110 | return (file_path.size() >= kTrashExtension.size() && | |
111 | file_path.rfind(kTrashExtension) == | |
112 | file_path.size() - kTrashExtension.size()); | |
113 | } | |
114 | ||
115 | Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm, | |
116 | const std::string& path) { | |
7c673cae | 117 | Status s; |
11fdf7f2 TL |
118 | // Check if there are any files marked as trash in this path |
119 | std::vector<std::string> files_in_path; | |
120 | s = env->GetChildren(path, &files_in_path); | |
121 | if (!s.ok()) { | |
122 | return s; | |
123 | } | |
124 | for (const std::string& current_file : files_in_path) { | |
125 | if (!DeleteScheduler::IsTrashFile(current_file)) { | |
126 | // not a trash file, skip | |
127 | continue; | |
128 | } | |
129 | ||
130 | Status file_delete; | |
131 | std::string trash_file = path + "/" + current_file; | |
132 | if (sfm) { | |
133 | // We have an SstFileManager that will schedule the file delete | |
134 | sfm->OnAddFile(trash_file); | |
135 | file_delete = sfm->ScheduleFileDeletion(trash_file, path); | |
136 | } else { | |
137 | // Delete the file immediately | |
138 | file_delete = env->DeleteFile(trash_file); | |
139 | } | |
140 | ||
141 | if (s.ok() && !file_delete.ok()) { | |
142 | s = file_delete; | |
143 | } | |
144 | } | |
145 | ||
146 | return s; | |
147 | } | |
148 | ||
149 | Status DeleteScheduler::MarkAsTrash(const std::string& file_path, | |
150 | std::string* trash_file) { | |
151 | // Sanity check of the path | |
7c673cae FG |
152 | size_t idx = file_path.rfind("/"); |
153 | if (idx == std::string::npos || idx == file_path.size() - 1) { | |
154 | return Status::InvalidArgument("file_path is corrupted"); | |
155 | } | |
7c673cae | 156 | |
11fdf7f2 TL |
157 | Status s; |
158 | if (DeleteScheduler::IsTrashFile(file_path)) { | |
159 | // This is already a trash file | |
160 | *trash_file = file_path; | |
7c673cae FG |
161 | return s; |
162 | } | |
163 | ||
11fdf7f2 | 164 | *trash_file = file_path + kTrashExtension; |
7c673cae FG |
165 | // TODO(tec) : Implement Env::RenameFileIfNotExist and remove |
166 | // file_move_mu mutex. | |
11fdf7f2 | 167 | int cnt = 0; |
7c673cae FG |
168 | InstrumentedMutexLock l(&file_move_mu_); |
169 | while (true) { | |
f67539c2 | 170 | s = fs_->FileExists(*trash_file, IOOptions(), nullptr); |
7c673cae FG |
171 | if (s.IsNotFound()) { |
172 | // We found a path for our file in trash | |
f67539c2 | 173 | s = fs_->RenameFile(file_path, *trash_file, IOOptions(), nullptr); |
7c673cae FG |
174 | break; |
175 | } else if (s.ok()) { | |
176 | // Name conflict, generate new random suffix | |
11fdf7f2 | 177 | *trash_file = file_path + std::to_string(cnt) + kTrashExtension; |
7c673cae FG |
178 | } else { |
179 | // Error during FileExists call, we cannot continue | |
180 | break; | |
181 | } | |
11fdf7f2 | 182 | cnt++; |
7c673cae | 183 | } |
11fdf7f2 TL |
184 | if (s.ok()) { |
185 | sst_file_manager_->OnMoveFile(file_path, *trash_file); | |
7c673cae FG |
186 | } |
187 | return s; | |
188 | } | |
189 | ||
190 | void DeleteScheduler::BackgroundEmptyTrash() { | |
191 | TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash"); | |
192 | ||
193 | while (true) { | |
194 | InstrumentedMutexLock l(&mu_); | |
195 | while (queue_.empty() && !closing_) { | |
196 | cv_.Wait(); | |
197 | } | |
198 | ||
199 | if (closing_) { | |
200 | return; | |
201 | } | |
202 | ||
203 | // Delete all files in queue_ | |
204 | uint64_t start_time = env_->NowMicros(); | |
205 | uint64_t total_deleted_bytes = 0; | |
206 | int64_t current_delete_rate = rate_bytes_per_sec_.load(); | |
207 | while (!queue_.empty() && !closing_) { | |
208 | if (current_delete_rate != rate_bytes_per_sec_.load()) { | |
209 | // User changed the delete rate | |
210 | current_delete_rate = rate_bytes_per_sec_.load(); | |
211 | start_time = env_->NowMicros(); | |
212 | total_deleted_bytes = 0; | |
213 | } | |
214 | ||
215 | // Get new file to delete | |
11fdf7f2 TL |
216 | const FileAndDir& fad = queue_.front(); |
217 | std::string path_in_trash = fad.fname; | |
7c673cae FG |
218 | |
219 | // We dont need to hold the lock while deleting the file | |
220 | mu_.Unlock(); | |
221 | uint64_t deleted_bytes = 0; | |
11fdf7f2 | 222 | bool is_complete = true; |
7c673cae | 223 | // Delete file from trash and update total_penlty value |
11fdf7f2 TL |
224 | Status s = |
225 | DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete); | |
7c673cae FG |
226 | total_deleted_bytes += deleted_bytes; |
227 | mu_.Lock(); | |
11fdf7f2 TL |
228 | if (is_complete) { |
229 | queue_.pop(); | |
230 | } | |
7c673cae FG |
231 | |
232 | if (!s.ok()) { | |
233 | bg_errors_[path_in_trash] = s; | |
234 | } | |
235 | ||
236 | // Apply penlty if necessary | |
237 | uint64_t total_penlty; | |
238 | if (current_delete_rate > 0) { | |
239 | // rate limiting is enabled | |
240 | total_penlty = | |
241 | ((total_deleted_bytes * kMicrosInSecond) / current_delete_rate); | |
242 | while (!closing_ && !cv_.TimedWait(start_time + total_penlty)) {} | |
243 | } else { | |
244 | // rate limiting is disabled | |
245 | total_penlty = 0; | |
246 | } | |
247 | TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", | |
248 | &total_penlty); | |
249 | ||
11fdf7f2 TL |
250 | if (is_complete) { |
251 | pending_files_--; | |
252 | } | |
7c673cae FG |
253 | if (pending_files_ == 0) { |
254 | // Unblock WaitForEmptyTrash since there are no more files waiting | |
255 | // to be deleted | |
256 | cv_.SignalAll(); | |
257 | } | |
258 | } | |
259 | } | |
260 | } | |
261 | ||
262 | Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, | |
11fdf7f2 TL |
263 | const std::string& dir_to_sync, |
264 | uint64_t* deleted_bytes, | |
265 | bool* is_complete) { | |
7c673cae | 266 | uint64_t file_size; |
f67539c2 | 267 | Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr); |
11fdf7f2 TL |
268 | *is_complete = true; |
269 | TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); | |
7c673cae | 270 | if (s.ok()) { |
11fdf7f2 TL |
271 | bool need_full_delete = true; |
272 | if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) { | |
273 | uint64_t num_hard_links = 2; | |
274 | // We don't have to worry aobut data race between linking a new | |
275 | // file after the number of file link check and ftruncte because | |
276 | // the file is now in trash and no hardlink is supposed to create | |
277 | // to trash files by RocksDB. | |
f67539c2 TL |
278 | Status my_status = fs_->NumFileLinks(path_in_trash, IOOptions(), |
279 | &num_hard_links, nullptr); | |
11fdf7f2 TL |
280 | if (my_status.ok()) { |
281 | if (num_hard_links == 1) { | |
f67539c2 TL |
282 | std::unique_ptr<FSWritableFile> wf; |
283 | my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(), | |
284 | &wf, nullptr); | |
11fdf7f2 | 285 | if (my_status.ok()) { |
f67539c2 TL |
286 | my_status = wf->Truncate(file_size - bytes_max_delete_chunk_, |
287 | IOOptions(), nullptr); | |
11fdf7f2 TL |
288 | if (my_status.ok()) { |
289 | TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync"); | |
f67539c2 | 290 | my_status = wf->Fsync(IOOptions(), nullptr); |
11fdf7f2 TL |
291 | } |
292 | } | |
293 | if (my_status.ok()) { | |
294 | *deleted_bytes = bytes_max_delete_chunk_; | |
295 | need_full_delete = false; | |
296 | *is_complete = false; | |
297 | } else { | |
298 | ROCKS_LOG_WARN(info_log_, | |
299 | "Failed to partially delete %s from trash -- %s", | |
300 | path_in_trash.c_str(), my_status.ToString().c_str()); | |
301 | } | |
302 | } else { | |
303 | ROCKS_LOG_INFO(info_log_, | |
304 | "Cannot delete %s slowly through ftruncate from trash " | |
305 | "as it has other links", | |
306 | path_in_trash.c_str()); | |
307 | } | |
308 | } else if (!num_link_error_printed_) { | |
309 | ROCKS_LOG_INFO( | |
310 | info_log_, | |
311 | "Cannot delete files slowly through ftruncate from trash " | |
312 | "as Env::NumFileLinks() returns error: %s", | |
313 | my_status.ToString().c_str()); | |
314 | num_link_error_printed_ = true; | |
315 | } | |
316 | } | |
7c673cae | 317 | |
11fdf7f2 | 318 | if (need_full_delete) { |
f67539c2 | 319 | s = fs_->DeleteFile(path_in_trash, IOOptions(), nullptr); |
11fdf7f2 | 320 | if (!dir_to_sync.empty()) { |
f67539c2 | 321 | std::unique_ptr<FSDirectory> dir_obj; |
11fdf7f2 | 322 | if (s.ok()) { |
f67539c2 | 323 | s = fs_->NewDirectory(dir_to_sync, IOOptions(), &dir_obj, nullptr); |
11fdf7f2 TL |
324 | } |
325 | if (s.ok()) { | |
f67539c2 | 326 | s = dir_obj->Fsync(IOOptions(), nullptr); |
11fdf7f2 TL |
327 | TEST_SYNC_POINT_CALLBACK( |
328 | "DeleteScheduler::DeleteTrashFile::AfterSyncDir", | |
329 | reinterpret_cast<void*>(const_cast<std::string*>(&dir_to_sync))); | |
330 | } | |
331 | } | |
332 | *deleted_bytes = file_size; | |
333 | sst_file_manager_->OnDeleteFile(path_in_trash); | |
334 | } | |
335 | } | |
7c673cae FG |
336 | if (!s.ok()) { |
337 | // Error while getting file size or while deleting | |
338 | ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", | |
339 | path_in_trash.c_str(), s.ToString().c_str()); | |
340 | *deleted_bytes = 0; | |
341 | } else { | |
11fdf7f2 | 342 | total_trash_size_.fetch_sub(*deleted_bytes); |
7c673cae FG |
343 | } |
344 | ||
345 | return s; | |
346 | } | |
347 | ||
348 | void DeleteScheduler::WaitForEmptyTrash() { | |
349 | InstrumentedMutexLock l(&mu_); | |
350 | while (pending_files_ > 0 && !closing_) { | |
351 | cv_.Wait(); | |
352 | } | |
353 | } | |
354 | ||
f67539c2 | 355 | } // namespace ROCKSDB_NAMESPACE |
7c673cae FG |
356 | |
357 | #endif // ROCKSDB_LITE |