]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | ||
8 | #ifndef ROCKSDB_LITE | |
9 | ||
10 | #include <string> | |
11 | ||
f67539c2 | 12 | #include "db/compaction/compaction.h" |
f67539c2 | 13 | #include "file/delete_scheduler.h" |
1e59de90 | 14 | #include "port/port.h" |
7c673cae | 15 | #include "rocksdb/sst_file_manager.h" |
7c673cae | 16 | |
f67539c2 | 17 | namespace ROCKSDB_NAMESPACE { |
1e59de90 TL |
18 | class ErrorHandler; |
19 | class FileSystem; | |
20 | class SystemClock; | |
7c673cae FG |
21 | class Logger; |
22 | ||
1e59de90 TL |
23 | // SstFileManager is used to track SST and blob files in the DB and control |
24 | // their deletion rate. All SstFileManager public functions are thread-safe. | |
7c673cae FG |
25 | class SstFileManagerImpl : public SstFileManager { |
26 | public: | |
1e59de90 TL |
27 | explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock, |
28 | const std::shared_ptr<FileSystem>& fs, | |
29 | const std::shared_ptr<Logger>& logger, | |
11fdf7f2 TL |
30 | int64_t rate_bytes_per_sec, |
31 | double max_trash_db_ratio, | |
32 | uint64_t bytes_max_delete_chunk); | |
7c673cae FG |
33 | |
34 | ~SstFileManagerImpl(); | |
35 | ||
1e59de90 TL |
36 | // DB will call OnAddFile whenever a new sst/blob file is added. |
37 | Status OnAddFile(const std::string& file_path); | |
7c673cae | 38 | |
f67539c2 TL |
39 | // Overload where size of the file is provided by the caller rather than |
40 | // queried from the filesystem. This is an optimization. | |
1e59de90 | 41 | Status OnAddFile(const std::string& file_path, uint64_t file_size); |
f67539c2 | 42 | |
1e59de90 | 43 | // DB will call OnDeleteFile whenever a sst/blob file is deleted. |
7c673cae FG |
44 | Status OnDeleteFile(const std::string& file_path); |
45 | ||
1e59de90 | 46 | // DB will call OnMoveFile whenever a sst/blob file is moved to a new path. |
11fdf7f2 TL |
47 | Status OnMoveFile(const std::string& old_path, const std::string& new_path, |
48 | uint64_t* file_size = nullptr); | |
7c673cae FG |
49 | |
50 | // Update the maximum allowed space that should be used by RocksDB, if | |
1e59de90 TL |
51 | // the total size of the SST and blob files exceeds max_allowed_space, writes |
52 | // to RocksDB will fail. | |
7c673cae FG |
53 | // |
54 | // Setting max_allowed_space to 0 will disable this feature, maximum allowed | |
55 | // space will be infinite (Default value). | |
56 | // | |
57 | // thread-safe. | |
58 | void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override; | |
59 | ||
11fdf7f2 TL |
60 | void SetCompactionBufferSize(uint64_t compaction_buffer_size) override; |
61 | ||
1e59de90 TL |
62 | // Return true if the total size of SST and blob files exceeded the maximum |
63 | // allowed space usage. | |
7c673cae FG |
64 | // |
65 | // thread-safe. | |
66 | bool IsMaxAllowedSpaceReached() override; | |
67 | ||
11fdf7f2 TL |
68 | bool IsMaxAllowedSpaceReachedIncludingCompactions() override; |
69 | ||
70 | // Returns true if there is enough (approximate) space for the specified | |
71 | // compaction. Space is approximate because this function conservatively | |
72 | // estimates how much space is currently being used by compactions (i.e. | |
73 | // if a compaction has started, this function bumps the used space by | |
74 | // the full compaction size). | |
75 | bool EnoughRoomForCompaction(ColumnFamilyData* cfd, | |
76 | const std::vector<CompactionInputFiles>& inputs, | |
1e59de90 | 77 | const Status& bg_error); |
11fdf7f2 TL |
78 | |
79 | // Bookkeeping so total_files_size_ goes back to normal after compaction | |
80 | // finishes | |
81 | void OnCompactionCompletion(Compaction* c); | |
82 | ||
83 | uint64_t GetCompactionsReservedSize(); | |
84 | ||
7c673cae FG |
85 | // Return the total size of all tracked files. |
86 | uint64_t GetTotalSize() override; | |
87 | ||
88 | // Return a map containing all tracked files and their corresponding sizes. | |
89 | std::unordered_map<std::string, uint64_t> GetTrackedFiles() override; | |
90 | ||
91 | // Return delete rate limit in bytes per second. | |
92 | virtual int64_t GetDeleteRateBytesPerSecond() override; | |
93 | ||
94 | // Update the delete rate limit in bytes per second. | |
95 | virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override; | |
96 | ||
11fdf7f2 TL |
97 | // Return trash/DB size ratio where new files will be deleted immediately |
98 | virtual double GetMaxTrashDBRatio() override; | |
99 | ||
100 | // Update trash/DB size ratio where new files will be deleted immediately | |
101 | virtual void SetMaxTrashDBRatio(double ratio) override; | |
102 | ||
103 | // Return the total size of trash files | |
104 | uint64_t GetTotalTrashSize() override; | |
105 | ||
106 | // Called by each DB instance using this sst file manager to reserve | |
107 | // disk buffer space for recovery from out of space errors | |
108 | void ReserveDiskBuffer(uint64_t buffer, const std::string& path); | |
109 | ||
110 | // Set a flag upon encountering disk full. May enqueue the ErrorHandler | |
111 | // instance for background polling and recovery | |
112 | void StartErrorRecovery(ErrorHandler* db, Status bg_error); | |
113 | ||
114 | // Remove the given ErrorHandler instance from the recovery queue. It's | |
115 | // not guaranteed | |
116 | bool CancelErrorRecovery(ErrorHandler* db); | |
117 | ||
494da23a TL |
118 | // Mark file as trash and schedule its deletion. If force_bg is set, it |
119 | // forces the file to be deleted in the background regardless of DB size, | |
120 | // except when rate limited delete is disabled | |
11fdf7f2 | 121 | virtual Status ScheduleFileDeletion(const std::string& file_path, |
494da23a TL |
122 | const std::string& dir_to_sync, |
123 | const bool force_bg = false); | |
7c673cae FG |
124 | |
125 | // Wait for all files being deleted in the background to finish or for | |
126 | // destructor to be called. | |
127 | virtual void WaitForEmptyTrash(); | |
128 | ||
11fdf7f2 TL |
129 | DeleteScheduler* delete_scheduler() { return &delete_scheduler_; } |
130 | ||
131 | // Stop the error recovery background thread. This should be called only | |
132 | // once in the object's lifetime, and before the destructor | |
133 | void Close(); | |
134 | ||
20effc67 TL |
135 | void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) override { |
136 | stats_ = stats; | |
137 | delete_scheduler_.SetStatisticsPtr(stats); | |
138 | } | |
139 | ||
7c673cae FG |
140 | private: |
141 | // REQUIRES: mutex locked | |
1e59de90 | 142 | void OnAddFileImpl(const std::string& file_path, uint64_t file_size); |
7c673cae FG |
143 | // REQUIRES: mutex locked |
144 | void OnDeleteFileImpl(const std::string& file_path); | |
145 | ||
11fdf7f2 TL |
146 | void ClearError(); |
147 | bool CheckFreeSpace() { | |
148 | return bg_err_.severity() == Status::Severity::kSoftError; | |
149 | } | |
150 | ||
1e59de90 | 151 | std::shared_ptr<SystemClock> clock_; |
f67539c2 | 152 | std::shared_ptr<FileSystem> fs_; |
7c673cae FG |
153 | std::shared_ptr<Logger> logger_; |
154 | // Mutex to protect tracked_files_, total_files_size_ | |
155 | port::Mutex mu_; | |
156 | // The summation of the sizes of all files in tracked_files_ map | |
157 | uint64_t total_files_size_; | |
11fdf7f2 TL |
158 | // Compactions should only execute if they can leave at least |
159 | // this amount of buffer space for logs and flushes | |
160 | uint64_t compaction_buffer_size_; | |
161 | // Estimated size of the current ongoing compactions | |
162 | uint64_t cur_compactions_reserved_size_; | |
7c673cae FG |
163 | // A map containing all tracked files and their sizes |
164 | // file_path => file_size | |
165 | std::unordered_map<std::string, uint64_t> tracked_files_; | |
1e59de90 | 166 | // The maximum allowed space (in bytes) for sst and blob files. |
7c673cae | 167 | uint64_t max_allowed_space_; |
1e59de90 | 168 | // DeleteScheduler used to throttle file deletion. |
7c673cae | 169 | DeleteScheduler delete_scheduler_; |
11fdf7f2 TL |
170 | port::CondVar cv_; |
171 | // Flag to force error recovery thread to exit | |
172 | bool closing_; | |
173 | // Background error recovery thread | |
174 | std::unique_ptr<port::Thread> bg_thread_; | |
175 | // A path in the filesystem corresponding to this SFM. This is used for | |
176 | // calling Env::GetFreeSpace. Posix requires a path in the filesystem | |
177 | std::string path_; | |
178 | // Save the current background error | |
179 | Status bg_err_; | |
180 | // Amount of free disk headroom before allowing recovery from hard errors | |
181 | uint64_t reserved_disk_buffer_; | |
182 | // For soft errors, amount of free disk space before we can allow | |
183 | // compactions to run full throttle. If disk space is below this trigger, | |
184 | // compactions will be gated by free disk space > input size | |
185 | uint64_t free_space_trigger_; | |
1e59de90 | 186 | // List of database error handler instances tracked by this SstFileManager. |
11fdf7f2 TL |
187 | std::list<ErrorHandler*> error_handler_list_; |
188 | // Pointer to ErrorHandler instance that is currently processing recovery | |
189 | ErrorHandler* cur_instance_; | |
20effc67 | 190 | std::shared_ptr<Statistics> stats_; |
7c673cae FG |
191 | }; |
192 | ||
f67539c2 | 193 | } // namespace ROCKSDB_NAMESPACE |
7c673cae FG |
194 | |
195 | #endif // ROCKSDB_LITE |