]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/util/sst_file_manager_impl.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / util / sst_file_manager_impl.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7
8 #ifndef ROCKSDB_LITE
9
10 #include <string>
11
12 #include "port/port.h"
13
14 #include "db/compaction.h"
15 #include "db/error_handler.h"
16 #include "rocksdb/sst_file_manager.h"
17 #include "util/delete_scheduler.h"
18
19 namespace rocksdb {
20
21 class Env;
22 class Logger;
23
24 // SstFileManager is used to track SST files in the DB and control there
25 // deletion rate.
26 // All SstFileManager public functions are thread-safe.
27 class SstFileManagerImpl : public SstFileManager {
28 public:
29 explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
30 int64_t rate_bytes_per_sec,
31 double max_trash_db_ratio,
32 uint64_t bytes_max_delete_chunk);
33
34 ~SstFileManagerImpl();
35
36 // DB will call OnAddFile whenever a new sst file is added.
37 Status OnAddFile(const std::string& file_path, bool compaction = false);
38
39 // DB will call OnDeleteFile whenever an sst file is deleted.
40 Status OnDeleteFile(const std::string& file_path);
41
42 // DB will call OnMoveFile whenever an sst file is move to a new path.
43 Status OnMoveFile(const std::string& old_path, const std::string& new_path,
44 uint64_t* file_size = nullptr);
45
46 // Update the maximum allowed space that should be used by RocksDB, if
47 // the total size of the SST files exceeds max_allowed_space, writes to
48 // RocksDB will fail.
49 //
50 // Setting max_allowed_space to 0 will disable this feature, maximum allowed
51 // space will be infinite (Default value).
52 //
53 // thread-safe.
54 void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override;
55
56 void SetCompactionBufferSize(uint64_t compaction_buffer_size) override;
57
58 // Return true if the total size of SST files exceeded the maximum allowed
59 // space usage.
60 //
61 // thread-safe.
62 bool IsMaxAllowedSpaceReached() override;
63
64 bool IsMaxAllowedSpaceReachedIncludingCompactions() override;
65
66 // Returns true is there is enough (approximate) space for the specified
67 // compaction. Space is approximate because this function conservatively
68 // estimates how much space is currently being used by compactions (i.e.
69 // if a compaction has started, this function bumps the used space by
70 // the full compaction size).
71 bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
72 const std::vector<CompactionInputFiles>& inputs,
73 Status bg_error);
74
75 // Bookkeeping so total_file_sizes_ goes back to normal after compaction
76 // finishes
77 void OnCompactionCompletion(Compaction* c);
78
79 uint64_t GetCompactionsReservedSize();
80
81 // Return the total size of all tracked files.
82 uint64_t GetTotalSize() override;
83
84 // Return a map containing all tracked files and there corresponding sizes.
85 std::unordered_map<std::string, uint64_t> GetTrackedFiles() override;
86
87 // Return delete rate limit in bytes per second.
88 virtual int64_t GetDeleteRateBytesPerSecond() override;
89
90 // Update the delete rate limit in bytes per second.
91 virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override;
92
93 // Return trash/DB size ratio where new files will be deleted immediately
94 virtual double GetMaxTrashDBRatio() override;
95
96 // Update trash/DB size ratio where new files will be deleted immediately
97 virtual void SetMaxTrashDBRatio(double ratio) override;
98
99 // Return the total size of trash files
100 uint64_t GetTotalTrashSize() override;
101
102 // Called by each DB instance using this sst file manager to reserve
103 // disk buffer space for recovery from out of space errors
104 void ReserveDiskBuffer(uint64_t buffer, const std::string& path);
105
106 // Set a flag upon encountering disk full. May enqueue the ErrorHandler
107 // instance for background polling and recovery
108 void StartErrorRecovery(ErrorHandler* db, Status bg_error);
109
110 // Remove the given Errorhandler instance from the recovery queue. Its
111 // not guaranteed
112 bool CancelErrorRecovery(ErrorHandler* db);
113
114 // Mark file as trash and schedule it's deletion. If force_bg is set, it
115 // forces the file to be deleting in the background regardless of DB size,
116 // except when rate limited delete is disabled
117 virtual Status ScheduleFileDeletion(const std::string& file_path,
118 const std::string& dir_to_sync,
119 const bool force_bg = false);
120
121 // Wait for all files being deleteing in the background to finish or for
122 // destructor to be called.
123 virtual void WaitForEmptyTrash();
124
125 DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
126
127 // Stop the error recovery background thread. This should be called only
128 // once in the object's lifetime, and before the destructor
129 void Close();
130
131 private:
132 // REQUIRES: mutex locked
133 void OnAddFileImpl(const std::string& file_path, uint64_t file_size,
134 bool compaction);
135 // REQUIRES: mutex locked
136 void OnDeleteFileImpl(const std::string& file_path);
137
138 void ClearError();
139 bool CheckFreeSpace() {
140 return bg_err_.severity() == Status::Severity::kSoftError;
141 }
142
143 Env* env_;
144 std::shared_ptr<Logger> logger_;
145 // Mutex to protect tracked_files_, total_files_size_
146 port::Mutex mu_;
147 // The summation of the sizes of all files in tracked_files_ map
148 uint64_t total_files_size_;
149 // The summation of all output files of in-progress compactions
150 uint64_t in_progress_files_size_;
151 // Compactions should only execute if they can leave at least
152 // this amount of buffer space for logs and flushes
153 uint64_t compaction_buffer_size_;
154 // Estimated size of the current ongoing compactions
155 uint64_t cur_compactions_reserved_size_;
156 // A map containing all tracked files and there sizes
157 // file_path => file_size
158 std::unordered_map<std::string, uint64_t> tracked_files_;
159 // A set of files belonging to in-progress compactions
160 std::unordered_set<std::string> in_progress_files_;
161 // The maximum allowed space (in bytes) for sst files.
162 uint64_t max_allowed_space_;
163 // DeleteScheduler used to throttle file deletition.
164 DeleteScheduler delete_scheduler_;
165 port::CondVar cv_;
166 // Flag to force error recovery thread to exit
167 bool closing_;
168 // Background error recovery thread
169 std::unique_ptr<port::Thread> bg_thread_;
170 // A path in the filesystem corresponding to this SFM. This is used for
171 // calling Env::GetFreeSpace. Posix requires a path in the filesystem
172 std::string path_;
173 // Save the current background error
174 Status bg_err_;
175 // Amount of free disk headroom before allowing recovery from hard errors
176 uint64_t reserved_disk_buffer_;
177 // For soft errors, amount of free disk space before we can allow
178 // compactions to run full throttle. If disk space is below this trigger,
179 // compactions will be gated by free disk space > input size
180 uint64_t free_space_trigger_;
181 // List of database error handler instances tracked by this sst file manager
182 std::list<ErrorHandler*> error_handler_list_;
183 // Pointer to ErrorHandler instance that is currently processing recovery
184 ErrorHandler* cur_instance_;
185 };
186
187 } // namespace rocksdb
188
189 #endif // ROCKSDB_LITE