]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/file/sst_file_manager_impl.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / file / sst_file_manager_impl.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#pragma once
7
8#ifndef ROCKSDB_LITE
9
10#include <string>
11
f67539c2 12#include "db/compaction/compaction.h"
f67539c2 13#include "file/delete_scheduler.h"
1e59de90 14#include "port/port.h"
7c673cae 15#include "rocksdb/sst_file_manager.h"
7c673cae 16
f67539c2 17namespace ROCKSDB_NAMESPACE {
1e59de90
TL
18class ErrorHandler;
19class FileSystem;
20class SystemClock;
7c673cae
FG
21class Logger;
22
1e59de90
TL
23// SstFileManager is used to track SST and blob files in the DB and control
24// their deletion rate. All SstFileManager public functions are thread-safe.
7c673cae
FG
25class SstFileManagerImpl : public SstFileManager {
26 public:
1e59de90
TL
27 explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock,
28 const std::shared_ptr<FileSystem>& fs,
29 const std::shared_ptr<Logger>& logger,
11fdf7f2
TL
30 int64_t rate_bytes_per_sec,
31 double max_trash_db_ratio,
32 uint64_t bytes_max_delete_chunk);
7c673cae
FG
33
34 ~SstFileManagerImpl();
35
1e59de90
TL
36 // DB will call OnAddFile whenever a new sst/blob file is added.
37 Status OnAddFile(const std::string& file_path);
7c673cae 38
f67539c2
TL
39 // Overload where size of the file is provided by the caller rather than
40 // queried from the filesystem. This is an optimization.
1e59de90 41 Status OnAddFile(const std::string& file_path, uint64_t file_size);
f67539c2 42
1e59de90 43 // DB will call OnDeleteFile whenever a sst/blob file is deleted.
7c673cae
FG
44 Status OnDeleteFile(const std::string& file_path);
45
1e59de90 46 // DB will call OnMoveFile whenever a sst/blob file is move to a new path.
11fdf7f2
TL
47 Status OnMoveFile(const std::string& old_path, const std::string& new_path,
48 uint64_t* file_size = nullptr);
7c673cae
FG
49
50 // Update the maximum allowed space that should be used by RocksDB, if
1e59de90
TL
51 // the total size of the SST and blob files exceeds max_allowed_space, writes
52 // to RocksDB will fail.
7c673cae
FG
53 //
54 // Setting max_allowed_space to 0 will disable this feature, maximum allowed
55 // space will be infinite (Default value).
56 //
57 // thread-safe.
58 void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override;
59
11fdf7f2
TL
60 void SetCompactionBufferSize(uint64_t compaction_buffer_size) override;
61
1e59de90
TL
62 // Return true if the total size of SST and blob files exceeded the maximum
63 // allowed space usage.
7c673cae
FG
64 //
65 // thread-safe.
66 bool IsMaxAllowedSpaceReached() override;
67
11fdf7f2
TL
68 bool IsMaxAllowedSpaceReachedIncludingCompactions() override;
69
70 // Returns true is there is enough (approximate) space for the specified
71 // compaction. Space is approximate because this function conservatively
72 // estimates how much space is currently being used by compactions (i.e.
73 // if a compaction has started, this function bumps the used space by
74 // the full compaction size).
75 bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
76 const std::vector<CompactionInputFiles>& inputs,
1e59de90 77 const Status& bg_error);
11fdf7f2
TL
78
79 // Bookkeeping so total_file_sizes_ goes back to normal after compaction
80 // finishes
81 void OnCompactionCompletion(Compaction* c);
82
83 uint64_t GetCompactionsReservedSize();
84
7c673cae
FG
85 // Return the total size of all tracked files.
86 uint64_t GetTotalSize() override;
87
88 // Return a map containing all tracked files and there corresponding sizes.
89 std::unordered_map<std::string, uint64_t> GetTrackedFiles() override;
90
91 // Return delete rate limit in bytes per second.
92 virtual int64_t GetDeleteRateBytesPerSecond() override;
93
94 // Update the delete rate limit in bytes per second.
95 virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override;
96
11fdf7f2
TL
97 // Return trash/DB size ratio where new files will be deleted immediately
98 virtual double GetMaxTrashDBRatio() override;
99
100 // Update trash/DB size ratio where new files will be deleted immediately
101 virtual void SetMaxTrashDBRatio(double ratio) override;
102
103 // Return the total size of trash files
104 uint64_t GetTotalTrashSize() override;
105
106 // Called by each DB instance using this sst file manager to reserve
107 // disk buffer space for recovery from out of space errors
108 void ReserveDiskBuffer(uint64_t buffer, const std::string& path);
109
110 // Set a flag upon encountering disk full. May enqueue the ErrorHandler
111 // instance for background polling and recovery
112 void StartErrorRecovery(ErrorHandler* db, Status bg_error);
113
114 // Remove the given Errorhandler instance from the recovery queue. Its
115 // not guaranteed
116 bool CancelErrorRecovery(ErrorHandler* db);
117
494da23a
TL
118 // Mark file as trash and schedule it's deletion. If force_bg is set, it
119 // forces the file to be deleting in the background regardless of DB size,
120 // except when rate limited delete is disabled
11fdf7f2 121 virtual Status ScheduleFileDeletion(const std::string& file_path,
494da23a
TL
122 const std::string& dir_to_sync,
123 const bool force_bg = false);
7c673cae
FG
124
125 // Wait for all files being deleteing in the background to finish or for
126 // destructor to be called.
127 virtual void WaitForEmptyTrash();
128
11fdf7f2
TL
129 DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
130
131 // Stop the error recovery background thread. This should be called only
132 // once in the object's lifetime, and before the destructor
133 void Close();
134
20effc67
TL
135 void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) override {
136 stats_ = stats;
137 delete_scheduler_.SetStatisticsPtr(stats);
138 }
139
7c673cae
FG
140 private:
141 // REQUIRES: mutex locked
1e59de90 142 void OnAddFileImpl(const std::string& file_path, uint64_t file_size);
7c673cae
FG
143 // REQUIRES: mutex locked
144 void OnDeleteFileImpl(const std::string& file_path);
145
11fdf7f2
TL
146 void ClearError();
147 bool CheckFreeSpace() {
148 return bg_err_.severity() == Status::Severity::kSoftError;
149 }
150
1e59de90 151 std::shared_ptr<SystemClock> clock_;
f67539c2 152 std::shared_ptr<FileSystem> fs_;
7c673cae
FG
153 std::shared_ptr<Logger> logger_;
154 // Mutex to protect tracked_files_, total_files_size_
155 port::Mutex mu_;
156 // The summation of the sizes of all files in tracked_files_ map
157 uint64_t total_files_size_;
11fdf7f2
TL
158 // Compactions should only execute if they can leave at least
159 // this amount of buffer space for logs and flushes
160 uint64_t compaction_buffer_size_;
161 // Estimated size of the current ongoing compactions
162 uint64_t cur_compactions_reserved_size_;
7c673cae
FG
163 // A map containing all tracked files and there sizes
164 // file_path => file_size
165 std::unordered_map<std::string, uint64_t> tracked_files_;
1e59de90 166 // The maximum allowed space (in bytes) for sst and blob files.
7c673cae 167 uint64_t max_allowed_space_;
1e59de90 168 // DeleteScheduler used to throttle file deletion.
7c673cae 169 DeleteScheduler delete_scheduler_;
11fdf7f2
TL
170 port::CondVar cv_;
171 // Flag to force error recovery thread to exit
172 bool closing_;
173 // Background error recovery thread
174 std::unique_ptr<port::Thread> bg_thread_;
175 // A path in the filesystem corresponding to this SFM. This is used for
176 // calling Env::GetFreeSpace. Posix requires a path in the filesystem
177 std::string path_;
178 // Save the current background error
179 Status bg_err_;
180 // Amount of free disk headroom before allowing recovery from hard errors
181 uint64_t reserved_disk_buffer_;
182 // For soft errors, amount of free disk space before we can allow
183 // compactions to run full throttle. If disk space is below this trigger,
184 // compactions will be gated by free disk space > input size
185 uint64_t free_space_trigger_;
1e59de90 186 // List of database error handler instances tracked by this SstFileManager.
11fdf7f2
TL
187 std::list<ErrorHandler*> error_handler_list_;
188 // Pointer to ErrorHandler instance that is currently processing recovery
189 ErrorHandler* cur_instance_;
20effc67 190 std::shared_ptr<Statistics> stats_;
7c673cae
FG
191};
192
f67539c2 193} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
194
195#endif // ROCKSDB_LITE