]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/error_handler.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / db / error_handler.cc
1 // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 #include "db/error_handler.h"
7 #include "db/db_impl.h"
8 #include "db/event_helpers.h"
9 #include "util/sst_file_manager_impl.h"
10
11 namespace rocksdb {
12
13 // Maps to help decide the severity of an error based on the
14 // BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks
15 // is set or not. There are 3 maps, going from most specific to least specific
16 // (i.e from all 4 fields in a tuple to only the BackgroundErrorReason and
17 // paranoid_checks). The less specific map serves as a catch all in case we miss
18 // a specific error code or subcode.
19 std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
20 Status::Severity>
21 ErrorSeverityMap = {
22 // Errors during BG compaction
23 {std::make_tuple(BackgroundErrorReason::kCompaction,
24 Status::Code::kIOError, Status::SubCode::kNoSpace,
25 true),
26 Status::Severity::kSoftError},
27 {std::make_tuple(BackgroundErrorReason::kCompaction,
28 Status::Code::kIOError, Status::SubCode::kNoSpace,
29 false),
30 Status::Severity::kNoError},
31 {std::make_tuple(BackgroundErrorReason::kCompaction,
32 Status::Code::kIOError, Status::SubCode::kSpaceLimit,
33 true),
34 Status::Severity::kHardError},
35 // Errors during BG flush
36 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
37 Status::SubCode::kNoSpace, true),
38 Status::Severity::kHardError},
39 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
40 Status::SubCode::kNoSpace, false),
41 Status::Severity::kNoError},
42 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
43 Status::SubCode::kSpaceLimit, true),
44 Status::Severity::kHardError},
45 // Errors during Write
46 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
47 Status::Code::kIOError, Status::SubCode::kNoSpace,
48 true),
49 Status::Severity::kHardError},
50 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
51 Status::Code::kIOError, Status::SubCode::kNoSpace,
52 false),
53 Status::Severity::kHardError},
54 };
55
56 std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>, Status::Severity>
57 DefaultErrorSeverityMap = {
58 // Errors during BG compaction
59 {std::make_tuple(BackgroundErrorReason::kCompaction,
60 Status::Code::kCorruption, true),
61 Status::Severity::kUnrecoverableError},
62 {std::make_tuple(BackgroundErrorReason::kCompaction,
63 Status::Code::kCorruption, false),
64 Status::Severity::kNoError},
65 {std::make_tuple(BackgroundErrorReason::kCompaction,
66 Status::Code::kIOError, true),
67 Status::Severity::kFatalError},
68 {std::make_tuple(BackgroundErrorReason::kCompaction,
69 Status::Code::kIOError, false),
70 Status::Severity::kNoError},
71 // Errors during BG flush
72 {std::make_tuple(BackgroundErrorReason::kFlush,
73 Status::Code::kCorruption, true),
74 Status::Severity::kUnrecoverableError},
75 {std::make_tuple(BackgroundErrorReason::kFlush,
76 Status::Code::kCorruption, false),
77 Status::Severity::kNoError},
78 {std::make_tuple(BackgroundErrorReason::kFlush,
79 Status::Code::kIOError, true),
80 Status::Severity::kFatalError},
81 {std::make_tuple(BackgroundErrorReason::kFlush,
82 Status::Code::kIOError, false),
83 Status::Severity::kNoError},
84 // Errors during Write
85 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
86 Status::Code::kCorruption, true),
87 Status::Severity::kUnrecoverableError},
88 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
89 Status::Code::kCorruption, false),
90 Status::Severity::kNoError},
91 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
92 Status::Code::kIOError, true),
93 Status::Severity::kFatalError},
94 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
95 Status::Code::kIOError, false),
96 Status::Severity::kNoError},
97 };
98
99 std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
100 DefaultReasonMap = {
101 // Errors during BG compaction
102 {std::make_tuple(BackgroundErrorReason::kCompaction, true),
103 Status::Severity::kFatalError},
104 {std::make_tuple(BackgroundErrorReason::kCompaction, false),
105 Status::Severity::kNoError},
106 // Errors during BG flush
107 {std::make_tuple(BackgroundErrorReason::kFlush, true),
108 Status::Severity::kFatalError},
109 {std::make_tuple(BackgroundErrorReason::kFlush, false),
110 Status::Severity::kNoError},
111 // Errors during Write
112 {std::make_tuple(BackgroundErrorReason::kWriteCallback, true),
113 Status::Severity::kFatalError},
114 {std::make_tuple(BackgroundErrorReason::kWriteCallback, false),
115 Status::Severity::kFatalError},
116 // Errors during Memtable update
117 {std::make_tuple(BackgroundErrorReason::kMemTable, true),
118 Status::Severity::kFatalError},
119 {std::make_tuple(BackgroundErrorReason::kMemTable, false),
120 Status::Severity::kFatalError},
121 };
122
123 void ErrorHandler::CancelErrorRecovery() {
124 #ifndef ROCKSDB_LITE
125 db_mutex_->AssertHeld();
126
127 // We'll release the lock before calling sfm, so make sure no new
128 // recovery gets scheduled at that point
129 auto_recovery_ = false;
130 SstFileManagerImpl* sfm = reinterpret_cast<SstFileManagerImpl*>(
131 db_options_.sst_file_manager.get());
132 if (sfm) {
133 // This may or may not cancel a pending recovery
134 db_mutex_->Unlock();
135 bool cancelled = sfm->CancelErrorRecovery(this);
136 db_mutex_->Lock();
137 if (cancelled) {
138 recovery_in_prog_ = false;
139 }
140 }
141 #endif
142 }
143
144 // This is the main function for looking at an error during a background
145 // operation and deciding the severity, and error recovery strategy. The high
146 // level algorithm is as follows -
147 // 1. Classify the severity of the error based on the ErrorSeverityMap,
148 // DefaultErrorSeverityMap and DefaultReasonMap defined earlier
149 // 2. Call a Status code specific override function to adjust the severity
150 // if needed. The reason for this is our ability to recover may depend on
151 // the exact options enabled in DBOptions
152 // 3. Determine if auto recovery is possible. A listener notification callback
153 // is called, which can disable the auto recovery even if we decide its
154 // feasible
155 // 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control
156 // the actual recovery. If no sst file manager is specified in DBOptions,
157 // a default one is allocated during DB::Open(), so there will always be
158 // one.
159 // This can also get called as part of a recovery operation. In that case, we
160 // also track the error separately in recovery_error_ so we can tell in the
161 // end whether recovery succeeded or not
162 Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) {
163 db_mutex_->AssertHeld();
164
165 if (bg_err.ok()) {
166 return Status::OK();
167 }
168
169 // Check if recovery is currently in progress. If it is, we will save this
170 // error so we can check it at the end to see if recovery succeeded or not
171 if (recovery_in_prog_ && recovery_error_.ok()) {
172 recovery_error_ = bg_err;
173 }
174
175 bool paranoid = db_options_.paranoid_checks;
176 Status::Severity sev = Status::Severity::kFatalError;
177 Status new_bg_err;
178 bool found = false;
179
180 {
181 auto entry = ErrorSeverityMap.find(std::make_tuple(reason, bg_err.code(),
182 bg_err.subcode(), paranoid));
183 if (entry != ErrorSeverityMap.end()) {
184 sev = entry->second;
185 found = true;
186 }
187 }
188
189 if (!found) {
190 auto entry = DefaultErrorSeverityMap.find(std::make_tuple(reason,
191 bg_err.code(), paranoid));
192 if (entry != DefaultErrorSeverityMap.end()) {
193 sev = entry->second;
194 found = true;
195 }
196 }
197
198 if (!found) {
199 auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid));
200 if (entry != DefaultReasonMap.end()) {
201 sev = entry->second;
202 }
203 }
204
205 new_bg_err = Status(bg_err, sev);
206
207 bool auto_recovery = auto_recovery_;
208 if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) {
209 auto_recovery = false;
210 ;
211 }
212
213 // Allow some error specific overrides
214 if (new_bg_err == Status::NoSpace()) {
215 new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery);
216 }
217
218 if (!new_bg_err.ok()) {
219 Status s = new_bg_err;
220 EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
221 db_mutex_, &auto_recovery);
222 if (!s.ok() && (s.severity() > bg_error_.severity())) {
223 bg_error_ = s;
224 } else {
225 // This error is less severe than previously encountered error. Don't
226 // take any further action
227 return bg_error_;
228 }
229 }
230
231 if (auto_recovery) {
232 recovery_in_prog_ = true;
233
234 // Kick-off error specific recovery
235 if (bg_error_ == Status::NoSpace()) {
236 RecoverFromNoSpace();
237 }
238 }
239 return bg_error_;
240 }
241
242 Status ErrorHandler::OverrideNoSpaceError(Status bg_error,
243 bool* auto_recovery) {
244 #ifndef ROCKSDB_LITE
245 if (bg_error.severity() >= Status::Severity::kFatalError) {
246 return bg_error;
247 }
248
249 if (db_options_.sst_file_manager.get() == nullptr) {
250 // We rely on SFM to poll for enough disk space and recover
251 *auto_recovery = false;
252 return bg_error;
253 }
254
255 if (db_options_.allow_2pc &&
256 (bg_error.severity() <= Status::Severity::kSoftError)) {
257 // Don't know how to recover, as the contents of the current WAL file may
258 // be inconsistent, and it may be needed for 2PC. If 2PC is not enabled,
259 // we can just flush the memtable and discard the log
260 *auto_recovery = false;
261 return Status(bg_error, Status::Severity::kFatalError);
262 }
263
264 {
265 uint64_t free_space;
266 if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path,
267 &free_space) == Status::NotSupported()) {
268 *auto_recovery = false;
269 }
270 }
271
272 return bg_error;
273 #else
274 (void)auto_recovery;
275 return Status(bg_error, Status::Severity::kFatalError);
276 #endif
277 }
278
279 void ErrorHandler::RecoverFromNoSpace() {
280 #ifndef ROCKSDB_LITE
281 SstFileManagerImpl* sfm =
282 reinterpret_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
283
284 // Inform SFM of the error, so it can kick-off the recovery
285 if (sfm) {
286 sfm->StartErrorRecovery(this, bg_error_);
287 }
288 #endif
289 }
290
291 Status ErrorHandler::ClearBGError() {
292 #ifndef ROCKSDB_LITE
293 db_mutex_->AssertHeld();
294
295 // Signal that recovery succeeded
296 if (recovery_error_.ok()) {
297 Status old_bg_error = bg_error_;
298 bg_error_ = Status::OK();
299 recovery_in_prog_ = false;
300 EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
301 old_bg_error, db_mutex_);
302 }
303 return recovery_error_;
304 #else
305 return bg_error_;
306 #endif
307 }
308
309 Status ErrorHandler::RecoverFromBGError(bool is_manual) {
310 #ifndef ROCKSDB_LITE
311 InstrumentedMutexLock l(db_mutex_);
312 if (is_manual) {
313 // If its a manual recovery and there's a background recovery in progress
314 // return busy status
315 if (recovery_in_prog_) {
316 return Status::Busy();
317 }
318 recovery_in_prog_ = true;
319 }
320
321 if (bg_error_.severity() == Status::Severity::kSoftError) {
322 // Simply clear the background error and return
323 recovery_error_ = Status::OK();
324 return ClearBGError();
325 }
326
327 // Reset recovery_error_. We will use this to record any errors that happen
328 // during the recovery process. While recovering, the only operations that
329 // can generate background errors should be the flush operations
330 recovery_error_ = Status::OK();
331 Status s = db_->ResumeImpl();
332 // For manual recover, shutdown, and fatal error cases, set
333 // recovery_in_prog_ to false. For automatic background recovery, leave it
334 // as is regardless of success or failure as it will be retried
335 if (is_manual || s.IsShutdownInProgress() ||
336 bg_error_.severity() >= Status::Severity::kFatalError) {
337 recovery_in_prog_ = false;
338 }
339 return s;
340 #else
341 (void)is_manual;
342 return bg_error_;
343 #endif
344 }
345 }