1 // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
7 #include "monitoring/instrumented_mutex.h"
8 #include "options/db_options.h"
9 #include "rocksdb/io_status.h"
10 #include "rocksdb/listener.h"
11 #include "rocksdb/status.h"
13 namespace ROCKSDB_NAMESPACE
{
17 // Holds the DB recovery context, i.e. the information that is related to
18 // the recovery actions. For example, it contains the
19 // FlushReason, which tells the flush job why the flush was requested.
20 struct DBRecoverContext
{
// The reason passed along with a flush (see the struct comment).
21 FlushReason flush_reason
;
// Default context: flush_reason is FlushReason::kErrorRecovery.
23 DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery
) {}
// Context carrying the caller-supplied flush reason `reason`.
25 DBRecoverContext(FlushReason reason
) : flush_reason(reason
) {}
// Constructor. From the visible initializers: db_options_ is bound to
// `db_options`, recovery_thread_ starts out null, the auto_recovery_,
// recovery_in_prog_, soft_error_no_bg_work_ and is_db_stopped_ flags all start
// out false, and bg_error_stats_ is taken from db_options.statistics.
// NOTE(review): no initializer that consumes `db` or `db_mutex` is visible in
// this excerpt — confirm against the complete file.
30 ErrorHandler(DBImpl
* db
, const ImmutableDBOptions
& db_options
,
31 InstrumentedMutex
* db_mutex
)
33 db_options_(db_options
),
36 recovery_thread_(nullptr),
38 auto_recovery_(false),
39 recovery_in_prog_(false),
40 soft_error_no_bg_work_(false),
41 is_db_stopped_(false),
42 bg_error_stats_(db_options
.statistics
) {
43 // Clear the checked flag for uninitialized errors: at this point the three
// status members below hold no reported error yet, so they are explicitly
// allowed to go unchecked.
44 bg_error_
.PermitUncheckedError();
45 recovery_error_
.PermitUncheckedError();
46 recovery_io_error_
.PermitUncheckedError();
49 void EnableAutoRecovery() { auto_recovery_
= true; }
51 Status::Severity
GetErrorSeverity(BackgroundErrorReason reason
,
52 Status::Code code
, Status::SubCode subcode
);
54 const Status
& SetBGError(const Status
& bg_err
, BackgroundErrorReason reason
);
56 Status
GetBGError() const { return bg_error_
; }
58 Status
GetRecoveryError() const { return recovery_error_
; }
60 Status
ClearBGError();
62 bool IsDBStopped() { return is_db_stopped_
.load(std::memory_order_acquire
); }
// Reports whether background work is currently stopped because of a
// background error. Asserts that db_mutex_ is held. The result is true only
// when bg_error_ is not OK and at least one of the following holds:
//   - the error severity is Status::Severity::kHardError or above,
//   - automatic recovery is not enabled (auto_recovery_ is false),
//   - soft_error_no_bg_work_ is set.
64 bool IsBGWorkStopped() {
66 db_mutex_
->AssertHeld();
67 return !bg_error_
.ok() &&
68 (bg_error_
.severity() >= Status::Severity::kHardError
||
69 !auto_recovery_
|| soft_error_no_bg_work_
);
72 bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_
; }
74 bool IsRecoveryInProgress() { return recovery_in_prog_
; }
76 Status
RecoverFromBGError(bool is_manual
= false);
// Declarations only; both take no arguments and return nothing.
// NOTE(review): their exact effect on an ongoing recovery is defined outside
// this excerpt — confirm against the definitions.
void CancelErrorRecovery();

void EndAutoRecovery();
// Reference bound to the ImmutableDBOptions passed to the constructor.
83 const ImmutableDBOptions
& db_options_
;
85 // A separate Status variable used to record any errors that occur during
86 // the recovery process from hard errors.
87 Status recovery_error_
;
88 // A separate IOStatus variable used to record any IO errors that occur
89 // during the recovery process. At the same time, recovery_error_ is also set.
90 IOStatus recovery_io_error_
;
91 // The condition variable used with db_mutex during auto resume
// (presumably for timed waits — TODO confirm).
93 InstrumentedCondVar cv_
;
// Owning pointer to a port::Thread; initialized to nullptr by the constructor.
// NOTE(review): presumably the thread that runs the recovery — confirm.
95 std::unique_ptr
<port::Thread
> recovery_thread_
;
// Raw pointer to the DB mutex; IsBGWorkStopped() asserts that it is held.
97 InstrumentedMutex
* db_mutex_
;
98 // A flag indicating whether automatic recovery from errors is enabled.
// NOTE(review): the sentence above reads like the description of
// auto_recovery_, whose declaration is not visible in this excerpt — confirm
// its placement. recovery_in_prog_ below is initialized to false by the
// constructor and is what IsRecoveryInProgress() returns.
100 bool recovery_in_prog_
;
101 // A flag indicating that, for a soft error, no background work is
102 // allowed except the work that comes from recovery.
103 bool soft_error_no_bg_work_
;
105 // Stores the context for recovery, such as the flush reason.
106 DBRecoverContext recover_context_
;
// Atomic flag, initialized to false by the constructor; IsDBStopped() reads it
// with acquire ordering.
107 std::atomic
<bool> is_db_stopped_
;
109 // Shared pointer to the DB statistics, taken from db_options.statistics.
110 std::shared_ptr
<Statistics
> bg_error_stats_
;
112 const Status
& HandleKnownErrors(const Status
& bg_err
,
113 BackgroundErrorReason reason
);
114 Status
OverrideNoSpaceError(const Status
& bg_error
, bool* auto_recovery
);
115 void RecoverFromNoSpace();
116 const Status
& StartRecoverFromRetryableBGIOError(const IOStatus
& io_error
);
117 void RecoverFromRetryableBGIOError();
118 // First, if it is in recovery and the recovery_error is ok. Set the
119 // recovery_error_ to bg_err. Second, if the severity is higher than the
120 // current bg_error_, overwrite it.
121 void CheckAndSetRecoveryAndBGError(const Status
& bg_err
);
124 } // namespace ROCKSDB_NAMESPACE