]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // Copyright (c) 2018-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | #pragma once | |
6 | ||
7 | #include "monitoring/instrumented_mutex.h" | |
8 | #include "options/db_options.h" | |
20effc67 | 9 | #include "rocksdb/io_status.h" |
11fdf7f2 TL |
10 | #include "rocksdb/listener.h" |
11 | #include "rocksdb/status.h" | |
12 | ||
f67539c2 | 13 | namespace ROCKSDB_NAMESPACE { |
11fdf7f2 TL |
14 | |
15 | class DBImpl; | |
16 | ||
20effc67 TL |
17 | // This structure is used to store the DB recovery context. The context is |
18 | // the information that related to the recover actions. For example, it contains | |
19 | // FlushReason, which tells the flush job why this flush is called. | |
20 | struct DBRecoverContext { | |
21 | FlushReason flush_reason; | |
22 | ||
23 | DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {} | |
24 | ||
25 | DBRecoverContext(FlushReason reason) : flush_reason(reason) {} | |
26 | }; | |
27 | ||
11fdf7f2 | 28 | class ErrorHandler { |
1e59de90 TL |
29 | public: |
30 | ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options, | |
31 | InstrumentedMutex* db_mutex) | |
32 | : db_(db), | |
33 | db_options_(db_options), | |
34 | cv_(db_mutex), | |
35 | end_recovery_(false), | |
36 | recovery_thread_(nullptr), | |
37 | db_mutex_(db_mutex), | |
38 | auto_recovery_(false), | |
39 | recovery_in_prog_(false), | |
40 | soft_error_no_bg_work_(false), | |
41 | is_db_stopped_(false), | |
42 | bg_error_stats_(db_options.statistics) { | |
43 | // Clear the checked flag for uninitialized errors | |
44 | bg_error_.PermitUncheckedError(); | |
45 | recovery_error_.PermitUncheckedError(); | |
46 | recovery_io_error_.PermitUncheckedError(); | |
47 | } | |
48 | ||
49 | void EnableAutoRecovery() { auto_recovery_ = true; } | |
50 | ||
51 | Status::Severity GetErrorSeverity(BackgroundErrorReason reason, | |
52 | Status::Code code, Status::SubCode subcode); | |
53 | ||
54 | const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason); | |
55 | ||
56 | Status GetBGError() const { return bg_error_; } | |
57 | ||
58 | Status GetRecoveryError() const { return recovery_error_; } | |
59 | ||
60 | Status ClearBGError(); | |
61 | ||
62 | bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); } | |
63 | ||
64 | bool IsBGWorkStopped() { | |
65 | assert(db_mutex_); | |
66 | db_mutex_->AssertHeld(); | |
67 | return !bg_error_.ok() && | |
68 | (bg_error_.severity() >= Status::Severity::kHardError || | |
69 | !auto_recovery_ || soft_error_no_bg_work_); | |
70 | } | |
71 | ||
72 | bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; } | |
73 | ||
74 | bool IsRecoveryInProgress() { return recovery_in_prog_; } | |
75 | ||
76 | Status RecoverFromBGError(bool is_manual = false); | |
77 | void CancelErrorRecovery(); | |
78 | ||
79 | void EndAutoRecovery(); | |
80 | ||
81 | private: | |
82 | DBImpl* db_; | |
83 | const ImmutableDBOptions& db_options_; | |
84 | Status bg_error_; | |
85 | // A separate Status variable used to record any errors during the | |
86 | // recovery process from hard errors | |
87 | Status recovery_error_; | |
88 | // A separate IO Status variable used to record any IO errors during | |
89 | // the recovery process. At the same time, recovery_error_ is also set. | |
90 | IOStatus recovery_io_error_; | |
91 | // The condition variable used with db_mutex during auto resume for time | |
92 | // wait. | |
93 | InstrumentedCondVar cv_; | |
94 | bool end_recovery_; | |
95 | std::unique_ptr<port::Thread> recovery_thread_; | |
96 | ||
97 | InstrumentedMutex* db_mutex_; | |
98 | // A flag indicating whether automatic recovery from errors is enabled | |
99 | bool auto_recovery_; | |
100 | bool recovery_in_prog_; | |
101 | // A flag to indicate that for the soft error, we should not allow any | |
102 | // background work except the work is from recovery. | |
103 | bool soft_error_no_bg_work_; | |
104 | ||
105 | // Used to store the context for recover, such as flush reason. | |
106 | DBRecoverContext recover_context_; | |
107 | std::atomic<bool> is_db_stopped_; | |
108 | ||
109 | // The pointer of DB statistics. | |
110 | std::shared_ptr<Statistics> bg_error_stats_; | |
111 | ||
112 | const Status& HandleKnownErrors(const Status& bg_err, | |
113 | BackgroundErrorReason reason); | |
114 | Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery); | |
115 | void RecoverFromNoSpace(); | |
116 | const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error); | |
117 | void RecoverFromRetryableBGIOError(); | |
118 | // First, if it is in recovery and the recovery_error is ok. Set the | |
119 | // recovery_error_ to bg_err. Second, if the severity is higher than the | |
120 | // current bg_error_, overwrite it. | |
121 | void CheckAndSetRecoveryAndBGError(const Status& bg_err); | |
11fdf7f2 TL |
122 | }; |
123 | ||
f67539c2 | 124 | } // namespace ROCKSDB_NAMESPACE |