]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/error_handler.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / db / error_handler.h
CommitLineData
11fdf7f2
TL
1// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5#pragma once
6
7#include "monitoring/instrumented_mutex.h"
8#include "options/db_options.h"
20effc67 9#include "rocksdb/io_status.h"
11fdf7f2
TL
10#include "rocksdb/listener.h"
11#include "rocksdb/status.h"
12
f67539c2 13namespace ROCKSDB_NAMESPACE {
11fdf7f2
TL
14
15class DBImpl;
16
20effc67
TL
17// This structure is used to store the DB recovery context. The context is
18// the information that related to the recover actions. For example, it contains
19// FlushReason, which tells the flush job why this flush is called.
20struct DBRecoverContext {
21 FlushReason flush_reason;
22
23 DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {}
24
25 DBRecoverContext(FlushReason reason) : flush_reason(reason) {}
26};
27
11fdf7f2 28class ErrorHandler {
1e59de90
TL
29 public:
30 ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
31 InstrumentedMutex* db_mutex)
32 : db_(db),
33 db_options_(db_options),
34 cv_(db_mutex),
35 end_recovery_(false),
36 recovery_thread_(nullptr),
37 db_mutex_(db_mutex),
38 auto_recovery_(false),
39 recovery_in_prog_(false),
40 soft_error_no_bg_work_(false),
41 is_db_stopped_(false),
42 bg_error_stats_(db_options.statistics) {
43 // Clear the checked flag for uninitialized errors
44 bg_error_.PermitUncheckedError();
45 recovery_error_.PermitUncheckedError();
46 recovery_io_error_.PermitUncheckedError();
47 }
48
49 void EnableAutoRecovery() { auto_recovery_ = true; }
50
51 Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
52 Status::Code code, Status::SubCode subcode);
53
54 const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
55
56 Status GetBGError() const { return bg_error_; }
57
58 Status GetRecoveryError() const { return recovery_error_; }
59
60 Status ClearBGError();
61
62 bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
63
64 bool IsBGWorkStopped() {
65 assert(db_mutex_);
66 db_mutex_->AssertHeld();
67 return !bg_error_.ok() &&
68 (bg_error_.severity() >= Status::Severity::kHardError ||
69 !auto_recovery_ || soft_error_no_bg_work_);
70 }
71
72 bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
73
74 bool IsRecoveryInProgress() { return recovery_in_prog_; }
75
76 Status RecoverFromBGError(bool is_manual = false);
77 void CancelErrorRecovery();
78
79 void EndAutoRecovery();
80
81 private:
82 DBImpl* db_;
83 const ImmutableDBOptions& db_options_;
84 Status bg_error_;
85 // A separate Status variable used to record any errors during the
86 // recovery process from hard errors
87 Status recovery_error_;
88 // A separate IO Status variable used to record any IO errors during
89 // the recovery process. At the same time, recovery_error_ is also set.
90 IOStatus recovery_io_error_;
91 // The condition variable used with db_mutex during auto resume for time
92 // wait.
93 InstrumentedCondVar cv_;
94 bool end_recovery_;
95 std::unique_ptr<port::Thread> recovery_thread_;
96
97 InstrumentedMutex* db_mutex_;
98 // A flag indicating whether automatic recovery from errors is enabled
99 bool auto_recovery_;
100 bool recovery_in_prog_;
101 // A flag to indicate that for the soft error, we should not allow any
102 // background work except the work is from recovery.
103 bool soft_error_no_bg_work_;
104
105 // Used to store the context for recover, such as flush reason.
106 DBRecoverContext recover_context_;
107 std::atomic<bool> is_db_stopped_;
108
109 // The pointer of DB statistics.
110 std::shared_ptr<Statistics> bg_error_stats_;
111
112 const Status& HandleKnownErrors(const Status& bg_err,
113 BackgroundErrorReason reason);
114 Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
115 void RecoverFromNoSpace();
116 const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
117 void RecoverFromRetryableBGIOError();
118 // First, if it is in recovery and the recovery_error is ok. Set the
119 // recovery_error_ to bg_err. Second, if the severity is higher than the
120 // current bg_error_, overwrite it.
121 void CheckAndSetRecoveryAndBGError(const Status& bg_err);
11fdf7f2
TL
122};
123
f67539c2 124} // namespace ROCKSDB_NAMESPACE