]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/scrub_machine.h
buildsys: switch source download to quincy
[ceph.git] / ceph / src / osd / scrub_machine.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #pragma once
4
5 #include <string>
6
7 #include <boost/statechart/custom_reaction.hpp>
8 #include <boost/statechart/deferral.hpp>
9 #include <boost/statechart/event.hpp>
10 #include <boost/statechart/event_base.hpp>
11 #include <boost/statechart/in_state_reaction.hpp>
12 #include <boost/statechart/simple_state.hpp>
13 #include <boost/statechart/state.hpp>
14 #include <boost/statechart/state_machine.hpp>
15 #include <boost/statechart/transition.hpp>
16
17 #include "common/version.h"
18 #include "include/Context.h"
19
20 #include "scrub_machine_lstnr.h"
21 #include "scrubber_common.h"
22
23 using namespace std::string_literals;
24
25 class PG; // holding a pointer to that one - just for testing
26 class PgScrubber;
27 namespace Scrub {
28
29 namespace sc = ::boost::statechart;
30 namespace mpl = ::boost::mpl;
31
32 //
33 // EVENTS
34 //
35
/// Debug hooks called by the MEV-generated event types (declared here,
/// defined elsewhere). 'nm' is the stringified name of the event type.
void on_event_creation(std::string_view nm);
void on_event_discard(std::string_view nm);

/**
 * MEV(E) defines the statechart event type 'E'.
 *
 * Each generated type maintains a per-type counter ('actv') of its live
 * instances:
 *  - on_event_creation("E") is called when an instance is constructed while
 *    the counter is zero;
 *  - on_event_discard("E") is called when a destruction brings the counter
 *    back to zero.
 *
 * Copies are counted as well. Boost.Statechart copy-constructs ("clones") an
 * event that is not already reference-counted whenever the event must outlive
 * the call - e.g. when it is posted, or deferred via sc::deferral<> (see
 * WaitReplicas). With an implicit copy constructor such a clone would
 * decrement 'actv' in its destructor without ever having incremented it,
 * driving the counter negative and unbalancing the creation/discard
 * notifications.
 *
 * Note: only block comments are used inside the macro body, as a '//' comment
 * would swallow the line continuation.
 */
#define MEV(E)                                                        \
  struct E : sc::event<E> {                                           \
    inline static int actv{0};                                        \
    E()                                                               \
    {                                                                 \
      if (!actv++)                                                    \
        on_event_creation(#E);                                        \
    }                                                                 \
    /* clones made by the statechart library must be counted, too */  \
    E(const E& other) : sc::event<E>(other)                           \
    {                                                                 \
      if (!actv++)                                                    \
        on_event_creation(#E);                                        \
    }                                                                 \
    ~E()                                                              \
    {                                                                 \
      if (!--actv)                                                    \
        on_event_discard(#E);                                         \
    }                                                                 \
  };
53
54 MEV(RemotesReserved) ///< all replicas have granted our reserve request
55 MEV(ReservationFailure) ///< a reservation request has failed
56
57 MEV(StartScrub) ///< initiate a new scrubbing session (relevant if we are a Primary)
58 MEV(AfterRepairScrub) ///< initiate a new scrubbing session. Only triggered at Recovery
59 ///< completion.
60 MEV(Unblocked) ///< triggered when the PG unblocked an object that was marked for
61 ///< scrubbing. Via the PGScrubUnblocked op
62 MEV(InternalSchedScrub)
63 MEV(SelectedChunkFree)
64 MEV(ChunkIsBusy)
65 MEV(ActivePushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
66 ///< that is in-flight to the local ObjectStore
67 MEV(UpdatesApplied) // external
68 MEV(InternalAllUpdates) ///< the internal counterpart of UpdatesApplied
69 MEV(GotReplicas) ///< got a map from a replica
70
71 MEV(IntBmPreempted) ///< internal - BuildMap preempted. Required, as detected within the
72 ///< ctor
73 MEV(InternalError)
74
75 MEV(IntLocalMapDone)
76
77 MEV(DigestUpdate) ///< external. called upon success of a MODIFY op. See
78 ///< scrub_snapshot_metadata()
79 MEV(AllChunksDone)
80
81 MEV(StartReplica) ///< initiating replica scrub. replica_scrub_op() -> OSD Q ->
82 ///< replica_scrub()
83 MEV(StartReplicaNoWait) ///< 'start replica' when there are no pending updates
84
85 MEV(SchedReplica)
86 MEV(ReplicaPushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery
87 ///< that is in-flight to the local ObjectStore
88
89 MEV(FullReset) ///< guarantee that the FSM is in the quiescent state (i.e. NotActive)
90
91
92 struct NotActive; ///< the quiescent state. No active scrubbing.
93 struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs
94 struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine.
95 struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all active
96 ///< operations to finish.
97 struct ActiveReplica; ///< an active state for a replica.
98
99
100 class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
101 public:
102 friend class PgScrubber;
103
104 public:
105 explicit ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub);
106 ~ScrubMachine();
107
108 PG* m_pg; // only used for dout messages
109 spg_t m_pg_id;
110 ScrubMachineListener* m_scrbr;
111
112 void my_states() const;
113 void assert_not_active() const;
114 [[nodiscard]] bool is_reserving() const;
115 };
116
117 /**
118 * The Scrubber's base (quiescent) state.
119 * Scrubbing is triggered by one of the following events:
120 * - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs resources
121 * reservation process. Will be issued by PG::scrub(), following a
122 * queued "PGScrub" op.
123 * - a special end-of-recovery Primary scrub event ('AfterRepairScrub') that is
124 * not required to reserve resources.
125 * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming
126 * MOSDRepScrub message.
127 */
128 struct NotActive : sc::state<NotActive, ScrubMachine> {
129 explicit NotActive(my_context ctx);
130
131 using reactions = mpl::list<sc::transition<StartScrub, ReservingReplicas>,
132 // a scrubbing that was initiated at recovery completion,
133 // and requires no resource reservations:
134 sc::transition<AfterRepairScrub, ActiveScrubbing>,
135 sc::transition<StartReplica, ReplicaWaitUpdates>,
136 sc::transition<StartReplicaNoWait, ActiveReplica>>;
137 };
138
139 struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
140
141 explicit ReservingReplicas(my_context ctx);
142 using reactions = mpl::list<sc::custom_reaction<FullReset>,
143 // all replicas granted our resources request
144 sc::transition<RemotesReserved, ActiveScrubbing>,
145 sc::custom_reaction<ReservationFailure>>;
146
147 sc::result react(const FullReset&);
148
149 /// at least one replica denied us the scrub resources we've requested
150 sc::result react(const ReservationFailure&);
151 };
152
153
154 // the "active" sub-states
155
/// the objects range is blocked
struct RangeBlocked;

/// either delaying the scrub by some time and requeuing, or just requeue
struct PendingTimer;

/// select a chunk to scrub, and verify its availability
struct NewChunk;

/// waiting for 'active_pushes' to clear
struct WaitPushes;

struct WaitLastUpdate;
struct BuildMap;

/// a problem during BuildMap. Wait for all replicas to report, then restart.
struct DrainReplMaps;

/// wait for all replicas to report
struct WaitReplicas;

/// reacts to 'DigestUpdate' events. (Listed here for completeness: it is an
/// ActiveScrubbing sub-state like all of the above.)
struct WaitDigestUpdate;
166
167 struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
168
169 explicit ActiveScrubbing(my_context ctx);
170 ~ActiveScrubbing();
171
172 using reactions = mpl::list<
173 // done scrubbing
174 sc::transition<AllChunksDone, NotActive>,
175
176 sc::custom_reaction<InternalError>,
177 sc::custom_reaction<FullReset>>;
178
179 sc::result react(const AllChunksDone&);
180 sc::result react(const FullReset&);
181 sc::result react(const InternalError&);
182 };
183
184 struct RangeBlocked : sc::state<RangeBlocked, ActiveScrubbing> {
185 explicit RangeBlocked(my_context ctx);
186 using reactions = mpl::list<sc::transition<Unblocked, PendingTimer>>;
187 };
188
189 struct PendingTimer : sc::state<PendingTimer, ActiveScrubbing> {
190
191 explicit PendingTimer(my_context ctx);
192
193 using reactions = mpl::list<sc::transition<InternalSchedScrub, NewChunk>>;
194 };
195
196 struct NewChunk : sc::state<NewChunk, ActiveScrubbing> {
197
198 explicit NewChunk(my_context ctx);
199
200 using reactions = mpl::list<sc::transition<ChunkIsBusy, RangeBlocked>,
201 sc::custom_reaction<SelectedChunkFree>>;
202
203 sc::result react(const SelectedChunkFree&);
204 };
205
206 /**
207 * initiate the update process for this chunk
208 *
209 * Wait fo 'active_pushes' to clear.
210 * 'active_pushes' represents recovery that is in-flight to the local Objectstore, hence
211 * scrub waits until the correct data is readable (in-flight data to the Objectstore is
212 * not readable until written to disk, termed 'applied' here)
213 */
214 struct WaitPushes : sc::state<WaitPushes, ActiveScrubbing> {
215
216 explicit WaitPushes(my_context ctx);
217
218 using reactions = mpl::list<sc::custom_reaction<ActivePushesUpd>>;
219
220 sc::result react(const ActivePushesUpd&);
221 };
222
223 struct WaitLastUpdate : sc::state<WaitLastUpdate, ActiveScrubbing> {
224
225 explicit WaitLastUpdate(my_context ctx);
226
227 void on_new_updates(const UpdatesApplied&);
228
229 using reactions = mpl::list<sc::custom_reaction<InternalAllUpdates>,
230 sc::in_state_reaction<UpdatesApplied,
231 WaitLastUpdate,
232 &WaitLastUpdate::on_new_updates>>;
233
234 sc::result react(const InternalAllUpdates&);
235 };
236
237 struct BuildMap : sc::state<BuildMap, ActiveScrubbing> {
238 explicit BuildMap(my_context ctx);
239
240 // possible error scenarios:
241 // - an error reported by the backend will trigger an 'InternalError' event,
242 // handled by our parent state;
243 // - if preempted, we switch to DrainReplMaps, where we will wait for all
244 // replicas to send their maps before acknowledging the preemption;
245 // - an interval change will be handled by the relevant 'send-event' functions,
246 // and will translated into a 'FullReset' event.
247 using reactions =
248 mpl::list<sc::transition<IntBmPreempted, DrainReplMaps>,
249 sc::transition<InternalSchedScrub, BuildMap>, // looping, waiting
250 // for the backend to
251 // finish
252 sc::custom_reaction<IntLocalMapDone>>;
253
254 sc::result react(const IntLocalMapDone&);
255 };
256
257 /*
258 * "drain" scrub-maps responses from replicas
259 */
260 struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
261 explicit DrainReplMaps(my_context ctx);
262
263 using reactions =
264 mpl::list<sc::custom_reaction<GotReplicas> // all replicas are accounted for
265 >;
266
267 sc::result react(const GotReplicas&);
268 };
269
270 struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing> {
271 explicit WaitReplicas(my_context ctx);
272
273 using reactions =
274 mpl::list<sc::custom_reaction<GotReplicas>, sc::deferral<DigestUpdate>>;
275
276 sc::result react(const GotReplicas&);
277 };
278
279 struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
280 explicit WaitDigestUpdate(my_context ctx);
281
282 using reactions = mpl::list<sc::custom_reaction<DigestUpdate>>;
283 sc::result react(const DigestUpdate&);
284 };
285
286 // ----------------------------- the "replica active" states -----------------------
287
288 /*
289 * Waiting for 'active_pushes' to complete
290 *
291 * When in this state:
292 * - the details of the Primary's request were internalized by PgScrubber;
293 * - 'active' scrubbing is set
294 */
295 struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine> {
296 explicit ReplicaWaitUpdates(my_context ctx);
297 using reactions =
298 mpl::list<sc::custom_reaction<ReplicaPushesUpd>, sc::custom_reaction<FullReset>>;
299
300 sc::result react(const ReplicaPushesUpd&);
301 sc::result react(const FullReset&);
302 };
303
304
305 struct ActiveReplica : sc::state<ActiveReplica, ScrubMachine> {
306 explicit ActiveReplica(my_context ctx);
307 using reactions =
308 mpl::list<sc::custom_reaction<SchedReplica>, sc::custom_reaction<FullReset>>;
309
310 sc::result react(const SchedReplica&);
311 sc::result react(const FullReset&);
312 };
313
314 } // namespace Scrub