]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include <optional> | |
7 | ||
8 | #include <boost/statechart/custom_reaction.hpp> | |
9 | #include <boost/statechart/event.hpp> | |
10 | #include <boost/statechart/event_base.hpp> | |
11 | #include <boost/statechart/simple_state.hpp> | |
12 | #include <boost/statechart/state.hpp> | |
13 | #include <boost/statechart/state_machine.hpp> | |
14 | #include <boost/statechart/transition.hpp> | |
15 | ||
16 | #include "osd/recovery_types.h" | |
17 | ||
18 | namespace crimson::osd { | |
19 | ||
20 | namespace sc = boost::statechart; | |
21 | ||
22 | struct BackfillState { | |
23 | struct BackfillListener; | |
24 | struct PeeringFacade; | |
25 | struct PGFacade; | |
26 | ||
27 | // events comes first | |
28 | struct PrimaryScanned : sc::event<PrimaryScanned> { | |
29 | BackfillInterval result; | |
30 | PrimaryScanned(BackfillInterval&& result) | |
31 | : result(std::move(result)) { | |
32 | } | |
33 | }; | |
34 | ||
35 | struct ReplicaScanned : sc::event<ReplicaScanned> { | |
36 | pg_shard_t from; | |
37 | BackfillInterval result; | |
38 | ReplicaScanned(pg_shard_t from, BackfillInterval&& result) | |
39 | : from(std::move(from)), | |
40 | result(std::move(result)) { | |
41 | } | |
42 | }; | |
43 | ||
44 | struct ObjectPushed : sc::event<ObjectPushed> { | |
45 | // TODO: implement replica management; I don't want to follow | |
46 | // current convention where the backend layer is responsible | |
47 | // for tracking replicas. | |
48 | hobject_t object; | |
49 | pg_stat_t stat; | |
50 | ObjectPushed(hobject_t object) | |
51 | : object(std::move(object)) { | |
52 | } | |
53 | }; | |
54 | ||
55 | struct Triggered : sc::event<Triggered> { | |
56 | }; | |
57 | ||
58 | private: | |
59 | // internal events | |
60 | struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> { | |
61 | }; | |
62 | ||
63 | struct RequestReplicasScanning : sc::event<RequestReplicasScanning> { | |
64 | }; | |
65 | ||
66 | struct RequestWaiting : sc::event<RequestWaiting> { | |
67 | }; | |
68 | ||
69 | struct RequestDone : sc::event<RequestDone> { | |
70 | }; | |
71 | ||
72 | class ProgressTracker; | |
73 | ||
74 | public: | |
75 | ||
76 | struct Initial; | |
77 | struct Enqueuing; | |
78 | struct PrimaryScanning; | |
79 | struct ReplicasScanning; | |
80 | struct Waiting; | |
81 | struct Done; | |
82 | ||
83 | struct BackfillMachine : sc::state_machine<BackfillMachine, Initial> { | |
84 | BackfillMachine(BackfillState& backfill_state, | |
85 | BackfillListener& backfill_listener, | |
86 | std::unique_ptr<PeeringFacade> peering_state, | |
87 | std::unique_ptr<PGFacade> pg); | |
88 | ~BackfillMachine(); | |
89 | BackfillState& backfill_state; | |
90 | BackfillListener& backfill_listener; | |
91 | std::unique_ptr<PeeringFacade> peering_state; | |
92 | std::unique_ptr<PGFacade> pg; | |
93 | }; | |
94 | ||
95 | private: | |
96 | template <class S> | |
97 | struct StateHelper { | |
98 | StateHelper(); | |
99 | ~StateHelper(); | |
100 | ||
101 | BackfillState& backfill_state() { | |
102 | return static_cast<S*>(this) \ | |
103 | ->template context<BackfillMachine>().backfill_state; | |
104 | } | |
105 | BackfillListener& backfill_listener() { | |
106 | return static_cast<S*>(this) \ | |
107 | ->template context<BackfillMachine>().backfill_listener; | |
108 | } | |
109 | PeeringFacade& peering_state() { | |
110 | return *static_cast<S*>(this) \ | |
111 | ->template context<BackfillMachine>().peering_state; | |
112 | } | |
113 | PGFacade& pg() { | |
114 | return *static_cast<S*>(this)->template context<BackfillMachine>().pg; | |
115 | } | |
116 | ||
117 | const PeeringFacade& peering_state() const { | |
118 | return *static_cast<const S*>(this) \ | |
119 | ->template context<BackfillMachine>().peering_state; | |
120 | } | |
121 | const BackfillState& backfill_state() const { | |
122 | return static_cast<const S*>(this) \ | |
123 | ->template context<BackfillMachine>().backfill_state; | |
124 | } | |
125 | }; | |
126 | ||
127 | public: | |
128 | ||
129 | // states | |
130 | struct Crashed : sc::simple_state<Crashed, BackfillMachine>, | |
131 | StateHelper<Crashed> { | |
132 | explicit Crashed(); | |
133 | }; | |
134 | ||
135 | struct Initial : sc::state<Initial, BackfillMachine>, | |
136 | StateHelper<Initial> { | |
137 | using reactions = boost::mpl::list< | |
138 | sc::custom_reaction<Triggered>, | |
139 | sc::transition<sc::event_base, Crashed>>; | |
140 | explicit Initial(my_context); | |
141 | // initialize after triggering backfill by on_activate_complete(). | |
142 | // transit to Enqueuing. | |
143 | sc::result react(const Triggered&); | |
144 | }; | |
145 | ||
146 | struct Enqueuing : sc::state<Enqueuing, BackfillMachine>, | |
147 | StateHelper<Enqueuing> { | |
148 | using reactions = boost::mpl::list< | |
149 | sc::transition<RequestPrimaryScanning, PrimaryScanning>, | |
150 | sc::transition<RequestReplicasScanning, ReplicasScanning>, | |
151 | sc::transition<RequestWaiting, Waiting>, | |
152 | sc::transition<RequestDone, Done>, | |
153 | sc::transition<sc::event_base, Crashed>>; | |
154 | explicit Enqueuing(my_context); | |
155 | ||
156 | // indicate whether there is any remaining work to do when it comes | |
157 | // to comparing the hobject_t namespace between primary and replicas. | |
158 | // true doesn't necessarily mean backfill is done -- there could be | |
159 | // in-flight pushes or drops which had been enqueued but aren't | |
160 | // completed yet. | |
161 | static bool all_enqueued( | |
162 | const PeeringFacade& peering_state, | |
163 | const BackfillInterval& backfill_info, | |
164 | const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info); | |
165 | ||
166 | private: | |
167 | void maybe_update_range(); | |
168 | void trim_backfill_infos(); | |
169 | ||
170 | // these methods take BackfillIntervals instead of extracting them from | |
171 | // the state to emphasize the relationships across the main loop. | |
172 | bool all_emptied( | |
173 | const BackfillInterval& local_backfill_info, | |
174 | const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const; | |
175 | hobject_t earliest_peer_backfill( | |
176 | const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const; | |
177 | bool should_rescan_replicas( | |
178 | const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info, | |
179 | const BackfillInterval& backfill_info) const; | |
180 | // indicate whether a particular acting primary needs to scanned again | |
181 | // to process next piece of the hobject_t's namespace. | |
182 | // the logic is per analogy to replica_needs_scan(). See comments there. | |
183 | bool should_rescan_primary( | |
184 | const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info, | |
185 | const BackfillInterval& backfill_info) const; | |
186 | ||
187 | // the result_t is intermediary between {remove,update}_on_peers() and | |
188 | // updating BackfillIntervals in trim_backfilled_object_from_intervals. | |
189 | // This step is important because it affects the main loop's condition, | |
190 | // and thus deserves to be exposed instead of being called deeply from | |
191 | // {remove,update}_on_peers(). | |
192 | struct [[nodiscard]] result_t { | |
193 | std::set<pg_shard_t> pbi_targets; | |
194 | hobject_t new_last_backfill_started; | |
195 | }; | |
196 | void trim_backfilled_object_from_intervals( | |
197 | result_t&&, | |
198 | hobject_t& last_backfill_started, | |
199 | std::map<pg_shard_t, BackfillInterval>& peer_backfill_info); | |
200 | result_t remove_on_peers(const hobject_t& check); | |
201 | result_t update_on_peers(const hobject_t& check); | |
202 | }; | |
203 | ||
204 | struct PrimaryScanning : sc::state<PrimaryScanning, BackfillMachine>, | |
205 | StateHelper<PrimaryScanning> { | |
206 | using reactions = boost::mpl::list< | |
207 | sc::custom_reaction<ObjectPushed>, | |
208 | sc::custom_reaction<PrimaryScanned>, | |
209 | sc::transition<sc::event_base, Crashed>>; | |
210 | explicit PrimaryScanning(my_context); | |
211 | sc::result react(ObjectPushed); | |
212 | // collect scanning result and transit to Enqueuing. | |
213 | sc::result react(PrimaryScanned); | |
214 | }; | |
215 | ||
216 | struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>, | |
217 | StateHelper<ReplicasScanning> { | |
218 | using reactions = boost::mpl::list< | |
219 | sc::custom_reaction<ObjectPushed>, | |
220 | sc::custom_reaction<ReplicaScanned>, | |
221 | sc::transition<sc::event_base, Crashed>>; | |
222 | explicit ReplicasScanning(my_context); | |
223 | // collect scanning result; if all results are collected, transition | |
224 | // to Enqueuing will happen. | |
225 | sc::result react(ObjectPushed); | |
226 | sc::result react(ReplicaScanned); | |
227 | ||
228 | // indicate whether a particular peer should be scanned to retrieve | |
229 | // BackfillInterval for new range of hobject_t namespace. | |
230 | // true when bi.objects is exhausted, replica bi's end is not MAX, | |
231 | // and primary bi'begin is further than the replica's one. | |
232 | static bool replica_needs_scan( | |
233 | const BackfillInterval& replica_backfill_info, | |
234 | const BackfillInterval& local_backfill_info); | |
235 | ||
236 | private: | |
237 | std::set<pg_shard_t> waiting_on_backfill; | |
238 | }; | |
239 | ||
240 | struct Waiting : sc::state<Waiting, BackfillMachine>, | |
241 | StateHelper<Waiting> { | |
242 | using reactions = boost::mpl::list< | |
243 | sc::custom_reaction<ObjectPushed>, | |
244 | sc::transition<sc::event_base, Crashed>>; | |
245 | explicit Waiting(my_context); | |
246 | sc::result react(ObjectPushed); | |
247 | }; | |
248 | ||
249 | struct Done : sc::state<Done, BackfillMachine>, | |
250 | StateHelper<Done> { | |
251 | using reactions = boost::mpl::list< | |
252 | sc::transition<sc::event_base, Crashed>>; | |
253 | explicit Done(my_context); | |
254 | }; | |
255 | ||
256 | BackfillState(BackfillListener& backfill_listener, | |
257 | std::unique_ptr<PeeringFacade> peering_state, | |
258 | std::unique_ptr<PGFacade> pg); | |
259 | ~BackfillState(); | |
260 | ||
261 | void process_event( | |
262 | boost::intrusive_ptr<const sc::event_base> evt) { | |
263 | backfill_machine.process_event(*std::move(evt)); | |
264 | } | |
265 | ||
266 | hobject_t get_last_backfill_started() const { | |
267 | return last_backfill_started; | |
268 | } | |
269 | private: | |
270 | hobject_t last_backfill_started; | |
271 | BackfillInterval backfill_info; | |
272 | std::map<pg_shard_t, BackfillInterval> peer_backfill_info; | |
273 | BackfillMachine backfill_machine; | |
274 | std::unique_ptr<ProgressTracker> progress_tracker; | |
275 | }; | |
276 | ||
// BackfillListener -- an interface used by the backfill FSM to request
// low-level services like issuing `MOSDPGPush` or `MOSDPGBackfillRemove`.
// The goals behind the interface are: 1) unittestability; 2) possibility
// to retrofit classical OSD with BackfillState. For the second reason we
// never use `seastar::future` -- instead responses to the requests are
// conveyed as events; see ObjectPushed as an example.
struct BackfillState::BackfillListener {
  // ask `target` to scan the [begin, end) part of its hobject_t
  // namespace; the response is expected as a ReplicaScanned event.
  virtual void request_replica_scan(
    const pg_shard_t& target,
    const hobject_t& begin,
    const hobject_t& end) = 0;

  // scan the primary's own namespace starting at `begin`; the response
  // is expected as a PrimaryScanned event.
  virtual void request_primary_scan(
    const hobject_t& begin) = 0;

  // schedule pushing object `obj` at version `v`; completion is
  // reported back as an ObjectPushed event.
  virtual void enqueue_push(
    const hobject_t& obj,
    const eversion_t& v) = 0;

  // schedule removal of `obj` (version `v`) on `target`.
  virtual void enqueue_drop(
    const pg_shard_t& target,
    const hobject_t& obj,
    const eversion_t& v) = 0;

  // NOTE(review): presumably flushes any batched pushes/drops enqueued
  // above -- confirm against the implementation.
  virtual void maybe_flush() = 0;

  // propagate the new `last_backfill` boundary to the peers.
  virtual void update_peers_last_backfill(
    const hobject_t& new_last_backfill) = 0;

  // whether there is budget to enqueue more backfill work right now.
  virtual bool budget_available() const = 0;

  // notification that the entire backfill has finished.
  virtual void backfilled() = 0;

  virtual ~BackfillListener() = default;
};
312 | ||
313 | // PeeringFacade -- a facade (in the GoF-defined meaning) simplifying | |
314 | // the interface of PeeringState. The motivation is to have an inventory | |
315 | // of behaviour that must be provided by a unit test's mock. | |
316 | struct BackfillState::PeeringFacade { | |
317 | virtual hobject_t earliest_backfill() const = 0; | |
318 | virtual const std::set<pg_shard_t>& get_backfill_targets() const = 0; | |
319 | virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0; | |
320 | virtual const eversion_t& get_last_update() const = 0; | |
321 | virtual const eversion_t& get_log_tail() const = 0; | |
322 | ||
323 | // the performance impact of `std::function` has not been considered yet. | |
324 | // If there is any proof (from e.g. profiling) about its significance, we | |
325 | // can switch back to the template variant. | |
326 | using scan_log_func_t = std::function<void(const pg_log_entry_t&)>; | |
327 | virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0; | |
328 | ||
329 | virtual bool is_backfill_target(pg_shard_t peer) const = 0; | |
330 | virtual void update_complete_backfill_object_stats(const hobject_t &hoid, | |
331 | const pg_stat_t &stats) = 0; | |
332 | virtual bool is_backfilling() const = 0; | |
333 | virtual ~PeeringFacade() {} | |
334 | }; | |
335 | ||
336 | // PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge | |
337 | // interface of crimson's PG class. The motivation is to have an inventory | |
338 | // of behaviour that must be provided by a unit test's mock. | |
339 | struct BackfillState::PGFacade { | |
340 | virtual const eversion_t& get_projected_last_update() const = 0; | |
341 | virtual ~PGFacade() {} | |
342 | }; | |
343 | ||
344 | class BackfillState::ProgressTracker { | |
345 | // TODO: apply_stat, | |
346 | enum class op_stage_t { | |
347 | enqueued_push, | |
348 | enqueued_drop, | |
349 | completed_push, | |
350 | }; | |
351 | ||
352 | struct registry_item_t { | |
353 | op_stage_t stage; | |
354 | std::optional<pg_stat_t> stats; | |
355 | }; | |
356 | ||
357 | BackfillMachine& backfill_machine; | |
358 | std::map<hobject_t, registry_item_t> registry; | |
359 | ||
360 | BackfillState& backfill_state() { | |
361 | return backfill_machine.backfill_state; | |
362 | } | |
363 | PeeringFacade& peering_state() { | |
364 | return *backfill_machine.peering_state; | |
365 | } | |
366 | BackfillListener& backfill_listener() { | |
367 | return backfill_machine.backfill_listener; | |
368 | } | |
369 | ||
370 | public: | |
371 | ProgressTracker(BackfillMachine& backfill_machine) | |
372 | : backfill_machine(backfill_machine) { | |
373 | } | |
374 | ||
375 | bool tracked_objects_completed() const; | |
376 | ||
377 | bool enqueue_push(const hobject_t&); | |
378 | void enqueue_drop(const hobject_t&); | |
379 | void complete_to(const hobject_t&, const pg_stat_t&); | |
380 | }; | |
381 | ||
382 | } // namespace crimson::osd |