]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/osd/backfill_state.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / crimson / osd / backfill_state.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#pragma once
5
6#include <optional>
7
8#include <boost/statechart/custom_reaction.hpp>
9#include <boost/statechart/event.hpp>
10#include <boost/statechart/event_base.hpp>
11#include <boost/statechart/simple_state.hpp>
12#include <boost/statechart/state.hpp>
13#include <boost/statechart/state_machine.hpp>
14#include <boost/statechart/transition.hpp>
15
16#include "osd/recovery_types.h"
17
18namespace crimson::osd {
19
20namespace sc = boost::statechart;
21
// BackfillState -- the heart of crimson's backfill machinery. It bundles:
//  * the boost::statechart events (public ones delivered from outside,
//    private ones used for internal state transitions),
//  * the BackfillMachine state machine and all of its states,
//  * the data shared across states (backfill intervals, progress tracking).
// Low-level actions (scans, pushes, drops) are delegated to the injected
// BackfillListener / PeeringFacade / PGFacade interfaces, which keeps this
// class unit-testable (see the comments on those interfaces below).
struct BackfillState {
  struct BackfillListener;
  struct PeeringFacade;
  struct PGFacade;

  // events comes first

  // Delivered when the local (primary) scan finished; carries the freshly
  // scanned interval of the primary's hobject_t namespace.
  struct PrimaryScanned : sc::event<PrimaryScanned> {
    BackfillInterval result;
    PrimaryScanned(BackfillInterval&& result)
      : result(std::move(result)) {
    }
  };

  // Delivered when a single replica's scan finished; `from` identifies the
  // replica shard the interval came from.
  struct ReplicaScanned : sc::event<ReplicaScanned> {
    pg_shard_t from;
    BackfillInterval result;
    ReplicaScanned(pg_shard_t from, BackfillInterval&& result)
      : from(std::move(from)),
        result(std::move(result)) {
    }
  };

  // Delivered when the push of a single object completed.
  struct ObjectPushed : sc::event<ObjectPushed> {
    // TODO: implement replica management; I don't want to follow
    // current convention where the backend layer is responsible
    // for tracking replicas.
    hobject_t object;
    // NOTE(review): `stat` is left default-constructed by the ctor --
    // presumably filled in by the event's producer; verify at call sites.
    pg_stat_t stat;
    ObjectPushed(hobject_t object)
      : object(std::move(object)) {
    }
  };

  // Kicks the machine out of Initial (see Initial::react(const Triggered&)).
  struct Triggered : sc::event<Triggered> {
  };

private:
  // internal events
  // Each Request* event below is posted by Enqueuing to select the next
  // state; see the sc::transition list in Enqueuing::reactions.
  struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> {
  };

  struct RequestReplicasScanning : sc::event<RequestReplicasScanning> {
  };

  struct RequestWaiting : sc::event<RequestWaiting> {
  };

  struct RequestDone : sc::event<RequestDone> {
  };

  class ProgressTracker;

public:

  // forward declarations of the states; Initial is the machine's start state.
  struct Initial;
  struct Enqueuing;
  struct PrimaryScanning;
  struct ReplicasScanning;
  struct Waiting;
  struct Done;

  // The statechart machine itself. It owns the facades and keeps references
  // to the shared BackfillState/BackfillListener so that every state can
  // reach them through StateHelper's context<BackfillMachine>() accessors.
  struct BackfillMachine : sc::state_machine<BackfillMachine, Initial> {
    BackfillMachine(BackfillState& backfill_state,
                    BackfillListener& backfill_listener,
                    std::unique_ptr<PeeringFacade> peering_state,
                    std::unique_ptr<PGFacade> pg);
    ~BackfillMachine();
    BackfillState& backfill_state;
    BackfillListener& backfill_listener;
    std::unique_ptr<PeeringFacade> peering_state;
    std::unique_ptr<PGFacade> pg;
  };

private:
  // CRTP mix-in giving every state convenient access to the machine-wide
  // context (state, listener, facades) without repeating the verbose
  // `context<BackfillMachine>()` incantation at each use site.
  template <class S>
  struct StateHelper {
    StateHelper();
    ~StateHelper();

    BackfillState& backfill_state() {
      return static_cast<S*>(this) \
        ->template context<BackfillMachine>().backfill_state;
    }
    BackfillListener& backfill_listener() {
      return static_cast<S*>(this) \
        ->template context<BackfillMachine>().backfill_listener;
    }
    PeeringFacade& peering_state() {
      return *static_cast<S*>(this) \
        ->template context<BackfillMachine>().peering_state;
    }
    PGFacade& pg() {
      return *static_cast<S*>(this)->template context<BackfillMachine>().pg;
    }

    const PeeringFacade& peering_state() const {
      return *static_cast<const S*>(this) \
        ->template context<BackfillMachine>().peering_state;
    }
    const BackfillState& backfill_state() const {
      return static_cast<const S*>(this) \
        ->template context<BackfillMachine>().backfill_state;
    }
  };

public:

  // states

  // Terminal error state. Every other state lists
  // `sc::transition<sc::event_base, Crashed>` last, so any event without a
  // dedicated reaction lands here.
  struct Crashed : sc::simple_state<Crashed, BackfillMachine>,
                   StateHelper<Crashed> {
    explicit Crashed();
  };

  struct Initial : sc::state<Initial, BackfillMachine>,
                   StateHelper<Initial> {
    using reactions = boost::mpl::list<
      sc::custom_reaction<Triggered>,
      sc::transition<sc::event_base, Crashed>>;
    explicit Initial(my_context);
    // initialize after triggering backfill by on_activate_complete().
    // transit to Enqueuing.
    sc::result react(const Triggered&);
  };

  // The main-loop state: compares primary and replica intervals and enqueues
  // pushes/drops; leaves via one of the internal Request* events.
  struct Enqueuing : sc::state<Enqueuing, BackfillMachine>,
                     StateHelper<Enqueuing> {
    using reactions = boost::mpl::list<
      sc::transition<RequestPrimaryScanning, PrimaryScanning>,
      sc::transition<RequestReplicasScanning, ReplicasScanning>,
      sc::transition<RequestWaiting, Waiting>,
      sc::transition<RequestDone, Done>,
      sc::transition<sc::event_base, Crashed>>;
    explicit Enqueuing(my_context);

    // indicate whether there is any remaining work to do when it comes
    // to comparing the hobject_t namespace between primary and replicas.
    // true doesn't necessarily mean backfill is done -- there could be
    // in-flight pushes or drops which had been enqueued but aren't
    // completed yet.
    static bool all_enqueued(
      const PeeringFacade& peering_state,
      const BackfillInterval& backfill_info,
      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);

  private:
    void maybe_update_range();
    void trim_backfill_infos();

    // these methods take BackfillIntervals instead of extracting them from
    // the state to emphasize the relationships across the main loop.
    bool all_emptied(
      const BackfillInterval& local_backfill_info,
      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
    hobject_t earliest_peer_backfill(
      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
    bool should_rescan_replicas(
      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
      const BackfillInterval& backfill_info) const;
    // indicate whether a particular acting primary needs to scanned again
    // to process next piece of the hobject_t's namespace.
    // the logic is per analogy to replica_needs_scan(). See comments there.
    bool should_rescan_primary(
      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
      const BackfillInterval& backfill_info) const;

    // the result_t is intermediary between {remove,update}_on_peers() and
    // updating BackfillIntervals in trim_backfilled_object_from_intervals.
    // This step is important because it affects the main loop's condition,
    // and thus deserves to be exposed instead of being called deeply from
    // {remove,update}_on_peers().
    struct [[nodiscard]] result_t {
      std::set<pg_shard_t> pbi_targets;
      hobject_t new_last_backfill_started;
    };
    void trim_backfilled_object_from_intervals(
      result_t&&,
      hobject_t& last_backfill_started,
      std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);
    result_t remove_on_peers(const hobject_t& check);
    result_t update_on_peers(const hobject_t& check);
  };

  struct PrimaryScanning : sc::state<PrimaryScanning, BackfillMachine>,
                           StateHelper<PrimaryScanning> {
    using reactions = boost::mpl::list<
      sc::custom_reaction<ObjectPushed>,
      sc::custom_reaction<PrimaryScanned>,
      sc::transition<sc::event_base, Crashed>>;
    explicit PrimaryScanning(my_context);
    sc::result react(ObjectPushed);
    // collect scanning result and transit to Enqueuing.
    sc::result react(PrimaryScanned);
  };

  struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>,
                            StateHelper<ReplicasScanning> {
    using reactions = boost::mpl::list<
      sc::custom_reaction<ObjectPushed>,
      sc::custom_reaction<ReplicaScanned>,
      sc::transition<sc::event_base, Crashed>>;
    explicit ReplicasScanning(my_context);
    // collect scanning result; if all results are collected, transition
    // to Enqueuing will happen.
    sc::result react(ObjectPushed);
    sc::result react(ReplicaScanned);

    // indicate whether a particular peer should be scanned to retrieve
    // BackfillInterval for new range of hobject_t namespace.
    // true when bi.objects is exhausted, replica bi's end is not MAX,
    // and primary bi'begin is further than the replica's one.
    static bool replica_needs_scan(
      const BackfillInterval& replica_backfill_info,
      const BackfillInterval& local_backfill_info);

  private:
    // shards we still expect a ReplicaScanned from.
    std::set<pg_shard_t> waiting_on_backfill;
  };

  // Parked state used while in-flight pushes drain; still accepts
  // ObjectPushed notifications.
  struct Waiting : sc::state<Waiting, BackfillMachine>,
                   StateHelper<Waiting> {
    using reactions = boost::mpl::list<
      sc::custom_reaction<ObjectPushed>,
      sc::transition<sc::event_base, Crashed>>;
    explicit Waiting(my_context);
    sc::result react(ObjectPushed);
  };

  // Terminal success state.
  struct Done : sc::state<Done, BackfillMachine>,
                StateHelper<Done> {
    using reactions = boost::mpl::list<
      sc::transition<sc::event_base, Crashed>>;
    explicit Done(my_context);
  };

  BackfillState(BackfillListener& backfill_listener,
                std::unique_ptr<PeeringFacade> peering_state,
                std::unique_ptr<PGFacade> pg);
  ~BackfillState();

  // Dispatch an externally delivered event into the state machine.
  // The intrusive_ptr is dereferenced; statechart clones what it needs.
  void process_event(
    boost::intrusive_ptr<const sc::event_base> evt) {
    backfill_machine.process_event(*std::move(evt));
  }

  hobject_t get_last_backfill_started() const {
    return last_backfill_started;
  }
private:
  // watermark of the backfill main loop; objects at or before it have been
  // enqueued already.
  hobject_t last_backfill_started;
  // primary's view of its own namespace chunk.
  BackfillInterval backfill_info;
  // per-replica views, keyed by shard.
  std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
  BackfillMachine backfill_machine;
  std::unique_ptr<ProgressTracker> progress_tracker;
};
276
// BackfillListener -- an interface used by the backfill FSM to request
// low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`.
// The goals behind the interface are: 1) unittestability; 2) possibility
// to retrofit classical OSD with BackfillState. For the second reason we
// never use `seastar::future` -- instead responses to the requests are
// conveyed as events; see ObjectPushed as an example.
struct BackfillState::BackfillListener {
  // ask `target` to scan its namespace in [begin, end); the answer is
  // expected back as a ReplicaScanned event.
  virtual void request_replica_scan(
    const pg_shard_t& target,
    const hobject_t& begin,
    const hobject_t& end) = 0;

  // ask the primary to scan starting at `begin`; answered with
  // PrimaryScanned.
  virtual void request_primary_scan(
    const hobject_t& begin) = 0;

  // schedule pushing `obj` at version `v`; completion is signalled with
  // ObjectPushed.
  virtual void enqueue_push(
    const hobject_t& obj,
    const eversion_t& v) = 0;

  // schedule removal of `obj` on `target`.
  virtual void enqueue_drop(
    const pg_shard_t& target,
    const hobject_t& obj,
    const eversion_t& v) = 0;

  // flush whatever has been enqueued so far, if anything is pending.
  virtual void maybe_flush() = 0;

  // propagate the new last_backfill watermark to the peers.
  virtual void update_peers_last_backfill(
    const hobject_t& new_last_backfill) = 0;

  // whether there is budget to enqueue more work right now.
  virtual bool budget_available() const = 0;

  // notification that the whole backfill has finished.
  virtual void backfilled() = 0;

  virtual ~BackfillListener() = default;
};
312
313// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying
314// the interface of PeeringState. The motivation is to have an inventory
315// of behaviour that must be provided by a unit test's mock.
316struct BackfillState::PeeringFacade {
317 virtual hobject_t earliest_backfill() const = 0;
318 virtual const std::set<pg_shard_t>& get_backfill_targets() const = 0;
319 virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0;
320 virtual const eversion_t& get_last_update() const = 0;
321 virtual const eversion_t& get_log_tail() const = 0;
322
323 // the performance impact of `std::function` has not been considered yet.
324 // If there is any proof (from e.g. profiling) about its significance, we
325 // can switch back to the template variant.
326 using scan_log_func_t = std::function<void(const pg_log_entry_t&)>;
327 virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0;
328
329 virtual bool is_backfill_target(pg_shard_t peer) const = 0;
330 virtual void update_complete_backfill_object_stats(const hobject_t &hoid,
331 const pg_stat_t &stats) = 0;
332 virtual bool is_backfilling() const = 0;
333 virtual ~PeeringFacade() {}
334};
335
336// PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge
337// interface of crimson's PG class. The motivation is to have an inventory
338// of behaviour that must be provided by a unit test's mock.
339struct BackfillState::PGFacade {
340 virtual const eversion_t& get_projected_last_update() const = 0;
341 virtual ~PGFacade() {}
342};
343
344class BackfillState::ProgressTracker {
345 // TODO: apply_stat,
346 enum class op_stage_t {
347 enqueued_push,
348 enqueued_drop,
349 completed_push,
350 };
351
352 struct registry_item_t {
353 op_stage_t stage;
354 std::optional<pg_stat_t> stats;
355 };
356
357 BackfillMachine& backfill_machine;
358 std::map<hobject_t, registry_item_t> registry;
359
360 BackfillState& backfill_state() {
361 return backfill_machine.backfill_state;
362 }
363 PeeringFacade& peering_state() {
364 return *backfill_machine.peering_state;
365 }
366 BackfillListener& backfill_listener() {
367 return backfill_machine.backfill_listener;
368 }
369
370public:
371 ProgressTracker(BackfillMachine& backfill_machine)
372 : backfill_machine(backfill_machine) {
373 }
374
375 bool tracked_objects_completed() const;
376
377 bool enqueue_push(const hobject_t&);
378 void enqueue_drop(const hobject_t&);
379 void complete_to(const hobject_t&, const pg_stat_t&);
380};
381
382} // namespace crimson::osd