// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <set>

#include <boost/statechart/custom_reaction.hpp>
#include <boost/statechart/event.hpp>
#include <boost/statechart/event_base.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>

#include "osd/recovery_types.h"
18 namespace crimson::osd
{
20 namespace sc
= boost::statechart
;
22 struct BackfillState
{
23 struct BackfillListener
;
28 struct PrimaryScanned
: sc::event
<PrimaryScanned
> {
29 BackfillInterval result
;
30 PrimaryScanned(BackfillInterval
&& result
)
31 : result(std::move(result
)) {
35 struct ReplicaScanned
: sc::event
<ReplicaScanned
> {
37 BackfillInterval result
;
38 ReplicaScanned(pg_shard_t from
, BackfillInterval
&& result
)
39 : from(std::move(from
)),
40 result(std::move(result
)) {
44 struct ObjectPushed
: sc::event
<ObjectPushed
> {
45 // TODO: implement replica management; I don't want to follow
46 // current convention where the backend layer is responsible
47 // for tracking replicas.
50 ObjectPushed(hobject_t object
)
51 : object(std::move(object
)) {
55 struct Triggered
: sc::event
<Triggered
> {
60 struct RequestPrimaryScanning
: sc::event
<RequestPrimaryScanning
> {
63 struct RequestReplicasScanning
: sc::event
<RequestReplicasScanning
> {
66 struct RequestWaiting
: sc::event
<RequestWaiting
> {
69 struct RequestDone
: sc::event
<RequestDone
> {
class ProgressTracker;

// forward declarations of the FSM's states (Initial is referenced by
// BackfillMachine's template argument list below).
struct Crashed;
struct Initial;
struct Enqueuing;
struct PrimaryScanning;
struct ReplicasScanning;
struct Waiting;
struct Done;
83 struct BackfillMachine
: sc::state_machine
<BackfillMachine
, Initial
> {
84 BackfillMachine(BackfillState
& backfill_state
,
85 BackfillListener
& backfill_listener
,
86 std::unique_ptr
<PeeringFacade
> peering_state
,
87 std::unique_ptr
<PGFacade
> pg
);
89 BackfillState
& backfill_state
;
90 BackfillListener
& backfill_listener
;
91 std::unique_ptr
<PeeringFacade
> peering_state
;
92 std::unique_ptr
<PGFacade
> pg
;
101 BackfillState
& backfill_state() {
102 return static_cast<S
*>(this) \
103 ->template context
<BackfillMachine
>().backfill_state
;
105 BackfillListener
& backfill_listener() {
106 return static_cast<S
*>(this) \
107 ->template context
<BackfillMachine
>().backfill_listener
;
109 PeeringFacade
& peering_state() {
110 return *static_cast<S
*>(this) \
111 ->template context
<BackfillMachine
>().peering_state
;
114 return *static_cast<S
*>(this)->template context
<BackfillMachine
>().pg
;
117 const PeeringFacade
& peering_state() const {
118 return *static_cast<const S
*>(this) \
119 ->template context
<BackfillMachine
>().peering_state
;
121 const BackfillState
& backfill_state() const {
122 return static_cast<const S
*>(this) \
123 ->template context
<BackfillMachine
>().backfill_state
;
130 struct Crashed
: sc::simple_state
<Crashed
, BackfillMachine
>,
131 StateHelper
<Crashed
> {
135 struct Initial
: sc::state
<Initial
, BackfillMachine
>,
136 StateHelper
<Initial
> {
137 using reactions
= boost::mpl::list
<
138 sc::custom_reaction
<Triggered
>,
139 sc::transition
<sc::event_base
, Crashed
>>;
140 explicit Initial(my_context
);
141 // initialize after triggering backfill by on_activate_complete().
142 // transit to Enqueuing.
143 sc::result
react(const Triggered
&);
146 struct Enqueuing
: sc::state
<Enqueuing
, BackfillMachine
>,
147 StateHelper
<Enqueuing
> {
148 using reactions
= boost::mpl::list
<
149 sc::transition
<RequestPrimaryScanning
, PrimaryScanning
>,
150 sc::transition
<RequestReplicasScanning
, ReplicasScanning
>,
151 sc::transition
<RequestWaiting
, Waiting
>,
152 sc::transition
<RequestDone
, Done
>,
153 sc::transition
<sc::event_base
, Crashed
>>;
154 explicit Enqueuing(my_context
);
156 // indicate whether there is any remaining work to do when it comes
157 // to comparing the hobject_t namespace between primary and replicas.
158 // true doesn't necessarily mean backfill is done -- there could be
159 // in-flight pushes or drops which had been enqueued but aren't
161 static bool all_enqueued(
162 const PeeringFacade
& peering_state
,
163 const BackfillInterval
& backfill_info
,
164 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
);
167 void maybe_update_range();
168 void trim_backfill_infos();
170 // these methods take BackfillIntervals instead of extracting them from
171 // the state to emphasize the relationships across the main loop.
173 const BackfillInterval
& local_backfill_info
,
174 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
) const;
175 hobject_t
earliest_peer_backfill(
176 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
) const;
177 bool should_rescan_replicas(
178 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
,
179 const BackfillInterval
& backfill_info
) const;
180 // indicate whether a particular acting primary needs to scanned again
181 // to process next piece of the hobject_t's namespace.
182 // the logic is per analogy to replica_needs_scan(). See comments there.
183 bool should_rescan_primary(
184 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
,
185 const BackfillInterval
& backfill_info
) const;
187 // the result_t is intermediary between {remove,update}_on_peers() and
188 // updating BackfillIntervals in trim_backfilled_object_from_intervals.
189 // This step is important because it affects the main loop's condition,
190 // and thus deserves to be exposed instead of being called deeply from
191 // {remove,update}_on_peers().
192 struct [[nodiscard
]] result_t
{
193 std::set
<pg_shard_t
> pbi_targets
;
194 hobject_t new_last_backfill_started
;
196 void trim_backfilled_object_from_intervals(
198 hobject_t
& last_backfill_started
,
199 std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
);
200 result_t
remove_on_peers(const hobject_t
& check
);
201 result_t
update_on_peers(const hobject_t
& check
);
204 struct PrimaryScanning
: sc::state
<PrimaryScanning
, BackfillMachine
>,
205 StateHelper
<PrimaryScanning
> {
206 using reactions
= boost::mpl::list
<
207 sc::custom_reaction
<ObjectPushed
>,
208 sc::custom_reaction
<PrimaryScanned
>,
209 sc::transition
<sc::event_base
, Crashed
>>;
210 explicit PrimaryScanning(my_context
);
211 sc::result
react(ObjectPushed
);
212 // collect scanning result and transit to Enqueuing.
213 sc::result
react(PrimaryScanned
);
216 struct ReplicasScanning
: sc::state
<ReplicasScanning
, BackfillMachine
>,
217 StateHelper
<ReplicasScanning
> {
218 using reactions
= boost::mpl::list
<
219 sc::custom_reaction
<ObjectPushed
>,
220 sc::custom_reaction
<ReplicaScanned
>,
221 sc::transition
<sc::event_base
, Crashed
>>;
222 explicit ReplicasScanning(my_context
);
223 // collect scanning result; if all results are collected, transition
224 // to Enqueuing will happen.
225 sc::result
react(ObjectPushed
);
226 sc::result
react(ReplicaScanned
);
228 // indicate whether a particular peer should be scanned to retrieve
229 // BackfillInterval for new range of hobject_t namespace.
230 // true when bi.objects is exhausted, replica bi's end is not MAX,
231 // and primary bi'begin is further than the replica's one.
232 static bool replica_needs_scan(
233 const BackfillInterval
& replica_backfill_info
,
234 const BackfillInterval
& local_backfill_info
);
237 std::set
<pg_shard_t
> waiting_on_backfill
;
240 struct Waiting
: sc::state
<Waiting
, BackfillMachine
>,
241 StateHelper
<Waiting
> {
242 using reactions
= boost::mpl::list
<
243 sc::custom_reaction
<ObjectPushed
>,
244 sc::transition
<sc::event_base
, Crashed
>>;
245 explicit Waiting(my_context
);
246 sc::result
react(ObjectPushed
);
249 struct Done
: sc::state
<Done
, BackfillMachine
>,
251 using reactions
= boost::mpl::list
<
252 sc::transition
<sc::event_base
, Crashed
>>;
253 explicit Done(my_context
);
256 BackfillState(BackfillListener
& backfill_listener
,
257 std::unique_ptr
<PeeringFacade
> peering_state
,
258 std::unique_ptr
<PGFacade
> pg
);
262 boost::intrusive_ptr
<const sc::event_base
> evt
) {
263 backfill_machine
.process_event(*std::move(evt
));
266 hobject_t
get_last_backfill_started() const {
267 return last_backfill_started
;
270 hobject_t last_backfill_started
;
271 BackfillInterval backfill_info
;
272 std::map
<pg_shard_t
, BackfillInterval
> peer_backfill_info
;
273 BackfillMachine backfill_machine
;
274 std::unique_ptr
<ProgressTracker
> progress_tracker
;
277 // BackfillListener -- an interface used by the backfill FSM to request
278 // low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`.
279 // The goals behind the interface are: 1) unittestability; 2) possibility
280 // to retrofit classical OSD with BackfillState. For the second reason we
281 // never use `seastar::future` -- instead responses to the requests are
282 // conveyed as events; see ObjectPushed as an example.
283 struct BackfillState::BackfillListener
{
284 virtual void request_replica_scan(
285 const pg_shard_t
& target
,
286 const hobject_t
& begin
,
287 const hobject_t
& end
) = 0;
289 virtual void request_primary_scan(
290 const hobject_t
& begin
) = 0;
292 virtual void enqueue_push(
293 const hobject_t
& obj
,
294 const eversion_t
& v
) = 0;
296 virtual void enqueue_drop(
297 const pg_shard_t
& target
,
298 const hobject_t
& obj
,
299 const eversion_t
& v
) = 0;
301 virtual void maybe_flush() = 0;
303 virtual void update_peers_last_backfill(
304 const hobject_t
& new_last_backfill
) = 0;
306 virtual bool budget_available() const = 0;
308 virtual void backfilled() = 0;
310 virtual ~BackfillListener() = default;
313 // PeeringFacade -- a facade (in the GoF-defined meaning) simplifying
314 // the interface of PeeringState. The motivation is to have an inventory
315 // of behaviour that must be provided by a unit test's mock.
316 struct BackfillState::PeeringFacade
{
317 virtual hobject_t
earliest_backfill() const = 0;
318 virtual const std::set
<pg_shard_t
>& get_backfill_targets() const = 0;
319 virtual const hobject_t
& get_peer_last_backfill(pg_shard_t peer
) const = 0;
320 virtual const eversion_t
& get_last_update() const = 0;
321 virtual const eversion_t
& get_log_tail() const = 0;
323 // the performance impact of `std::function` has not been considered yet.
324 // If there is any proof (from e.g. profiling) about its significance, we
325 // can switch back to the template variant.
326 using scan_log_func_t
= std::function
<void(const pg_log_entry_t
&)>;
327 virtual void scan_log_after(eversion_t
, scan_log_func_t
) const = 0;
329 virtual bool is_backfill_target(pg_shard_t peer
) const = 0;
330 virtual void update_complete_backfill_object_stats(const hobject_t
&hoid
,
331 const pg_stat_t
&stats
) = 0;
332 virtual bool is_backfilling() const = 0;
333 virtual ~PeeringFacade() {}
336 // PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge
337 // interface of crimson's PG class. The motivation is to have an inventory
338 // of behaviour that must be provided by a unit test's mock.
339 struct BackfillState::PGFacade
{
340 virtual const eversion_t
& get_projected_last_update() const = 0;
341 virtual ~PGFacade() {}
344 class BackfillState::ProgressTracker
{
346 enum class op_stage_t
{
352 struct registry_item_t
{
354 std::optional
<pg_stat_t
> stats
;
357 BackfillMachine
& backfill_machine
;
358 std::map
<hobject_t
, registry_item_t
> registry
;
360 BackfillState
& backfill_state() {
361 return backfill_machine
.backfill_state
;
363 PeeringFacade
& peering_state() {
364 return *backfill_machine
.peering_state
;
366 BackfillListener
& backfill_listener() {
367 return backfill_machine
.backfill_listener
;
371 ProgressTracker(BackfillMachine
& backfill_machine
)
372 : backfill_machine(backfill_machine
) {
375 bool tracked_objects_completed() const;
377 bool enqueue_push(const hobject_t
&);
378 void enqueue_drop(const hobject_t
&);
379 void complete_to(const hobject_t
&, const pg_stat_t
&);
382 } // namespace crimson::osd