// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <set>

#include <boost/statechart/custom_reaction.hpp>
#include <boost/statechart/event.hpp>
#include <boost/statechart/event_base.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>

#include "osd/recovery_types.h"
18 namespace crimson::osd
{
20 namespace sc
= boost::statechart
;
22 struct BackfillState
{
23 struct BackfillListener
;
28 struct PrimaryScanned
: sc::event
<PrimaryScanned
> {
29 BackfillInterval result
;
30 PrimaryScanned(BackfillInterval
&& result
)
31 : result(std::move(result
)) {
35 struct ReplicaScanned
: sc::event
<ReplicaScanned
> {
37 BackfillInterval result
;
38 ReplicaScanned(pg_shard_t from
, BackfillInterval
&& result
)
39 : from(std::move(from
)),
40 result(std::move(result
)) {
44 struct ObjectPushed
: sc::event
<ObjectPushed
> {
45 // TODO: implement replica management; I don't want to follow
46 // current convention where the backend layer is responsible
47 // for tracking replicas.
50 ObjectPushed(hobject_t object
)
51 : object(std::move(object
)) {
55 struct Triggered
: sc::event
<Triggered
> {
60 struct RequestPrimaryScanning
: sc::event
<RequestPrimaryScanning
> {
63 struct RequestReplicasScanning
: sc::event
<RequestReplicasScanning
> {
66 struct RequestWaiting
: sc::event
<RequestWaiting
> {
69 struct RequestDone
: sc::event
<RequestDone
> {
class ProgressTracker;

// forward declarations of the FSM's states (Initial is referenced by
// BackfillMachine's template argument list below).
struct Crashed;
struct Initial;
struct Enqueuing;
struct PrimaryScanning;
struct ReplicasScanning;
struct Waiting;
struct Done;
83 struct BackfillMachine
: sc::state_machine
<BackfillMachine
, Initial
> {
84 BackfillMachine(BackfillState
& backfill_state
,
85 BackfillListener
& backfill_listener
,
86 std::unique_ptr
<PeeringFacade
> peering_state
,
87 std::unique_ptr
<PGFacade
> pg
);
89 BackfillState
& backfill_state
;
90 BackfillListener
& backfill_listener
;
91 std::unique_ptr
<PeeringFacade
> peering_state
;
92 std::unique_ptr
<PGFacade
> pg
;
101 BackfillState
& backfill_state() {
102 return static_cast<S
*>(this) \
103 ->template context
<BackfillMachine
>().backfill_state
;
105 BackfillListener
& backfill_listener() {
106 return static_cast<S
*>(this) \
107 ->template context
<BackfillMachine
>().backfill_listener
;
109 PeeringFacade
& peering_state() {
110 return *static_cast<S
*>(this) \
111 ->template context
<BackfillMachine
>().peering_state
;
114 return *static_cast<S
*>(this)->template context
<BackfillMachine
>().pg
;
117 const PeeringFacade
& peering_state() const {
118 return *static_cast<const S
*>(this) \
119 ->template context
<BackfillMachine
>().peering_state
;
121 const BackfillState
& backfill_state() const {
122 return static_cast<const S
*>(this) \
123 ->template context
<BackfillMachine
>().backfill_state
;
130 struct Crashed
: sc::simple_state
<Crashed
, BackfillMachine
>,
131 StateHelper
<Crashed
> {
135 struct Initial
: sc::state
<Initial
, BackfillMachine
>,
136 StateHelper
<Initial
> {
137 using reactions
= boost::mpl::list
<
138 sc::custom_reaction
<Triggered
>,
139 sc::transition
<sc::event_base
, Crashed
>>;
140 explicit Initial(my_context
);
141 // initialize after triggering backfill by on_activate_complete().
142 // transit to Enqueuing.
143 sc::result
react(const Triggered
&);
146 struct Enqueuing
: sc::state
<Enqueuing
, BackfillMachine
>,
147 StateHelper
<Enqueuing
> {
148 using reactions
= boost::mpl::list
<
149 sc::transition
<RequestPrimaryScanning
, PrimaryScanning
>,
150 sc::transition
<RequestReplicasScanning
, ReplicasScanning
>,
151 sc::transition
<RequestWaiting
, Waiting
>,
152 sc::transition
<RequestDone
, Done
>,
153 sc::transition
<sc::event_base
, Crashed
>>;
154 explicit Enqueuing(my_context
);
156 // indicate whether there is any remaining work to do when it comes
157 // to comparing the hobject_t namespace between primary and replicas.
158 // true doesn't necessarily mean backfill is done -- there could be
159 // in-flight pushes or drops which had been enqueued but aren't
161 static bool all_enqueued(
162 const PeeringFacade
& peering_state
,
163 const BackfillInterval
& backfill_info
,
164 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
);
167 void maybe_update_range();
168 void trim_backfill_infos();
170 // these methods take BackfillIntervals instead of extracting them from
171 // the state to emphasize the relationships across the main loop.
173 const BackfillInterval
& local_backfill_info
,
174 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
) const;
175 hobject_t
earliest_peer_backfill(
176 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
) const;
177 bool should_rescan_replicas(
178 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
,
179 const BackfillInterval
& backfill_info
) const;
180 // indicate whether a particular acting primary needs to scanned again
181 // to process next piece of the hobject_t's namespace.
182 // the logic is per analogy to replica_needs_scan(). See comments there.
183 bool should_rescan_primary(
184 const std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
,
185 const BackfillInterval
& backfill_info
) const;
187 // the result_t is intermediary between {remove,update}_on_peers() and
188 // updating BackfillIntervals in trim_backfilled_object_from_intervals.
189 // This step is important because it affects the main loop's condition,
190 // and thus deserves to be exposed instead of being called deeply from
191 // {remove,update}_on_peers().
192 struct [[nodiscard
]] result_t
{
193 std::set
<pg_shard_t
> pbi_targets
;
194 hobject_t new_last_backfill_started
;
196 void trim_backfilled_object_from_intervals(
198 hobject_t
& last_backfill_started
,
199 std::map
<pg_shard_t
, BackfillInterval
>& peer_backfill_info
);
200 result_t
remove_on_peers(const hobject_t
& check
);
201 result_t
update_on_peers(const hobject_t
& check
);
204 struct PrimaryScanning
: sc::state
<PrimaryScanning
, BackfillMachine
>,
205 StateHelper
<PrimaryScanning
> {
206 using reactions
= boost::mpl::list
<
207 sc::custom_reaction
<ObjectPushed
>,
208 sc::custom_reaction
<PrimaryScanned
>,
209 sc::transition
<sc::event_base
, Crashed
>>;
210 explicit PrimaryScanning(my_context
);
211 sc::result
react(ObjectPushed
);
212 // collect scanning result and transit to Enqueuing.
213 sc::result
react(PrimaryScanned
);
216 struct ReplicasScanning
: sc::state
<ReplicasScanning
, BackfillMachine
>,
217 StateHelper
<ReplicasScanning
> {
218 using reactions
= boost::mpl::list
<
219 sc::custom_reaction
<ObjectPushed
>,
220 sc::custom_reaction
<ReplicaScanned
>,
221 sc::transition
<sc::event_base
, Crashed
>>;
222 explicit ReplicasScanning(my_context
);
223 // collect scanning result; if all results are collected, transition
224 // to Enqueuing will happen.
225 sc::result
react(ObjectPushed
);
226 sc::result
react(ReplicaScanned
);
228 // indicate whether a particular peer should be scanned to retrieve
229 // BackfillInterval for new range of hobject_t namespace.
230 // true when bi.objects is exhausted, replica bi's end is not MAX,
231 // and primary bi'begin is further than the replica's one.
232 static bool replica_needs_scan(
233 const BackfillInterval
& replica_backfill_info
,
234 const BackfillInterval
& local_backfill_info
);
237 std::set
<pg_shard_t
> waiting_on_backfill
;
240 struct Waiting
: sc::state
<Waiting
, BackfillMachine
>,
241 StateHelper
<Waiting
> {
242 using reactions
= boost::mpl::list
<
243 sc::custom_reaction
<ObjectPushed
>,
244 sc::transition
<sc::event_base
, Crashed
>>;
245 explicit Waiting(my_context
);
246 sc::result
react(ObjectPushed
);
249 struct Done
: sc::state
<Done
, BackfillMachine
>,
251 using reactions
= boost::mpl::list
<
252 sc::transition
<sc::event_base
, Crashed
>>;
253 explicit Done(my_context
);
256 BackfillState(BackfillListener
& backfill_listener
,
257 std::unique_ptr
<PeeringFacade
> peering_state
,
258 std::unique_ptr
<PGFacade
> pg
);
262 boost::intrusive_ptr
<const sc::event_base
> evt
) {
263 backfill_machine
.process_event(*std::move(evt
));
266 hobject_t
get_last_backfill_started() const {
267 return last_backfill_started
;
270 hobject_t last_backfill_started
;
271 BackfillInterval backfill_info
;
272 std::map
<pg_shard_t
, BackfillInterval
> peer_backfill_info
;
273 BackfillMachine backfill_machine
;
274 std::unique_ptr
<ProgressTracker
> progress_tracker
;
277 // BackfillListener -- an interface used by the backfill FSM to request
278 // low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`.
279 // The goals behind the interface are: 1) unittestability; 2) possibility
280 // to retrofit classical OSD with BackfillState. For the second reason we
281 // never use `seastar::future` -- instead responses to the requests are
282 // conveyed as events; see ObjectPushed as an example.
283 struct BackfillState::BackfillListener
{
284 virtual void request_replica_scan(
285 const pg_shard_t
& target
,
286 const hobject_t
& begin
,
287 const hobject_t
& end
) = 0;
289 virtual void request_primary_scan(
290 const hobject_t
& begin
) = 0;
292 virtual void enqueue_push(
293 const hobject_t
& obj
,
294 const eversion_t
& v
) = 0;
296 virtual void enqueue_drop(
297 const pg_shard_t
& target
,
298 const hobject_t
& obj
,
299 const eversion_t
& v
) = 0;
301 virtual void maybe_flush() = 0;
303 virtual void update_peers_last_backfill(
304 const hobject_t
& new_last_backfill
) = 0;
306 virtual bool budget_available() const = 0;
308 virtual void backfilled() = 0;
310 virtual ~BackfillListener() = default;
313 // PeeringFacade -- a facade (in the GoF-defined meaning) simplifying
314 // the interface of PeeringState. The motivation is to have an inventory
315 // of behaviour that must be provided by a unit test's mock.
316 struct BackfillState::PeeringFacade
{
317 virtual hobject_t
earliest_backfill() const = 0;
318 virtual const std::set
<pg_shard_t
>& get_backfill_targets() const = 0;
319 virtual const hobject_t
& get_peer_last_backfill(pg_shard_t peer
) const = 0;
320 virtual const eversion_t
& get_last_update() const = 0;
321 virtual const eversion_t
& get_log_tail() const = 0;
323 // the performance impact of `std::function` has not been considered yet.
324 // If there is any proof (from e.g. profiling) about its significance, we
325 // can switch back to the template variant.
326 using scan_log_func_t
= std::function
<void(const pg_log_entry_t
&)>;
327 virtual void scan_log_after(eversion_t
, scan_log_func_t
) const = 0;
329 virtual bool is_backfill_target(pg_shard_t peer
) const = 0;
330 virtual void update_complete_backfill_object_stats(const hobject_t
&hoid
,
331 const pg_stat_t
&stats
) = 0;
332 virtual bool is_backfilling() const = 0;
333 virtual ~PeeringFacade() {}
336 // PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge
337 // interface of crimson's PG class. The motivation is to have an inventory
338 // of behaviour that must be provided by a unit test's mock.
339 struct BackfillState::PGFacade
{
340 virtual const eversion_t
& get_projected_last_update() const = 0;
341 virtual ~PGFacade() {}
344 class BackfillState::ProgressTracker
{
346 enum class op_stage_t
{
352 struct registry_item_t
{
354 std::optional
<pg_stat_t
> stats
;
357 BackfillMachine
& backfill_machine
;
358 std::map
<hobject_t
, registry_item_t
> registry
;
360 BackfillState
& backfill_state() {
361 return backfill_machine
.backfill_state
;
363 PeeringFacade
& peering_state() {
364 return *backfill_machine
.peering_state
;
366 BackfillListener
& backfill_listener() {
367 return backfill_machine
.backfill_listener
;
371 ProgressTracker(BackfillMachine
& backfill_machine
)
372 : backfill_machine(backfill_machine
) {
375 bool tracked_objects_completed() const;
377 bool enqueue_push(const hobject_t
&);
378 void enqueue_drop(const hobject_t
&);
379 void complete_to(const hobject_t
&, const pg_stat_t
&);
382 } // namespace crimson::osd