// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "scrub_machine.h"

#include <chrono>
#include <typeinfo>

#include <boost/core/demangle.hpp>

#include "osd/OSD.h"
#include "osd/OpRequest.h"
#include "ScrubStore.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix *_dout << " scrubberFSM "

using namespace std::chrono;
using namespace std::chrono_literals;
namespace sc = boost::statechart;

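// DECLARE_LOCALS pulls the scrubber listener ('scrbr') and the PG id
// ('pg_id') out of the state-machine context; the std::ignore assignments
// silence unused-variable warnings in handlers that use only one of the two.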
#define DECLARE_LOCALS \
  ScrubMachineListener* scrbr = context<ScrubMachine>().m_scrbr; \
  std::ignore = scrbr; \
  auto pg_id = context<ScrubMachine>().m_pg_id; \
  std::ignore = pg_id;

namespace Scrub {

// --------- trace/debug auxiliaries -------------------------------

void on_event_creation(std::string_view nm)
{
  dout(20) << " event: --vvvv---- " << nm << dendl;
}

void on_event_discard(std::string_view nm)
{
  dout(20) << " event: --^^^^---- " << nm << dendl;
}

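// note: state_begin()/state_end() enumerate the currently-active (leaf)
// states, so the result is a string of the form e.g. "<BuildMap/>"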
std::string ScrubMachine::current_states_desc() const
{
  std::string sts{"<"};
  for (auto si = state_begin(); si != state_end(); ++si) {
    const auto& siw{*si};  // prevents a warning re side-effects
    // the '7' strips the "Scrub::" namespace prefix off the demangled name
    sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
  }
  return sts + ">";
}

void ScrubMachine::assert_not_active() const
{
  ceph_assert(state_cast<const NotActive*>());
}

bool ScrubMachine::is_reserving() const
{
  return state_cast<const ReservingReplicas*>();
}

bool ScrubMachine::is_accepting_updates() const
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  ceph_assert(scrbr->is_primary());

  return state_cast<const WaitLastUpdate*>();
}

// for the rest of the code in this file - we know what PG we are dealing with:
#undef dout_prefix
#define dout_prefix _prefix(_dout, this->context<ScrubMachine>())

template <class T>
static ostream& _prefix(std::ostream* _dout, T& t)
{
  return t.gen_prefix(*_dout);
}

std::ostream& ScrubMachine::gen_prefix(std::ostream& out) const
{
  return m_scrbr->gen_prefix(out) << "FSM: ";
}

// ////////////// the actual actions
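
// A rough sketch of the happy-path flow, as implemented by the reactions
// below (the states and transitions themselves are declared in
// scrub_machine.h):
//
//  Primary: NotActive --StartScrub--> ReservingReplicas --> ActiveScrubbing:
//           PendingTimer -> NewChunk -> WaitPushes -> WaitLastUpdate ->
//           BuildMap -> WaitReplicas -> WaitDigestUpdate -> (next chunk,
//           or ScrubFinished -> NotActive). A preempted chunk detours
//           through DrainReplMaps back to PendingTimer.
//
//  Replica: ReplicaWaitUpdates --ReplicaPushesUpd--> ActiveReplica ->
//           NotActive.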

// ----------------------- NotActive -----------------------------------------

NotActive::NotActive(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> NotActive" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->clear_queued_or_active();
}

sc::result NotActive::react(const StartScrub&)
{
  dout(10) << "NotActive::react(const StartScrub&)" << dendl;
  DECLARE_LOCALS;
  scrbr->set_scrub_begin_time();
  return transit<ReservingReplicas>();
}

// ----------------------- ReservingReplicas ---------------------------------

ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ReservingReplicas" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // prevent the OSD from starting another scrub while we are trying to
  // secure the replicas' resources
  scrbr->set_reserving_now();
  scrbr->reserve_replicas();
}

ReservingReplicas::~ReservingReplicas()
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->clear_reserving_now();
}
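
// note that the 'reserving now' flag is scoped to this state: it is set in
// the constructor and cleared in the destructor, so every exit path (success,
// reservation failure, or a reset) releases it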

sc::result ReservingReplicas::react(const ReservationFailure&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;

  // the Scrubber must release all resources and abort the scrubbing
  scrbr->clear_pgscrub_state();
  return transit<NotActive>();
}

/**
 * note: the event poster is handling the scrubber reset
 */
sc::result ReservingReplicas::react(const FullReset&)
{
  dout(10) << "ReservingReplicas::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

// ----------------------- ActiveScrubbing -----------------------------------

ActiveScrubbing::ActiveScrubbing(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ActiveScrubbing" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->on_init();
}

/**
 * upon exiting the Active state
 */
ActiveScrubbing::~ActiveScrubbing()
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(15) << __func__ << dendl;
  scrbr->unreserve_replicas();
  scrbr->clear_queued_or_active();
}

/*
 * The only source of an InternalError event as of now is the BuildMap state,
 * when encountering a backend error.
 * We kill the scrub and reset the FSM.
 */
sc::result ActiveScrubbing::react(const InternalError&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << __func__ << dendl;
  scrbr->clear_pgscrub_state();
  return transit<NotActive>();
}

sc::result ActiveScrubbing::react(const FullReset&)
{
  dout(10) << "ActiveScrubbing::react(const FullReset&)" << dendl;
  // caller takes care of clearing the scrubber & FSM states
  return transit<NotActive>();
}

// ----------------------- RangeBlocked -----------------------------------

/*
 * Blocked. Will be released by kick_object_context_blocked() (or upon
 * an abort).
 *
 * Note: we are never expected to wait long for a blocked object.
 * Unfortunately, we know from experience that a bug elsewhere may result
 * in an indefinite wait in this state, for an object that is never released.
 * If that happens, all we can do is issue a warning message to help
 * with the debugging.
 */
RangeBlocked::RangeBlocked(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // arrange to have a warning message issued if we are stuck in this
  // state for longer than some reasonable number of minutes
  m_timeout = scrbr->acquire_blocked_alarm();
}

// ----------------------- PendingTimer -----------------------------------

/**
 * Sleeping till timer reactivation - or just requeuing
 */
PendingTimer::PendingTimer(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/PendingTimer" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  scrbr->add_delayed_scheduling();
}

// ----------------------- NewChunk -----------------------------------

/**
 * Preconditions:
 * - preemption data was set
 * - epoch start was updated
 */
NewChunk::NewChunk(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/NewChunk" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  scrbr->get_preemptor().adjust_parameters();

  // choose range to work on
  // select_range_n_notify() will signal either SelectedChunkFree or
  // ChunkIsBusy. If 'busy', we transition to Blocked, and wait for the
  // range to become available.
  scrbr->select_range_n_notify();
}

sc::result NewChunk::react(const SelectedChunkFree&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "NewChunk::react(const SelectedChunkFree&)" << dendl;

  scrbr->set_subset_last_update(scrbr->search_log_for_updates());
  return transit<WaitPushes>();
}

// ----------------------- WaitPushes -----------------------------------

WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/WaitPushes" << dendl;
  post_event(ActivePushesUpd{});
}

/*
 * Triggered externally, by the entity that had an update re pushes
 */
sc::result WaitPushes::react(const ActivePushesUpd&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
           << scrbr->pending_active_pushes() << dendl;

  if (!scrbr->pending_active_pushes()) {
    // done waiting
    return transit<WaitLastUpdate>();
  }

  return discard_event();
}

// ----------------------- WaitLastUpdate -----------------------------------

WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/WaitLastUpdate" << dendl;
  post_event(UpdatesApplied{});
}

/**
 * Note:
 * Updates are locally readable immediately. Thus, on the replicas we do not
 * need to wait for the update notifications before scrubbing. For the Primary
 * it's a bit different: on EC (and only there) rmw operations have an
 * additional read roundtrip. That means that on the Primary we need to wait
 * for last_update_applied (the replica side, even on EC, is still safe,
 * since the actual transaction will already be readable by commit time).
 */
void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitLastUpdate::on_new_updates(const UpdatesApplied&)" << dendl;

  if (scrbr->has_pg_marked_new_updates()) {
    post_event(InternalAllUpdates{});
  } else {
    // will be requeued by op_applied
    dout(10) << "wait for EC read/modify/writes to queue" << dendl;
  }
}

/*
 * request maps from the replicas in the acting set
 */
sc::result WaitLastUpdate::react(const InternalAllUpdates&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitLastUpdate::react(const InternalAllUpdates&)" << dendl;

  scrbr->get_replicas_maps(scrbr->get_preemptor().is_preemptable());
  return transit<BuildMap>();
}

// ----------------------- BuildMap -----------------------------------

BuildMap::BuildMap(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/BuildMap" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // no need to check for an epoch change, as all possible flows that brought
  // us here have a check_interval() verification of their final event.

  if (scrbr->get_preemptor().was_preempted()) {

    // we were preempted, either directly or by a replica
    dout(10) << __func__ << " preempted!!!" << dendl;
    scrbr->mark_local_map_ready();
    post_event(IntBmPreempted{});

  } else {

    auto ret = scrbr->build_primary_map_chunk();

    if (ret == -EINPROGRESS) {
      // must wait for the backend to finish. No specific event provided.
      // build_primary_map_chunk() has already requeued us.
      dout(20) << "waiting for the backend..." << dendl;

    } else if (ret < 0) {

      dout(10) << "BuildMap::BuildMap() Error! Aborting. Ret: " << ret << dendl;
      post_event(InternalError{});

    } else {

      // the local map was created
      post_event(IntLocalMapDone{});
    }
  }
}

sc::result BuildMap::react(const IntLocalMapDone&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "BuildMap::react(const IntLocalMapDone&)" << dendl;

  scrbr->mark_local_map_ready();
  return transit<WaitReplicas>();
}

// ----------------------- DrainReplMaps -----------------------------------

DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
  // we may have received all maps already. Send the event that will make us check.
  post_event(GotReplicas{});
}

sc::result DrainReplMaps::react(const GotReplicas&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "DrainReplMaps::react(const GotReplicas&)" << dendl;

  if (scrbr->are_all_maps_available()) {
    // NewChunk will handle the preemption that brought us to this state
    return transit<PendingTimer>();
  }

  dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
           << scrbr->dump_awaited_maps() << dendl;
  return discard_event();
}

// ----------------------- WaitReplicas -----------------------------------

WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/WaitReplicas" << dendl;
  post_event(GotReplicas{});
}

/**
 * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in
 * this state for a while even after we got all our maps, we must prevent
 * are_all_maps_available() (actually - the code after the if()) from being
 * called more than once.
 * This is basically a separate state, but it's too transitory and artificial
 * to justify the cost of a separate state.
 *
 * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns
 * immediately after initiating the process. The actual termination of the
 * maps comparison etc. is signalled via an event. As we share the code with
 * the "classic" OSD, here too maps_compare_n_cleanup() is responsible for
 * signalling the completion of the processing.
 */
sc::result WaitReplicas::react(const GotReplicas&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitReplicas::react(const GotReplicas&)" << dendl;

  if (!all_maps_already_called && scrbr->are_all_maps_available()) {
    dout(10) << "WaitReplicas::react(const GotReplicas&) got all" << dendl;

    all_maps_already_called = true;

    // were we preempted?
    if (scrbr->get_preemptor().disable_and_test()) {  // a test&set

      dout(10) << "WaitReplicas::react(const GotReplicas&) PREEMPTED!" << dendl;
      return transit<PendingTimer>();

    } else {

      // maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
      scrbr->maps_compare_n_cleanup();
      return discard_event();
    }
  } else {
    return discard_event();
  }
}

sc::result WaitReplicas::react(const DigestUpdate&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  std::string warn_msg =
    "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event";
  dout(10) << warn_msg << dendl;
  scrbr->log_cluster_warning(warn_msg);
  return discard_event();
}

// ----------------------- WaitDigestUpdate -----------------------------------

WaitDigestUpdate::WaitDigestUpdate(my_context ctx) : my_base(ctx)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "-- state -->> Act/WaitDigestUpdate" << dendl;

  // perform an initial check: maybe we already
  // have all the updates we need
  // (note that DigestUpdate is usually an external event)
  post_event(DigestUpdate{});
}

sc::result WaitDigestUpdate::react(const DigestUpdate&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitDigestUpdate::react(const DigestUpdate&)" << dendl;

  // on_digest_updates() will either:
  // - do nothing - if we are still waiting for updates, or
  // - finish the scrubbing of the current chunk, and:
  //  - send NextChunk, or
  //  - send ScrubFinished
  scrbr->on_digest_updates();
  return discard_event();
}

sc::result WaitDigestUpdate::react(const ScrubFinished&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
  scrbr->set_scrub_duration();
  scrbr->scrub_finish();
  return transit<NotActive>();
}

ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
    : m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
{
}
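
// note: the machine itself only sequences the scrub; all actual work is
// delegated through the ScrubMachineListener interface (in practice - the
// PG's scrubber object), keeping the FSM free of OSD internals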

ScrubMachine::~ScrubMachine() = default;

// -------- for replicas -----------------------------------------------------

// ----------------------- ReplicaWaitUpdates --------------------------------

ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->on_replica_init();
}

/*
 * Triggered externally, by the entity that had an update re pushes
 */
sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ReplicaWaitUpdates::react(const ReplicaPushesUpd&): "
           << scrbr->pending_active_pushes() << dendl;

  if (scrbr->pending_active_pushes() == 0) {

    // done waiting
    return transit<ActiveReplica>();
  }

  return discard_event();
}

/**
 * the event poster is handling the scrubber reset
 */
sc::result ReplicaWaitUpdates::react(const FullReset&)
{
  dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

// ----------------------- ActiveReplica -----------------------------------

ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "-- state -->> ActiveReplica" << dendl;
  scrbr->on_replica_init();  // as we might have skipped ReplicaWaitUpdates
  post_event(SchedReplica{});
}

sc::result ActiveReplica::react(const SchedReplica&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ActiveReplica::react(const SchedReplica&). is_preemptable? "
           << scrbr->get_preemptor().is_preemptable() << dendl;

  if (scrbr->get_preemptor().was_preempted()) {
    dout(10) << "replica scrub job preempted" << dendl;

    scrbr->send_preempted_replica();
    scrbr->replica_handling_done();
    return transit<NotActive>();
  }

  // start or check progress of build_replica_map_chunk()
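  // (-EINPROGRESS means the backend is still working on the chunk and - as
  // with build_primary_map_chunk() above - is expected to requeue us; any
  // other return value, success included, ends the replica's part of the
  // scrub)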
  auto ret_init = scrbr->build_replica_map_chunk();
  if (ret_init != -EINPROGRESS) {
    return transit<NotActive>();
  }

  return discard_event();
}

/**
 * the event poster is handling the scrubber reset
 */
sc::result ActiveReplica::react(const FullReset&)
{
  dout(10) << "ActiveReplica::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

}  // namespace Scrub