ceph/src/osd/scrubber/scrub_machine.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3
   4 #include "scrub_machine.h"
   5
   6 #include <chrono>
   7 #include <typeinfo>
   8
   9 #include <boost/core/demangle.hpp>
  10
  11 #include "osd/OSD.h"
  12 #include "osd/OpRequest.h"
  13 #include "ScrubStore.h"
  14
  15 #define dout_context g_ceph_context
  16 #define dout_subsys ceph_subsys_osd
  17 #undef dout_prefix
  18 #define dout_prefix *_dout << " scrubberFSM "
  19
  20 using namespace std::chrono;
  21 using namespace std::chrono_literals;
  22 namespace sc = boost::statechart;
  23
  24 #define DECLARE_LOCALS                                           \
  25   ScrubMachineListener* scrbr = context<ScrubMachine>().m_scrbr; \
  26   std::ignore = scrbr;                                           \
  27   auto pg_id = context<ScrubMachine>().m_pg_id;                  \
  28   std::ignore = pg_id;
  29
  30 namespace Scrub {
  31
  32 // --------- trace/debug auxiliaries -------------------------------
  33
  34 void on_event_creation(std::string_view nm)
  35 {
  36   dout(20) << " event: --vvvv---- " << nm << dendl;
  37 }
  38
  39 void on_event_discard(std::string_view nm)
  40 {
  41   dout(20) << " event: --^^^^---- " << nm << dendl;
  42 }
  43
  44 std::string ScrubMachine::current_states_desc() const
  45 {
  46   std::string sts{"<"};
  47   for (auto si = state_begin(); si != state_end(); ++si) {
  48     const auto& siw{ *si };  // prevents a warning re side-effects
  49     // the '7' is the size of the 'scrub::'
  50     sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
  51   }
  52   return sts + ">";
  53 }
  54
  55 void ScrubMachine::assert_not_active() const
  56 {
  57   ceph_assert(state_cast<const NotActive*>());
  58 }
  59
  60 bool ScrubMachine::is_reserving() const
  61 {
  62   return state_cast<const ReservingReplicas*>();
  63 }
  64
  65 bool ScrubMachine::is_accepting_updates() const
  66 {
  67   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  68   ceph_assert(scrbr->is_primary());
  69
  70   return state_cast<const WaitLastUpdate*>();
  71 }
  72
  73 // for the rest of the code in this file - we know what PG we are dealing with:
  74 #undef dout_prefix
  75 #define dout_prefix _prefix(_dout, this->context<ScrubMachine>())
  76
  77 template <class T>
  78 static ostream& _prefix(std::ostream* _dout, T& t)
  79 {
  80   return t.gen_prefix(*_dout);
  81 }
  82
  83 std::ostream& ScrubMachine::gen_prefix(std::ostream& out) const
  84 {
  85   return m_scrbr->gen_prefix(out) << "FSM: ";
  86 }
  87
  88 // ////////////// the actual actions
  89
  90 // ----------------------- NotActive -----------------------------------------
  91
  92 NotActive::NotActive(my_context ctx) : my_base(ctx)
  93 {
  94   dout(10) << "-- state -->> NotActive" << dendl;
  95   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  96   scrbr->clear_queued_or_active();
  97 }
  98
  99 sc::result NotActive::react(const StartScrub&)
 100 {
 101   dout(10) << "NotActive::react(const StartScrub&)" << dendl;
 102   DECLARE_LOCALS;
 103   scrbr->set_scrub_begin_time();
 104   return transit<ReservingReplicas>();
 105 }
 106
 107 sc::result NotActive::react(const AfterRepairScrub&)
 108 {
 109   dout(10) << "NotActive::react(const AfterRepairScrub&)" << dendl;
 110   DECLARE_LOCALS;
 111   scrbr->set_scrub_begin_time();
 112   return transit<ReservingReplicas>();
 113 }
 114
 115 // ----------------------- ReservingReplicas ---------------------------------
 116
 117 ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
 118 {
 119   dout(10) << "-- state -->> ReservingReplicas" << dendl;
 120   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 121
 122   // prevent the OSD from starting another scrub while we are trying to secure
 123   // replicas resources
 124   scrbr->set_reserving_now();
 125   scrbr->reserve_replicas();
 126 }
 127
 128 ReservingReplicas::~ReservingReplicas()
 129 {
 130   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 131   scrbr->clear_reserving_now();
 132 }
 133
 134 sc::result ReservingReplicas::react(const ReservationFailure&)
 135 {
 136   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 137   dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;
 138
 139   // the Scrubber must release all resources and abort the scrubbing
 140   scrbr->clear_pgscrub_state();
 141   return transit<NotActive>();
 142 }
 143
 144 /**
 145  * note: the event poster is handling the scrubber reset
 146  */
 147 sc::result ReservingReplicas::react(const FullReset&)
 148 {
 149   dout(10) << "ReservingReplicas::react(const FullReset&)" << dendl;
 150   return transit<NotActive>();
 151 }
 152
 153 // ----------------------- ActiveScrubbing -----------------------------------
 154
 155 ActiveScrubbing::ActiveScrubbing(my_context ctx) : my_base(ctx)
 156 {
 157   dout(10) << "-- state -->> ActiveScrubbing" << dendl;
 158   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 159   scrbr->on_init();
 160 }
 161
 162 /**
 163  *  upon exiting the Active state
 164  */
 165 ActiveScrubbing::~ActiveScrubbing()
 166 {
 167   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 168   dout(15) << __func__ << dendl;
 169   scrbr->unreserve_replicas();
 170   scrbr->clear_queued_or_active();
 171 }
 172
 173 /*
 174  * The only source of an InternalError event as of now is the BuildMap state,
 175  * when encountering a backend error.
 176  * We kill the scrub and reset the FSM.
 177  */
 178 sc::result ActiveScrubbing::react(const InternalError&)
 179 {
 180   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 181   dout(10) << __func__ << dendl;
 182   scrbr->clear_pgscrub_state();
 183   return transit<NotActive>();
 184 }
 185
 186 sc::result ActiveScrubbing::react(const FullReset&)
 187 {
 188   dout(10) << "ActiveScrubbing::react(const FullReset&)" << dendl;
 189   // caller takes care of clearing the scrubber & FSM states
 190   return transit<NotActive>();
 191 }
 192
 193 // ----------------------- RangeBlocked -----------------------------------
 194
 195 /*
 196  * Blocked. Will be released by kick_object_context_blocked() (or upon
 197  * an abort)
 198  *
 199  * Note: we are never expected to be waiting for long for a blocked object.
 200  * Unfortunately we know from experience that a bug elsewhere might result
 201  * in an indefinite wait in this state, for an object that is never released.
 202  * If that happens, all we can do is to issue a warning message to help
 203  * with the debugging.
 204  */
 205 RangeBlocked::RangeBlocked(my_context ctx) : my_base(ctx)
 206 {
 207   dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
 208   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 209
 210   // arrange to have a warning message issued if we are stuck in this
 211   // state for longer than some reasonable number of minutes.
 212   m_timeout = scrbr->acquire_blocked_alarm();
 213 }
 214
 215 // ----------------------- PendingTimer -----------------------------------
 216
 217 /**
 218  *  Sleeping till timer reactivation - or just requeuing
 219  */
 220 PendingTimer::PendingTimer(my_context ctx) : my_base(ctx)
 221 {
 222   dout(10) << "-- state -->> Act/PendingTimer" << dendl;
 223   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 224
 225   scrbr->add_delayed_scheduling();
 226 }
 227
 228 // ----------------------- NewChunk -----------------------------------
 229
 230 /**
 231  *  Preconditions:
 232  *  - preemption data was set
 233  *  - epoch start was updated
 234  */
 235 NewChunk::NewChunk(my_context ctx) : my_base(ctx)
 236 {
 237   dout(10) << "-- state -->> Act/NewChunk" << dendl;
 238   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 239
 240   scrbr->get_preemptor().adjust_parameters();
 241
 242   //  choose range to work on
 243   //  select_range_n_notify() will signal either SelectedChunkFree or
 244   //  ChunkIsBusy. If 'busy', we transition to Blocked, and wait for the
 245   //  range to become available.
 246   scrbr->select_range_n_notify();
 247 }
 248
 249 sc::result NewChunk::react(const SelectedChunkFree&)
 250 {
 251   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 252   dout(10) << "NewChunk::react(const SelectedChunkFree&)" << dendl;
 253
 254   scrbr->set_subset_last_update(scrbr->search_log_for_updates());
 255   return transit<WaitPushes>();
 256 }
 257
 258 // ----------------------- WaitPushes -----------------------------------
 259
 260 WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
 261 {
 262   dout(10) << " -- state -->> Act/WaitPushes" << dendl;
 263   post_event(ActivePushesUpd{});
 264 }
 265
 266 /*
 267  * Triggered externally, by the entity that had an update re pushes
 268  */
 269 sc::result WaitPushes::react(const ActivePushesUpd&)
 270 {
 271   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 272   dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
 273            << scrbr->pending_active_pushes() << dendl;
 274
 275   if (!scrbr->pending_active_pushes()) {
 276     // done waiting
 277     return transit<WaitLastUpdate>();
 278   }
 279
 280   return discard_event();
 281 }
 282
 283 // ----------------------- WaitLastUpdate -----------------------------------
 284
 285 WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
 286 {
 287   dout(10) << " -- state -->> Act/WaitLastUpdate" << dendl;
 288   post_event(UpdatesApplied{});
 289 }
 290
/**
 *  Note:
 *  Updates are locally readable immediately. Thus, on the replicas we do not
 *  need to wait for the update notifications before scrubbing. For the Primary
 *  it's a bit different: on EC (and only there) rmw operations have an
 *  additional read roundtrip. That means that on the Primary we need to wait
 *  for last_update_applied (the replica side, even on EC, is still safe,
 *  since the actual transaction will already be readable by commit time).
 */
 300 void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
 301 {
 302   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 303   dout(10) << "WaitLastUpdate::on_new_updates(const UpdatesApplied&)" << dendl;
 304
 305   if (scrbr->has_pg_marked_new_updates()) {
 306     post_event(InternalAllUpdates{});
 307   } else {
 308     // will be requeued by op_applied
 309     dout(10) << "wait for EC read/modify/writes to queue" << dendl;
 310   }
 311 }
 312
 313 /*
 314  *  request maps from the replicas in the acting set
 315  */
 316 sc::result WaitLastUpdate::react(const InternalAllUpdates&)
 317 {
 318   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 319   dout(10) << "WaitLastUpdate::react(const InternalAllUpdates&)" << dendl;
 320
 321   scrbr->get_replicas_maps(scrbr->get_preemptor().is_preemptable());
 322   return transit<BuildMap>();
 323 }
 324
 325 // ----------------------- BuildMap -----------------------------------
 326
 327 BuildMap::BuildMap(my_context ctx) : my_base(ctx)
 328 {
 329   dout(10) << " -- state -->> Act/BuildMap" << dendl;
 330   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 331
 332   // no need to check for an epoch change, as all possible flows that brought us here have
 333   // a check_interval() verification of their final event.
 334
 335   if (scrbr->get_preemptor().was_preempted()) {
 336
 337     // we were preempted, either directly or by a replica
 338     dout(10) << __func__ << " preempted!!!" << dendl;
 339     scrbr->mark_local_map_ready();
 340     post_event(IntBmPreempted{});
 341
 342   } else {
 343
 344     auto ret = scrbr->build_primary_map_chunk();
 345
 346     if (ret == -EINPROGRESS) {
 347       // must wait for the backend to finish. No specific event provided.
 348       // build_primary_map_chunk() has already requeued us.
 349       dout(20) << "waiting for the backend..." << dendl;
 350
 351     } else if (ret < 0) {
 352
 353       dout(10) << "BuildMap::BuildMap() Error! Aborting. Ret: " << ret << dendl;
 354       post_event(InternalError{});
 355
 356     } else {
 357
 358       // the local map was created
 359       post_event(IntLocalMapDone{});
 360     }
 361   }
 362 }
 363
 364 sc::result BuildMap::react(const IntLocalMapDone&)
 365 {
 366   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 367   dout(10) << "BuildMap::react(const IntLocalMapDone&)" << dendl;
 368
 369   scrbr->mark_local_map_ready();
 370   return transit<WaitReplicas>();
 371 }
 372
 373 // ----------------------- DrainReplMaps -----------------------------------
 374
 375 DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
 376 {
 377   dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
 378   // we may have received all maps already. Send the event that will make us check.
 379   post_event(GotReplicas{});
 380 }
 381
 382 sc::result DrainReplMaps::react(const GotReplicas&)
 383 {
 384   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 385   dout(10) << "DrainReplMaps::react(const GotReplicas&)" << dendl;
 386
 387   if (scrbr->are_all_maps_available()) {
 388     // NewChunk will handle the preemption that brought us to this state
 389     return transit<PendingTimer>();
 390   }
 391
 392   dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
 393            << scrbr->dump_awaited_maps() << dendl;
 394   return discard_event();
 395 }
 396
 397 // ----------------------- WaitReplicas -----------------------------------
 398
 399 WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
 400 {
 401   dout(10) << "-- state -->> Act/WaitReplicas" << dendl;
 402   post_event(GotReplicas{});
 403 }
 404
 405 /**
 406  * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in this state
 407  *  for a while even after we got all our maps, we must prevent are_all_maps_available()
 408  *  (actually - the code after the if()) from being called more than once.
 409  * This is basically a separate state, but it's too transitory and artificial to justify
 410  *  the cost of a separate state.
 411
 412  * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns immediately
 413  *  after initiating the process. The actual termination of the maps comparing etc' is
 414  *  signalled via an event. As we share the code with "classic" OSD, here too
 415  *  maps_compare_n_cleanup() is responsible for signalling the completion of the
 416  *  processing.
 417  */
 418 sc::result WaitReplicas::react(const GotReplicas&)
 419 {
 420   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 421   dout(10) << "WaitReplicas::react(const GotReplicas&)" << dendl;
 422
 423   if (!all_maps_already_called && scrbr->are_all_maps_available()) {
 424     dout(10) << "WaitReplicas::react(const GotReplicas&) got all" << dendl;
 425
 426     all_maps_already_called = true;
 427
 428     // were we preempted?
 429     if (scrbr->get_preemptor().disable_and_test()) {  // a test&set
 430
 431
 432       dout(10) << "WaitReplicas::react(const GotReplicas&) PREEMPTED!" << dendl;
 433       return transit<PendingTimer>();
 434
 435     } else {
 436
 437       // maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
 438       scrbr->maps_compare_n_cleanup();
 439       return discard_event();
 440     }
 441   } else {
 442     return discard_event();
 443   }
 444 }
 445
 446 sc::result WaitReplicas::react(const DigestUpdate&)
 447 {
 448   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 449   std::string warn_msg = "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event";
 450   dout(10) << warn_msg << dendl;
 451   scrbr->log_cluster_warning(warn_msg);
 452   return discard_event();
 453 }
 454
 455 // ----------------------- WaitDigestUpdate -----------------------------------
 456
 457 WaitDigestUpdate::WaitDigestUpdate(my_context ctx) : my_base(ctx)
 458 {
 459   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 460   dout(10) << "-- state -->> Act/WaitDigestUpdate" << dendl;
 461
 462   // perform an initial check: maybe we already
 463   // have all the updates we need:
 464   // (note that DigestUpdate is usually an external event)
 465   post_event(DigestUpdate{});
 466 }
 467
 468 sc::result WaitDigestUpdate::react(const DigestUpdate&)
 469 {
 470   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 471   dout(10) << "WaitDigestUpdate::react(const DigestUpdate&)" << dendl;
 472
 473   // on_digest_updates() will either:
 474   // - do nothing - if we are still waiting for updates, or
 475   // - finish the scrubbing of the current chunk, and:
 476   //  - send NextChunk, or
 477   //  - send ScrubFinished
 478   scrbr->on_digest_updates();
 479   return discard_event();
 480 }
 481
 482 sc::result WaitDigestUpdate::react(const ScrubFinished&)
 483 {
 484   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 485   dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
 486   scrbr->set_scrub_duration();
 487   scrbr->scrub_finish();
 488   return transit<NotActive>();
 489 }
 490
 491 ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
 492     : m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
 493 {
 494 }
 495
 496 ScrubMachine::~ScrubMachine() = default;
 497
 498 // -------- for replicas -----------------------------------------------------
 499
 500 // ----------------------- ReplicaWaitUpdates --------------------------------
 501
 502 ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
 503 {
 504   dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl;
 505   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 506   scrbr->on_replica_init();
 507 }
 508
 509 /*
 510  * Triggered externally, by the entity that had an update re pushes
 511  */
 512 sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
 513 {
 514   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 515   dout(10) << "ReplicaWaitUpdates::react(const ReplicaPushesUpd&): "
 516            << scrbr->pending_active_pushes() << dendl;
 517
 518   if (scrbr->pending_active_pushes() == 0) {
 519
 520     // done waiting
 521     return transit<ActiveReplica>();
 522   }
 523
 524   return discard_event();
 525 }
 526
 527 /**
 528  * the event poster is handling the scrubber reset
 529  */
 530 sc::result ReplicaWaitUpdates::react(const FullReset&)
 531 {
 532   dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl;
 533   return transit<NotActive>();
 534 }
 535
 536 // ----------------------- ActiveReplica -----------------------------------
 537
 538 ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
 539 {
 540   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 541   dout(10) << "-- state -->> ActiveReplica" << dendl;
 542   scrbr->on_replica_init();  // as we might have skipped ReplicaWaitUpdates
 543   post_event(SchedReplica{});
 544 }
 545
 546 sc::result ActiveReplica::react(const SchedReplica&)
 547 {
 548   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 549   dout(10) << "ActiveReplica::react(const SchedReplica&). is_preemptable? "
 550            << scrbr->get_preemptor().is_preemptable() << dendl;
 551
 552   if (scrbr->get_preemptor().was_preempted()) {
 553     dout(10) << "replica scrub job preempted" << dendl;
 554
 555     scrbr->send_preempted_replica();
 556     scrbr->replica_handling_done();
 557     return transit<NotActive>();
 558   }
 559
 560   // start or check progress of build_replica_map_chunk()
 561   auto ret_init = scrbr->build_replica_map_chunk();
 562   if (ret_init != -EINPROGRESS) {
 563     return transit<NotActive>();
 564   }
 565
 566   return discard_event();
 567 }
 568
 569 /**
 570  * the event poster is handling the scrubber reset
 571  */
 572 sc::result ActiveReplica::react(const FullReset&)
 573 {
 574   dout(10) << "ActiveReplica::react(const FullReset&)" << dendl;
 575   return transit<NotActive>();
 576 }
 577
 578 }  // namespace Scrub