]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/scrubber/scrub_machine.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / osd / scrubber / scrub_machine.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "scrub_machine.h"
5
6 #include <chrono>
7 #include <typeinfo>
8
9 #include <boost/core/demangle.hpp>
10
11 #include "osd/OSD.h"
12 #include "osd/OpRequest.h"
13 #include "ScrubStore.h"
14
15 #define dout_context g_ceph_context
16 #define dout_subsys ceph_subsys_osd
17 #undef dout_prefix
18 #define dout_prefix *_dout << " scrubberFSM "
19
20 using namespace std::chrono;
21 using namespace std::chrono_literals;
22 namespace sc = boost::statechart;
23
// Brings two convenience aliases into the enclosing scope:
//  - 'scrbr': the ScrubMachineListener* held by the state machine;
//  - 'pg_id': the m_pg_id held by the state machine.
// Both are tagged [[maybe_unused]], as not every user needs both of them.
#define DECLARE_LOCALS                                              \
  [[maybe_unused]] ScrubMachineListener* scrbr =                    \
    context<ScrubMachine>().m_scrbr;                                \
  [[maybe_unused]] auto pg_id = context<ScrubMachine>().m_pg_id;
29
30 namespace Scrub {
31
32 // --------- trace/debug auxiliaries -------------------------------
33
34 void on_event_creation(std::string_view nm)
35 {
36 dout(20) << " event: --vvvv---- " << nm << dendl;
37 }
38
39 void on_event_discard(std::string_view nm)
40 {
41 dout(20) << " event: --^^^^---- " << nm << dendl;
42 }
43
44 std::string ScrubMachine::current_states_desc() const
45 {
46 std::string sts{"<"};
47 for (auto si = state_begin(); si != state_end(); ++si) {
48 const auto& siw{ *si }; // prevents a warning re side-effects
49 // the '7' is the size of the 'scrub::'
50 sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
51 }
52 return sts + ">";
53 }
54
55 void ScrubMachine::assert_not_active() const
56 {
57 ceph_assert(state_cast<const NotActive*>());
58 }
59
60 bool ScrubMachine::is_reserving() const
61 {
62 return state_cast<const ReservingReplicas*>();
63 }
64
65 bool ScrubMachine::is_accepting_updates() const
66 {
67 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
68 ceph_assert(scrbr->is_primary());
69
70 return state_cast<const WaitLastUpdate*>();
71 }
72
73 // for the rest of the code in this file - we know what PG we are dealing with:
74 #undef dout_prefix
75 #define dout_prefix _prefix(_dout, this->context<ScrubMachine>())
76
/**
 * Log-prefix helper, used by the 'dout_prefix' macro defined just above.
 *
 * @param _dout the stream the log line is being written to (dereferenced
 *              unconditionally - must not be null)
 * @param t     any object providing gen_prefix(std::ostream&)
 * @return whatever t.gen_prefix() returns for that stream
 */
template <class T>
static std::ostream& _prefix(std::ostream* _dout, T& t)
{
  return t.gen_prefix(*_dout);
}
82
83 std::ostream& ScrubMachine::gen_prefix(std::ostream& out) const
84 {
85 return m_scrbr->gen_prefix(out) << "FSM: ";
86 }
87
88 // ////////////// the actual actions
89
90 // ----------------------- NotActive -----------------------------------------
91
92 NotActive::NotActive(my_context ctx) : my_base(ctx)
93 {
94 dout(10) << "-- state -->> NotActive" << dendl;
95 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
96 scrbr->clear_queued_or_active();
97 }
98
99 sc::result NotActive::react(const StartScrub&)
100 {
101 dout(10) << "NotActive::react(const StartScrub&)" << dendl;
102 DECLARE_LOCALS;
103 scrbr->set_scrub_begin_time();
104 return transit<ReservingReplicas>();
105 }
106
107 sc::result NotActive::react(const AfterRepairScrub&)
108 {
109 dout(10) << "NotActive::react(const AfterRepairScrub&)" << dendl;
110 DECLARE_LOCALS;
111 scrbr->set_scrub_begin_time();
112 return transit<ReservingReplicas>();
113 }
114
115 // ----------------------- ReservingReplicas ---------------------------------
116
117 ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
118 {
119 dout(10) << "-- state -->> ReservingReplicas" << dendl;
120 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
121
122 // prevent the OSD from starting another scrub while we are trying to secure
123 // replicas resources
124 scrbr->set_reserving_now();
125 scrbr->reserve_replicas();
126 }
127
128 ReservingReplicas::~ReservingReplicas()
129 {
130 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
131 scrbr->clear_reserving_now();
132 }
133
134 sc::result ReservingReplicas::react(const ReservationFailure&)
135 {
136 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
137 dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;
138
139 // the Scrubber must release all resources and abort the scrubbing
140 scrbr->clear_pgscrub_state();
141 return transit<NotActive>();
142 }
143
144 /**
145 * note: the event poster is handling the scrubber reset
146 */
147 sc::result ReservingReplicas::react(const FullReset&)
148 {
149 dout(10) << "ReservingReplicas::react(const FullReset&)" << dendl;
150 return transit<NotActive>();
151 }
152
153 // ----------------------- ActiveScrubbing -----------------------------------
154
155 ActiveScrubbing::ActiveScrubbing(my_context ctx) : my_base(ctx)
156 {
157 dout(10) << "-- state -->> ActiveScrubbing" << dendl;
158 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
159 scrbr->on_init();
160 }
161
162 /**
163 * upon exiting the Active state
164 */
165 ActiveScrubbing::~ActiveScrubbing()
166 {
167 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
168 dout(15) << __func__ << dendl;
169 scrbr->unreserve_replicas();
170 scrbr->clear_queued_or_active();
171 }
172
173 /*
174 * The only source of an InternalError event as of now is the BuildMap state,
175 * when encountering a backend error.
176 * We kill the scrub and reset the FSM.
177 */
178 sc::result ActiveScrubbing::react(const InternalError&)
179 {
180 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
181 dout(10) << __func__ << dendl;
182 scrbr->clear_pgscrub_state();
183 return transit<NotActive>();
184 }
185
186 sc::result ActiveScrubbing::react(const FullReset&)
187 {
188 dout(10) << "ActiveScrubbing::react(const FullReset&)" << dendl;
189 // caller takes care of clearing the scrubber & FSM states
190 return transit<NotActive>();
191 }
192
193 // ----------------------- RangeBlocked -----------------------------------
194
195 /*
196 * Blocked. Will be released by kick_object_context_blocked() (or upon
197 * an abort)
198 *
199 * Note: we are never expected to be waiting for long for a blocked object.
200 * Unfortunately we know from experience that a bug elsewhere might result
201 * in an indefinite wait in this state, for an object that is never released.
202 * If that happens, all we can do is to issue a warning message to help
203 * with the debugging.
204 */
205 RangeBlocked::RangeBlocked(my_context ctx) : my_base(ctx)
206 {
207 dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
208 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
209
210 // arrange to have a warning message issued if we are stuck in this
211 // state for longer than some reasonable number of minutes.
212 m_timeout = scrbr->acquire_blocked_alarm();
213 }
214
215 // ----------------------- PendingTimer -----------------------------------
216
217 /**
218 * Sleeping till timer reactivation - or just requeuing
219 */
220 PendingTimer::PendingTimer(my_context ctx) : my_base(ctx)
221 {
222 dout(10) << "-- state -->> Act/PendingTimer" << dendl;
223 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
224
225 scrbr->add_delayed_scheduling();
226 }
227
228 // ----------------------- NewChunk -----------------------------------
229
230 /**
231 * Preconditions:
232 * - preemption data was set
233 * - epoch start was updated
234 */
235 NewChunk::NewChunk(my_context ctx) : my_base(ctx)
236 {
237 dout(10) << "-- state -->> Act/NewChunk" << dendl;
238 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
239
240 scrbr->get_preemptor().adjust_parameters();
241
242 // choose range to work on
243 // select_range_n_notify() will signal either SelectedChunkFree or
244 // ChunkIsBusy. If 'busy', we transition to Blocked, and wait for the
245 // range to become available.
246 scrbr->select_range_n_notify();
247 }
248
249 sc::result NewChunk::react(const SelectedChunkFree&)
250 {
251 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
252 dout(10) << "NewChunk::react(const SelectedChunkFree&)" << dendl;
253
254 scrbr->set_subset_last_update(scrbr->search_log_for_updates());
255 return transit<WaitPushes>();
256 }
257
258 // ----------------------- WaitPushes -----------------------------------
259
260 WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
261 {
262 dout(10) << " -- state -->> Act/WaitPushes" << dendl;
263 post_event(ActivePushesUpd{});
264 }
265
266 /*
267 * Triggered externally, by the entity that had an update re pushes
268 */
269 sc::result WaitPushes::react(const ActivePushesUpd&)
270 {
271 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
272 dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
273 << scrbr->pending_active_pushes() << dendl;
274
275 if (!scrbr->pending_active_pushes()) {
276 // done waiting
277 return transit<WaitLastUpdate>();
278 }
279
280 return discard_event();
281 }
282
283 // ----------------------- WaitLastUpdate -----------------------------------
284
285 WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
286 {
287 dout(10) << " -- state -->> Act/WaitLastUpdate" << dendl;
288 post_event(UpdatesApplied{});
289 }
290
291 /**
292 * Note:
293 * Updates are locally readable immediately. Thus, on the replicas we do need
294 * to wait for the update notifications before scrubbing. For the Primary it's
295 * a bit different: on EC (and only there) rmw operations have an additional
296 * read roundtrip. That means that on the Primary we need to wait for
297 * last_update_applied (the replica side, even on EC, is still safe
298 * since the actual transaction will already be readable by commit time.
299 */
300 void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
301 {
302 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
303 dout(10) << "WaitLastUpdate::on_new_updates(const UpdatesApplied&)" << dendl;
304
305 if (scrbr->has_pg_marked_new_updates()) {
306 post_event(InternalAllUpdates{});
307 } else {
308 // will be requeued by op_applied
309 dout(10) << "wait for EC read/modify/writes to queue" << dendl;
310 }
311 }
312
313 /*
314 * request maps from the replicas in the acting set
315 */
316 sc::result WaitLastUpdate::react(const InternalAllUpdates&)
317 {
318 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
319 dout(10) << "WaitLastUpdate::react(const InternalAllUpdates&)" << dendl;
320
321 scrbr->get_replicas_maps(scrbr->get_preemptor().is_preemptable());
322 return transit<BuildMap>();
323 }
324
325 // ----------------------- BuildMap -----------------------------------
326
327 BuildMap::BuildMap(my_context ctx) : my_base(ctx)
328 {
329 dout(10) << " -- state -->> Act/BuildMap" << dendl;
330 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
331
332 // no need to check for an epoch change, as all possible flows that brought us here have
333 // a check_interval() verification of their final event.
334
335 if (scrbr->get_preemptor().was_preempted()) {
336
337 // we were preempted, either directly or by a replica
338 dout(10) << __func__ << " preempted!!!" << dendl;
339 scrbr->mark_local_map_ready();
340 post_event(IntBmPreempted{});
341
342 } else {
343
344 auto ret = scrbr->build_primary_map_chunk();
345
346 if (ret == -EINPROGRESS) {
347 // must wait for the backend to finish. No specific event provided.
348 // build_primary_map_chunk() has already requeued us.
349 dout(20) << "waiting for the backend..." << dendl;
350
351 } else if (ret < 0) {
352
353 dout(10) << "BuildMap::BuildMap() Error! Aborting. Ret: " << ret << dendl;
354 post_event(InternalError{});
355
356 } else {
357
358 // the local map was created
359 post_event(IntLocalMapDone{});
360 }
361 }
362 }
363
364 sc::result BuildMap::react(const IntLocalMapDone&)
365 {
366 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
367 dout(10) << "BuildMap::react(const IntLocalMapDone&)" << dendl;
368
369 scrbr->mark_local_map_ready();
370 return transit<WaitReplicas>();
371 }
372
373 // ----------------------- DrainReplMaps -----------------------------------
374
375 DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
376 {
377 dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
378 // we may have received all maps already. Send the event that will make us check.
379 post_event(GotReplicas{});
380 }
381
382 sc::result DrainReplMaps::react(const GotReplicas&)
383 {
384 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
385 dout(10) << "DrainReplMaps::react(const GotReplicas&)" << dendl;
386
387 if (scrbr->are_all_maps_available()) {
388 // NewChunk will handle the preemption that brought us to this state
389 return transit<PendingTimer>();
390 }
391
392 dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
393 << scrbr->dump_awaited_maps() << dendl;
394 return discard_event();
395 }
396
397 // ----------------------- WaitReplicas -----------------------------------
398
399 WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
400 {
401 dout(10) << "-- state -->> Act/WaitReplicas" << dendl;
402 post_event(GotReplicas{});
403 }
404
405 /**
406 * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in this state
407 * for a while even after we got all our maps, we must prevent are_all_maps_available()
408 * (actually - the code after the if()) from being called more than once.
409 * This is basically a separate state, but it's too transitory and artificial to justify
410 * the cost of a separate state.
411
412 * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns immediately
413 * after initiating the process. The actual termination of the maps comparing etc' is
414 * signalled via an event. As we share the code with "classic" OSD, here too
415 * maps_compare_n_cleanup() is responsible for signalling the completion of the
416 * processing.
417 */
418 sc::result WaitReplicas::react(const GotReplicas&)
419 {
420 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
421 dout(10) << "WaitReplicas::react(const GotReplicas&)" << dendl;
422
423 if (!all_maps_already_called && scrbr->are_all_maps_available()) {
424 dout(10) << "WaitReplicas::react(const GotReplicas&) got all" << dendl;
425
426 all_maps_already_called = true;
427
428 // were we preempted?
429 if (scrbr->get_preemptor().disable_and_test()) { // a test&set
430
431
432 dout(10) << "WaitReplicas::react(const GotReplicas&) PREEMPTED!" << dendl;
433 return transit<PendingTimer>();
434
435 } else {
436
437 // maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
438 scrbr->maps_compare_n_cleanup();
439 return discard_event();
440 }
441 } else {
442 return discard_event();
443 }
444 }
445
446 sc::result WaitReplicas::react(const DigestUpdate&)
447 {
448 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
449 std::string warn_msg = "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event";
450 dout(10) << warn_msg << dendl;
451 scrbr->log_cluster_warning(warn_msg);
452 return discard_event();
453 }
454
455 // ----------------------- WaitDigestUpdate -----------------------------------
456
457 WaitDigestUpdate::WaitDigestUpdate(my_context ctx) : my_base(ctx)
458 {
459 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
460 dout(10) << "-- state -->> Act/WaitDigestUpdate" << dendl;
461
462 // perform an initial check: maybe we already
463 // have all the updates we need:
464 // (note that DigestUpdate is usually an external event)
465 post_event(DigestUpdate{});
466 }
467
468 sc::result WaitDigestUpdate::react(const DigestUpdate&)
469 {
470 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
471 dout(10) << "WaitDigestUpdate::react(const DigestUpdate&)" << dendl;
472
473 // on_digest_updates() will either:
474 // - do nothing - if we are still waiting for updates, or
475 // - finish the scrubbing of the current chunk, and:
476 // - send NextChunk, or
477 // - send ScrubFinished
478 scrbr->on_digest_updates();
479 return discard_event();
480 }
481
482 sc::result WaitDigestUpdate::react(const ScrubFinished&)
483 {
484 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
485 dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
486 scrbr->set_scrub_duration();
487 scrbr->scrub_finish();
488 return transit<NotActive>();
489 }
490
491 ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
492 : m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
493 {
494 }
495
496 ScrubMachine::~ScrubMachine() = default;
497
498 // -------- for replicas -----------------------------------------------------
499
500 // ----------------------- ReplicaWaitUpdates --------------------------------
501
502 ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
503 {
504 dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl;
505 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
506 scrbr->on_replica_init();
507 }
508
509 /*
510 * Triggered externally, by the entity that had an update re pushes
511 */
512 sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
513 {
514 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
515 dout(10) << "ReplicaWaitUpdates::react(const ReplicaPushesUpd&): "
516 << scrbr->pending_active_pushes() << dendl;
517
518 if (scrbr->pending_active_pushes() == 0) {
519
520 // done waiting
521 return transit<ActiveReplica>();
522 }
523
524 return discard_event();
525 }
526
527 /**
528 * the event poster is handling the scrubber reset
529 */
530 sc::result ReplicaWaitUpdates::react(const FullReset&)
531 {
532 dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl;
533 return transit<NotActive>();
534 }
535
536 // ----------------------- ActiveReplica -----------------------------------
537
538 ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
539 {
540 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
541 dout(10) << "-- state -->> ActiveReplica" << dendl;
542 scrbr->on_replica_init(); // as we might have skipped ReplicaWaitUpdates
543 post_event(SchedReplica{});
544 }
545
546 sc::result ActiveReplica::react(const SchedReplica&)
547 {
548 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
549 dout(10) << "ActiveReplica::react(const SchedReplica&). is_preemptable? "
550 << scrbr->get_preemptor().is_preemptable() << dendl;
551
552 if (scrbr->get_preemptor().was_preempted()) {
553 dout(10) << "replica scrub job preempted" << dendl;
554
555 scrbr->send_preempted_replica();
556 scrbr->replica_handling_done();
557 return transit<NotActive>();
558 }
559
560 // start or check progress of build_replica_map_chunk()
561 auto ret_init = scrbr->build_replica_map_chunk();
562 if (ret_init != -EINPROGRESS) {
563 return transit<NotActive>();
564 }
565
566 return discard_event();
567 }
568
569 /**
570 * the event poster is handling the scrubber reset
571 */
572 sc::result ActiveReplica::react(const FullReset&)
573 {
574 dout(10) << "ActiveReplica::react(const FullReset&)" << dendl;
575 return transit<NotActive>();
576 }
577
578 } // namespace Scrub