// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "scrub_machine.h"

#include <chrono>
#include <typeinfo>

#include <boost/core/demangle.hpp>

#include "osd/OSD.h"
#include "osd/OpRequest.h"
#include "ScrubStore.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix *_dout << " scrubberFSM "

using namespace std::chrono;
using namespace std::chrono_literals;
namespace sc = boost::statechart;

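// DECLARE_LOCALS pulls the scrubber listener ('scrbr') and the PG id
// ('pg_id') out of the state-machine context; the std::ignore assignments
// silence unused-variable warnings in handlers that use only one of the two.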
#define DECLARE_LOCALS \
  ScrubMachineListener* scrbr = context<ScrubMachine>().m_scrbr; \
  std::ignore = scrbr; \
  auto pg_id = context<ScrubMachine>().m_pg_id; \
  std::ignore = pg_id;

namespace Scrub {

// --------- trace/debug auxiliaries -------------------------------

void on_event_creation(std::string_view nm)
{
  dout(20) << " event: --vvvv---- " << nm << dendl;
}

void on_event_discard(std::string_view nm)
{
  dout(20) << " event: --^^^^---- " << nm << dendl;
}

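// note: state_begin()/state_end() enumerate the currently-active (leaf)
// states, so the result is a string of the form e.g. "<BuildMap/>"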
std::string ScrubMachine::current_states_desc() const
{
  std::string sts{"<"};
  for (auto si = state_begin(); si != state_end(); ++si) {
    const auto& siw{*si};  // prevents a warning re side-effects
    // the '7' strips the "Scrub::" namespace prefix off the demangled name
    sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/";
  }
  return sts + ">";
}

void ScrubMachine::assert_not_active() const
{
  ceph_assert(state_cast<const NotActive*>());
}

bool ScrubMachine::is_reserving() const
{
  return state_cast<const ReservingReplicas*>();
}

bool ScrubMachine::is_accepting_updates() const
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  ceph_assert(scrbr->is_primary());

  return state_cast<const WaitLastUpdate*>();
}

// for the rest of the code in this file - we know what PG we are dealing with:
#undef dout_prefix
#define dout_prefix _prefix(_dout, this->context<ScrubMachine>())

template <class T>
static ostream& _prefix(std::ostream* _dout, T& t)
{
  return t.gen_prefix(*_dout);
}

std::ostream& ScrubMachine::gen_prefix(std::ostream& out) const
{
  return m_scrbr->gen_prefix(out) << "FSM: ";
}

// ////////////// the actual actions
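
// A rough sketch of the happy-path flow, as implemented by the reactions
// below (the states and transitions themselves are declared in
// scrub_machine.h):
//
//  Primary: NotActive --StartScrub--> ReservingReplicas --> ActiveScrubbing:
//           PendingTimer -> NewChunk -> WaitPushes -> WaitLastUpdate ->
//           BuildMap -> WaitReplicas -> WaitDigestUpdate -> (next chunk,
//           or ScrubFinished -> NotActive). A preempted chunk detours
//           through DrainReplMaps back to PendingTimer.
//
//  Replica: ReplicaWaitUpdates --ReplicaPushesUpd--> ActiveReplica ->
//           NotActive.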

// ----------------------- NotActive -----------------------------------------

NotActive::NotActive(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> NotActive" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->clear_queued_or_active();
}

sc::result NotActive::react(const StartScrub&)
{
  dout(10) << "NotActive::react(const StartScrub&)" << dendl;
  DECLARE_LOCALS;
  scrbr->set_scrub_begin_time();
  return transit<ReservingReplicas>();
}

// ----------------------- ReservingReplicas ---------------------------------

ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ReservingReplicas" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // prevent the OSD from starting another scrub while we are trying to
  // secure the replicas' resources
  scrbr->set_reserving_now();
  scrbr->reserve_replicas();
}

ReservingReplicas::~ReservingReplicas()
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->clear_reserving_now();
}
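
// note that the 'reserving now' flag is scoped to this state: it is set in
// the constructor and cleared in the destructor, so every exit path (success,
// reservation failure, or a reset) releases it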

sc::result ReservingReplicas::react(const ReservationFailure&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;

  // the Scrubber must release all resources and abort the scrubbing
  scrbr->clear_pgscrub_state();
  return transit<NotActive>();
}

/**
 * note: the event poster is handling the scrubber reset
 */
sc::result ReservingReplicas::react(const FullReset&)
{
  dout(10) << "ReservingReplicas::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

// ----------------------- ActiveScrubbing -----------------------------------

ActiveScrubbing::ActiveScrubbing(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ActiveScrubbing" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->on_init();
}

/**
 * upon exiting the Active state
 */
ActiveScrubbing::~ActiveScrubbing()
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(15) << __func__ << dendl;
  scrbr->unreserve_replicas();
  scrbr->clear_queued_or_active();
}

/*
 * The only source of an InternalError event as of now is the BuildMap state,
 * when encountering a backend error.
 * We kill the scrub and reset the FSM.
 */
sc::result ActiveScrubbing::react(const InternalError&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << __func__ << dendl;
  scrbr->clear_pgscrub_state();
  return transit<NotActive>();
}

sc::result ActiveScrubbing::react(const FullReset&)
{
  dout(10) << "ActiveScrubbing::react(const FullReset&)" << dendl;
  // caller takes care of clearing the scrubber & FSM states
  return transit<NotActive>();
}

// ----------------------- RangeBlocked -----------------------------------

/*
 * Blocked. Will be released by kick_object_context_blocked() (or upon
 * an abort).
 *
 * Note: we are never expected to wait long for a blocked object.
 * Unfortunately, we know from experience that a bug elsewhere may result
 * in an indefinite wait in this state, for an object that is never released.
 * If that happens, all we can do is issue a warning message to help
 * with the debugging.
 */
RangeBlocked::RangeBlocked(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // arrange to have a warning message issued if we are stuck in this
  // state for longer than some reasonable number of minutes
  m_timeout = scrbr->acquire_blocked_alarm();
}

// ----------------------- PendingTimer -----------------------------------

/**
 * Sleeping till timer reactivation - or just requeuing
 */
PendingTimer::PendingTimer(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/PendingTimer" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  scrbr->add_delayed_scheduling();
}

// ----------------------- NewChunk -----------------------------------

/**
 * Preconditions:
 * - preemption data was set
 * - epoch start was updated
 */
NewChunk::NewChunk(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/NewChunk" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  scrbr->get_preemptor().adjust_parameters();

  // choose range to work on
  // select_range_n_notify() will signal either SelectedChunkFree or
  // ChunkIsBusy. If 'busy', we transition to Blocked, and wait for the
  // range to become available.
  scrbr->select_range_n_notify();
}

sc::result NewChunk::react(const SelectedChunkFree&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "NewChunk::react(const SelectedChunkFree&)" << dendl;

  scrbr->set_subset_last_update(scrbr->search_log_for_updates());
  return transit<WaitPushes>();
}

// ----------------------- WaitPushes -----------------------------------

WaitPushes::WaitPushes(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/WaitPushes" << dendl;
  post_event(ActivePushesUpd{});
}

/*
 * Triggered externally, by the entity that had an update re pushes
 */
sc::result WaitPushes::react(const ActivePushesUpd&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: "
           << scrbr->pending_active_pushes() << dendl;

  if (!scrbr->pending_active_pushes()) {
    // done waiting
    return transit<WaitLastUpdate>();
  }

  return discard_event();
}

// ----------------------- WaitLastUpdate -----------------------------------

WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/WaitLastUpdate" << dendl;
  post_event(UpdatesApplied{});
}

/**
 * Note:
 * Updates are locally readable immediately. Thus, on the replicas we do not
 * need to wait for the update notifications before scrubbing. For the Primary
 * it's a bit different: on EC (and only there) rmw operations have an
 * additional read roundtrip. That means that on the Primary we need to wait
 * for last_update_applied (the replica side, even on EC, is still safe,
 * since the actual transaction will already be readable by commit time).
 */
void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitLastUpdate::on_new_updates(const UpdatesApplied&)" << dendl;

  if (scrbr->has_pg_marked_new_updates()) {
    post_event(InternalAllUpdates{});
  } else {
    // will be requeued by op_applied
    dout(10) << "wait for EC read/modify/writes to queue" << dendl;
  }
}

/*
 * request maps from the replicas in the acting set
 */
sc::result WaitLastUpdate::react(const InternalAllUpdates&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitLastUpdate::react(const InternalAllUpdates&)" << dendl;

  scrbr->get_replicas_maps(scrbr->get_preemptor().is_preemptable());
  return transit<BuildMap>();
}

// ----------------------- BuildMap -----------------------------------

BuildMap::BuildMap(my_context ctx) : my_base(ctx)
{
  dout(10) << " -- state -->> Act/BuildMap" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases

  // no need to check for an epoch change, as all possible flows that brought
  // us here have a check_interval() verification of their final event.

  if (scrbr->get_preemptor().was_preempted()) {

    // we were preempted, either directly or by a replica
    dout(10) << __func__ << " preempted!!!" << dendl;
    scrbr->mark_local_map_ready();
    post_event(IntBmPreempted{});

  } else {

    auto ret = scrbr->build_primary_map_chunk();

    if (ret == -EINPROGRESS) {
      // must wait for the backend to finish. No specific event provided.
      // build_primary_map_chunk() has already requeued us.
      dout(20) << "waiting for the backend..." << dendl;

    } else if (ret < 0) {

      dout(10) << "BuildMap::BuildMap() Error! Aborting. Ret: " << ret << dendl;
      post_event(InternalError{});

    } else {

      // the local map was created
      post_event(IntLocalMapDone{});
    }
  }
}

sc::result BuildMap::react(const IntLocalMapDone&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "BuildMap::react(const IntLocalMapDone&)" << dendl;

  scrbr->mark_local_map_ready();
  return transit<WaitReplicas>();
}

// ----------------------- DrainReplMaps -----------------------------------

DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
  // we may have received all maps already. Send the event that will make us check.
  post_event(GotReplicas{});
}

sc::result DrainReplMaps::react(const GotReplicas&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "DrainReplMaps::react(const GotReplicas&)" << dendl;

  if (scrbr->are_all_maps_available()) {
    // NewChunk will handle the preemption that brought us to this state
    return transit<PendingTimer>();
  }

  dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: "
           << scrbr->dump_awaited_maps() << dendl;
  return discard_event();
}

// ----------------------- WaitReplicas -----------------------------------

WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> Act/WaitReplicas" << dendl;
  post_event(GotReplicas{});
}

/**
 * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in
 * this state for a while even after we got all our maps, we must prevent
 * are_all_maps_available() (actually - the code after the if()) from being
 * called more than once.
 * This is basically a separate state, but it's too transitory and artificial
 * to justify the cost of a separate state.
 *
 * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns
 * immediately after initiating the process. The actual termination of the
 * maps comparison etc. is signalled via an event. As we share the code with
 * the "classic" OSD, here too maps_compare_n_cleanup() is responsible for
 * signalling the completion of the processing.
 */
sc::result WaitReplicas::react(const GotReplicas&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitReplicas::react(const GotReplicas&)" << dendl;

  if (!all_maps_already_called && scrbr->are_all_maps_available()) {
    dout(10) << "WaitReplicas::react(const GotReplicas&) got all" << dendl;

    all_maps_already_called = true;

    // were we preempted?
    if (scrbr->get_preemptor().disable_and_test()) {  // a test&set

      dout(10) << "WaitReplicas::react(const GotReplicas&) PREEMPTED!" << dendl;
      return transit<PendingTimer>();

    } else {

      // maps_compare_n_cleanup() will arrange for MapsCompared event to be sent:
      scrbr->maps_compare_n_cleanup();
      return discard_event();
    }
  } else {
    return discard_event();
  }
}

sc::result WaitReplicas::react(const DigestUpdate&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  std::string warn_msg =
    "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event";
  dout(10) << warn_msg << dendl;
  scrbr->log_cluster_warning(warn_msg);
  return discard_event();
}

// ----------------------- WaitDigestUpdate -----------------------------------

WaitDigestUpdate::WaitDigestUpdate(my_context ctx) : my_base(ctx)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "-- state -->> Act/WaitDigestUpdate" << dendl;

  // perform an initial check: maybe we already
  // have all the updates we need
  // (note that DigestUpdate is usually an external event)
  post_event(DigestUpdate{});
}

sc::result WaitDigestUpdate::react(const DigestUpdate&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitDigestUpdate::react(const DigestUpdate&)" << dendl;

  // on_digest_updates() will either:
  // - do nothing - if we are still waiting for updates, or
  // - finish the scrubbing of the current chunk, and:
  //  - send NextChunk, or
  //  - send ScrubFinished
  scrbr->on_digest_updates();
  return discard_event();
}

sc::result WaitDigestUpdate::react(const ScrubFinished&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
  scrbr->set_scrub_duration();
  scrbr->scrub_finish();
  return transit<NotActive>();
}

ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
    : m_pg_id{pg->pg_id}, m_scrbr{pg_scrub}
{
}
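
// note: the machine itself only sequences the scrub; all actual work is
// delegated through the ScrubMachineListener interface (in practice - the
// PG's scrubber object), keeping the FSM free of OSD internals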

ScrubMachine::~ScrubMachine() = default;

// -------- for replicas -----------------------------------------------------

// ----------------------- ReplicaWaitUpdates --------------------------------

ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
{
  dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl;
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  scrbr->on_replica_init();
}

/*
 * Triggered externally, by the entity that had an update re pushes
 */
sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ReplicaWaitUpdates::react(const ReplicaPushesUpd&): "
           << scrbr->pending_active_pushes() << dendl;

  if (scrbr->pending_active_pushes() == 0) {

    // done waiting
    return transit<ActiveReplica>();
  }

  return discard_event();
}

/**
 * the event poster is handling the scrubber reset
 */
sc::result ReplicaWaitUpdates::react(const FullReset&)
{
  dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

// ----------------------- ActiveReplica -----------------------------------

ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "-- state -->> ActiveReplica" << dendl;
  scrbr->on_replica_init();  // as we might have skipped ReplicaWaitUpdates
  post_event(SchedReplica{});
}

sc::result ActiveReplica::react(const SchedReplica&)
{
  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  dout(10) << "ActiveReplica::react(const SchedReplica&). is_preemptable? "
           << scrbr->get_preemptor().is_preemptable() << dendl;

  if (scrbr->get_preemptor().was_preempted()) {
    dout(10) << "replica scrub job preempted" << dendl;

    scrbr->send_preempted_replica();
    scrbr->replica_handling_done();
    return transit<NotActive>();
  }

  // start or check progress of build_replica_map_chunk()
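  // (-EINPROGRESS means the backend is still working on the chunk and - as
  // with build_primary_map_chunk() above - is expected to requeue us; any
  // other return value, success included, ends the replica's part of the
  // scrub)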
  auto ret_init = scrbr->build_replica_map_chunk();
  if (ret_init != -EINPROGRESS) {
    return transit<NotActive>();
  }

  return discard_event();
}

/**
 * the event poster is handling the scrubber reset
 */
sc::result ActiveReplica::react(const FullReset&)
{
  dout(10) << "ActiveReplica::react(const FullReset&)" << dendl;
  return transit<NotActive>();
}

}  // namespace Scrub