]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/PeeringState.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / osd / PeeringState.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include <boost/statechart/custom_reaction.hpp>
7 #include <boost/statechart/event.hpp>
8 #include <boost/statechart/simple_state.hpp>
9 #include <boost/statechart/state.hpp>
10 #include <boost/statechart/state_machine.hpp>
11 #include <boost/statechart/transition.hpp>
12 #include <boost/statechart/event_base.hpp>
13 #include <string>
14 #include <atomic>
15
16 #include "include/ceph_assert.h"
17 #include "include/common_fwd.h"
18
19 #include "PGLog.h"
20 #include "PGStateUtils.h"
21 #include "PGPeeringEvent.h"
22 #include "osd_types.h"
23 #include "os/ObjectStore.h"
24 #include "OSDMap.h"
25 #include "MissingLoc.h"
26 #include "osd/osd_perf_counters.h"
27 #include "common/ostream_temp.h"
28
29 struct PGPool {
30 epoch_t cached_epoch;
31 int64_t id;
32 std::string name;
33
34 pg_pool_t info;
35 SnapContext snapc; // the default pool snapc, ready to go.
36
37 PGPool(OSDMapRef map, int64_t i, const pg_pool_t& info,
38 const std::string& name)
39 : cached_epoch(map->get_epoch()),
40 id(i),
41 name(name),
42 info(info) {
43 snapc = info.get_snap_context();
44 }
45
46 void update(OSDMapRef map);
47
48 ceph::timespan get_readable_interval(ConfigProxy &conf) const {
49 double v = 0;
50 if (info.opts.get(pool_opts_t::READ_LEASE_INTERVAL, &v)) {
51 return ceph::make_timespan(v);
52 } else {
53 auto hbi = conf->osd_heartbeat_grace;
54 auto fac = conf->osd_pool_default_read_lease_ratio;
55 return ceph::make_timespan(hbi * fac);
56 }
57 }
58 };
59
60 struct PeeringCtx;
61
62 // [primary only] content recovery state
63 struct BufferedRecoveryMessages {
64 #if defined(WITH_SEASTAR)
65 std::map<int, std::vector<MessageURef>> message_map;
66 #else
67 std::map<int, std::vector<MessageRef>> message_map;
68 #endif
69
70 BufferedRecoveryMessages() = default;
71 BufferedRecoveryMessages(PeeringCtx &ctx);
72
73 void accept_buffered_messages(BufferedRecoveryMessages &m) {
74 for (auto &[target, ls] : m.message_map) {
75 auto &ovec = message_map[target];
76 // put buffered messages in front
77 ls.reserve(ls.size() + ovec.size());
78 ls.insert(ls.end(), std::make_move_iterator(ovec.begin()), std::make_move_iterator(ovec.end()));
79 ovec.clear();
80 ovec.swap(ls);
81 }
82 }
83
84 template <class MsgT> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
85 void send_osd_message(int target, MsgT&& m) {
86 message_map[target].emplace_back(std::forward<MsgT>(m));
87 }
88 void send_notify(int to, const pg_notify_t &n);
89 void send_query(int to, spg_t spgid, const pg_query_t &q);
90 void send_info(int to, spg_t to_spgid,
91 epoch_t min_epoch, epoch_t cur_epoch,
92 const pg_info_t &info,
93 std::optional<pg_lease_t> lease = {},
94 std::optional<pg_lease_ack_t> lease_ack = {});
95 };
96
/// Per-peer heartbeat bookkeeping: tracks the peer's up_from epoch and
/// bounds on the peer-clock/local-clock delta derived from ping traffic.
/// All accessors take the internal lock; safe for concurrent use.
struct HeartbeatStamps : public RefCountedObject {
  mutable ceph::mutex lock = ceph::make_mutex("HeartbeatStamps::lock");

  /// rank of the peer OSD these stamps describe
  const int osd;

  // we maintain an upper and lower bound on the delta between our local
  // mono_clock time (minus the startup_time) to the peer OSD's mono_clock
  // time (minus its startup_time).
  //
  // delta is (remote_clock_time - local_clock_time), so that
  // local_time + delta -> peer_time, and peer_time - delta -> local_time.
  //
  // we have an upper and lower bound value on this delta, meaning the
  // value of the remote clock is somewhere between [my_time + lb, my_time + ub]
  //
  // conversely, if we have a remote timestamp T, then that is
  // [T - ub, T - lb] in terms of the local clock.  i.e., if you are
  // subtracting the delta, then take care that you swap the role of the
  // lb and ub values.

  /// lower bound on peer clock - local clock
  std::optional<ceph::signedspan> peer_clock_delta_lb;

  /// upper bound on peer clock - local clock
  std::optional<ceph::signedspan> peer_clock_delta_ub;

  /// highest up_from we've seen from this rank
  epoch_t up_from = 0;

  /// Render "hbstamp(osd.N up_from E peer_clock_delta [lb,ub])"; empty
  /// slots are rendered as empty strings when a bound is not yet known.
  void print(std::ostream& out) const {
    std::lock_guard l(lock);
    out << "hbstamp(osd." << osd << " up_from " << up_from
	<< " peer_clock_delta [";
    if (peer_clock_delta_lb) {
      out << *peer_clock_delta_lb;
    }
    out << ",";
    if (peer_clock_delta_ub) {
      out << *peer_clock_delta_ub;
    }
    out << "])";
  }

  /// Called when we send a ping: fill *delta_ub with an upper bound on
  /// (our clock - peer clock) to ship to the peer, if we have one.
  /// Leaves *delta_ub untouched when no lower bound is known yet.
  void sent_ping(std::optional<ceph::signedspan> *delta_ub) {
    std::lock_guard l(lock);
    // the non-primaries need a lower bound on remote clock - local clock. if
    // we assume the transit for the last ping_reply was
    // instantaneous, that would be (the negative of) our last
    // peer_clock_delta_lb value.
    if (peer_clock_delta_lb) {
      *delta_ub = - *peer_clock_delta_lb;
    }
  }

  /// Called on receipt of a ping from the peer.  Ignores pings from an
  /// older up_from (stale instance of the peer).  Updates our delta
  /// bounds from the peer's send stamp and the delta_ub it advertised,
  /// and returns (via *out_delta_ub) the bound to echo back.
  void got_ping(epoch_t this_up_from,
		ceph::signedspan now,
		ceph::signedspan peer_send_stamp,
		std::optional<ceph::signedspan> delta_ub,
		ceph::signedspan *out_delta_ub) {
    std::lock_guard l(lock);
    if (this_up_from < up_from) {
      return;
    }
    if (this_up_from > up_from) {
      up_from = this_up_from;
    }
    // assume instantaneous transit: peer's send stamp minus our receive
    // time is a lower bound on (peer clock - local clock)
    peer_clock_delta_lb = peer_send_stamp - now;
    peer_clock_delta_ub = delta_ub;
    *out_delta_ub = - *peer_clock_delta_lb;
  }

  /// Called on receipt of a ping reply; same bound update as got_ping
  /// but without the up_from bookkeeping.
  void got_ping_reply(ceph::signedspan now,
		      ceph::signedspan peer_send_stamp,
		      std::optional<ceph::signedspan> delta_ub) {
    std::lock_guard l(lock);
    peer_clock_delta_lb = peer_send_stamp - now;
    peer_clock_delta_ub = delta_ub;
  }

private:
  FRIEND_MAKE_REF(HeartbeatStamps);
  // constructed via make_ref<> only (FRIEND_MAKE_REF above)
  HeartbeatStamps(int o)
    : RefCountedObject(NULL),
      osd(o) {}
};
182 using HeartbeatStampsRef = ceph::ref_t<HeartbeatStamps>;
183
/// Stream output via HeartbeatStamps::print (which takes the internal lock).
inline std::ostream& operator<<(std::ostream& out, const HeartbeatStamps& hb)
{
  hb.print(out);
  return out;
}
189
190
/// Per-dispatch peering context: buffered outgoing messages (from the
/// base class) plus the transaction accumulating this round's writes.
/// Move-only: the transaction and buffered messages have single owners.
struct PeeringCtx : BufferedRecoveryMessages {
  ObjectStore::Transaction transaction;
  HBHandle* handle = nullptr;

  PeeringCtx() = default;

  PeeringCtx(const PeeringCtx &) = delete;
  PeeringCtx &operator=(const PeeringCtx &) = delete;

  PeeringCtx(PeeringCtx &&) = default;
  PeeringCtx &operator=(PeeringCtx &&) = default;

  /// Discard any accumulated transaction state and start fresh.
  void reset_transaction() {
    transaction = ObjectStore::Transaction();
  }
};
207
208 /**
209 * Wraps PeeringCtx to hide the difference between buffering messages to
210 * be sent after flush or immediately.
211 */
struct PeeringCtxWrapper {
  utime_t start_time;
  /// message sink: either the wrapped ctx itself or a separate buffer
  BufferedRecoveryMessages &msgs;
  ObjectStore::Transaction &transaction;
  HBHandle * const handle = nullptr;

  /// Send immediately: messages go straight into the wrapped ctx.
  PeeringCtxWrapper(PeeringCtx &wrapped) :
    msgs(wrapped),
    transaction(wrapped.transaction),
    handle(wrapped.handle) {}

  /// Send after flush: messages are diverted into buf instead.
  PeeringCtxWrapper(BufferedRecoveryMessages &buf, PeeringCtx &wrapped)
    : msgs(buf),
      transaction(wrapped.transaction),
      handle(wrapped.handle) {}

  PeeringCtxWrapper(PeeringCtxWrapper &&ctx) = default;

  // The senders below simply forward to whichever sink was chosen above.
  template <class MsgT> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
  void send_osd_message(int target, MsgT&& m) {
    msgs.send_osd_message(target, std::forward<MsgT>(m));
  }
  void send_notify(int to, const pg_notify_t &n) {
    msgs.send_notify(to, n);
  }
  void send_query(int to, spg_t spgid, const pg_query_t &q) {
    msgs.send_query(to, spgid, q);
  }
  void send_info(int to, spg_t to_spgid,
		 epoch_t min_epoch, epoch_t cur_epoch,
		 const pg_info_t &info,
		 std::optional<pg_lease_t> lease = {},
		 std::optional<pg_lease_ack_t> lease_ack = {}) {
    msgs.send_info(to, to_spgid, min_epoch, cur_epoch, info,
		   lease, lease_ack);
  }
};
249
250 /* Encapsulates PG recovery process */
251 class PeeringState : public MissingLoc::MappingInfo {
252 public:
  /// Interface through which PeeringState drives its embedding object
  /// (classic-OSD PG or crimson equivalent).  All members are pure
  /// virtual; implementations live with the embedder.
  struct PeeringListener : public EpochSource {
    /// Prepare t with written information
    virtual void prepare_write(
      pg_info_t &info,
      pg_info_t &last_written_info,
      PastIntervals &past_intervals,
      PGLog &pglog,
      bool dirty_info,
      bool dirty_big_info,
      bool need_write_epoch,
      ObjectStore::Transaction &t) = 0;

    /// Notify that info/history changed (generally to update scrub registration)
    virtual void on_info_history_change() = 0;

    /// Notify PG that Primary/Replica status has changed (to update scrub registration)
    virtual void on_primary_status_change(bool was_primary, bool now_primary) = 0;

    /// Need to reschedule next scrub. Assuming no change in role
    virtual void reschedule_scrub() = 0;

    /// Notify that a scrub has been requested
    virtual void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) = 0;

    /// Return current snap_trimq size
    virtual uint64_t get_snap_trimq_size() const = 0;

    /// Send cluster message to osd
#if defined(WITH_SEASTAR)
    virtual void send_cluster_message(
      int osd, MessageURef m, epoch_t epoch, bool share_map_update=false) = 0;
#else
    virtual void send_cluster_message(
      int osd, MessageRef m, epoch_t epoch, bool share_map_update=false) = 0;
#endif
    /// Send pg_created to mon
    virtual void send_pg_created(pg_t pgid) = 0;

    // ============ Read-lease / clock plumbing ==================
    virtual ceph::signedspan get_mnow() = 0;
    virtual HeartbeatStampsRef get_hb_stamps(int peer) = 0;
    virtual void schedule_renew_lease(epoch_t plr, ceph::timespan delay) = 0;
    virtual void queue_check_readable(epoch_t lpr, ceph::timespan delay) = 0;
    virtual void recheck_readable() = 0;

    virtual unsigned get_target_pg_log_entries() const = 0;

    // ============ Flush state ==================
    /**
     * try_flush_or_schedule_async()
     *
     * If true, caller may assume all past operations on this pg
     * have been flushed.  Else, caller will receive an on_flushed()
     * call once the flush has completed.
     */
    virtual bool try_flush_or_schedule_async() = 0;
    /// Arranges for a commit on t to call on_flushed() once flushed.
    virtual void start_flush_on_transaction(
      ObjectStore::Transaction &t) = 0;
    /// Notification that all outstanding flushes for interval have completed
    virtual void on_flushed() = 0;

    //============= Recovery ====================
    /// Arrange for event to be queued after delay
    virtual void schedule_event_after(
      PGPeeringEventRef event,
      float delay) = 0;
    /**
     * request_local_background_io_reservation
     *
     * Request reservation at priority with on_grant queued on grant
     * and on_preempt on preempt
     */
    virtual void request_local_background_io_reservation(
      unsigned priority,
      PGPeeringEventURef on_grant,
      PGPeeringEventURef on_preempt) = 0;
    /// Modify pending local background reservation request priority
    virtual void update_local_background_io_priority(
      unsigned priority) = 0;
    /// Cancel pending local background reservation request
    virtual void cancel_local_background_io_reservation() = 0;

    /**
     * request_remote_background_io_reservation
     *
     * Request reservation at priority with on_grant queued on grant
     * and on_preempt on preempt
     */
    virtual void request_remote_recovery_reservation(
      unsigned priority,
      PGPeeringEventURef on_grant,
      PGPeeringEventURef on_preempt) = 0;
    /// Cancel pending remote background reservation request
    virtual void cancel_remote_recovery_reservation() = 0;

    /// Arrange for on_commit to be queued upon commit of t
    virtual void schedule_event_on_commit(
      ObjectStore::Transaction &t,
      PGPeeringEventRef on_commit) = 0;

    //============================ HB =============================
    /// Update hb set to peers
    virtual void update_heartbeat_peers(std::set<int> peers) = 0;

    /// Set targets being probed in this interval
    virtual void set_probe_targets(const std::set<pg_shard_t> &probe_set) = 0;
    /// Clear targets being probed in this interval
    virtual void clear_probe_targets() = 0;

    /// Queue for a pg_temp of wanted
    virtual void queue_want_pg_temp(const std::vector<int> &wanted) = 0;
    /// Clear queue for a pg_temp of wanted
    virtual void clear_want_pg_temp() = 0;

    /// Arrange for stats to be shipped to mon to be updated for this pg
    virtual void publish_stats_to_osd() = 0;
    /// Clear stats to be shipped to mon for this pg
    virtual void clear_publish_stats() = 0;

    /// Notification to check outstanding operation targets
    virtual void check_recovery_sources(const OSDMapRef& newmap) = 0;
    /// Notification to check outstanding blocklist
    virtual void check_blocklisted_watchers() = 0;
    /// Notification to clear state associated with primary
    virtual void clear_primary_state() = 0;

    // =================== Event notification ====================
    virtual void on_pool_change() = 0;
    virtual void on_role_change() = 0;
    virtual void on_change(ObjectStore::Transaction &t) = 0;
    virtual void on_activate(interval_set<snapid_t> to_trim) = 0;
    virtual void on_activate_complete() = 0;
    virtual void on_new_interval() = 0;
    virtual Context *on_clean() = 0;
    virtual void on_activate_committed() = 0;
    virtual void on_active_exit() = 0;

    // ====================== PG deletion =======================
    /// Notification of removal complete, t must be populated to complete removal
    virtual void on_removal(ObjectStore::Transaction &t) = 0;
    /// Perform incremental removal work
    virtual std::pair<ghobject_t, bool> do_delete_work(
      ObjectStore::Transaction &t, ghobject_t _next) = 0;

    // ======================= PG Merge =========================
    virtual void clear_ready_to_merge() = 0;
    virtual void set_not_ready_to_merge_target(pg_t pgid, pg_t src) = 0;
    virtual void set_not_ready_to_merge_source(pg_t pgid) = 0;
    virtual void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) = 0;
    virtual void set_ready_to_merge_source(eversion_t lu) = 0;

    // ==================== Map notifications ===================
    virtual void on_active_actmap() = 0;
    virtual void on_active_advmap(const OSDMapRef &osdmap) = 0;
    virtual epoch_t oldest_stored_osdmap() = 0;

    // ============ recovery reservation notifications ==========
    virtual void on_backfill_reserved() = 0;
    virtual void on_backfill_canceled() = 0;
    virtual void on_recovery_reserved() = 0;

    // ================recovery space accounting ================
    virtual bool try_reserve_recovery_space(
      int64_t primary_num_bytes, int64_t local_num_bytes) = 0;
    virtual void unreserve_recovery_space() = 0;

    // ================== Peering log events ====================
    /// Get handler for rolling forward/back log entries
    virtual PGLog::LogEntryHandlerRef get_log_handler(
      ObjectStore::Transaction &t) = 0;

    // ============ On disk representation changes ==============
    virtual void rebuild_missing_set_with_deletes(PGLog &pglog) = 0;

    // ======================= Logging ==========================
    virtual PerfCounters &get_peering_perf() = 0;
    virtual PerfCounters &get_perf_logger() = 0;
    virtual void log_state_enter(const char *state) = 0;
    virtual void log_state_exit(
      const char *state_name, utime_t enter_time,
      uint64_t events, utime_t event_dur) = 0;
    virtual void dump_recovery_info(ceph::Formatter *f) const = 0;

    virtual OstreamTemp get_clog_info() = 0;
    virtual OstreamTemp get_clog_error() = 0;
    virtual OstreamTemp get_clog_debug() = 0;

    virtual ~PeeringListener() {}
  };
442
  /// Event carrying a Formatter to be filled with the current peering state.
  struct QueryState : boost::statechart::event< QueryState > {
    ceph::Formatter *f;
    explicit QueryState(ceph::Formatter *f) : f(f) {}
    void print(std::ostream *out) const {
      *out << "Query";
    }
  };

  /// Event carrying a Formatter to be filled with unfound-object information.
  struct QueryUnfound : boost::statechart::event< QueryUnfound > {
    ceph::Formatter *f;
    explicit QueryUnfound(ceph::Formatter *f) : f(f) {}
    void print(std::ostream *out) const {
      *out << "QueryUnfound";
    }
  };
458
459 struct AdvMap : boost::statechart::event< AdvMap > {
460 OSDMapRef osdmap;
461 OSDMapRef lastmap;
462 std::vector<int> newup, newacting;
463 int up_primary, acting_primary;
464 AdvMap(
465 OSDMapRef osdmap, OSDMapRef lastmap,
466 std::vector<int>& newup, int up_primary,
467 std::vector<int>& newacting, int acting_primary):
468 osdmap(osdmap), lastmap(lastmap),
469 newup(newup),
470 newacting(newacting),
471 up_primary(up_primary),
472 acting_primary(acting_primary) {}
473 void print(std::ostream *out) const {
474 *out << "AdvMap";
475 }
476 };
477
  struct ActMap : boost::statechart::event< ActMap > {
    ActMap() : boost::statechart::event< ActMap >() {}
    void print(std::ostream *out) const {
      *out << "ActMap";
    }
  };
  /// Peering succeeded: activate with the given activation epoch.
  struct Activate : boost::statechart::event< Activate > {
    epoch_t activation_epoch;
    explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
				   activation_epoch(q) {}
    void print(std::ostream *out) const {
      *out << "Activate from " << activation_epoch;
    }
  };
  /// The activation transaction committed; carries both the epoch the
  /// commit was processed at and the original activation epoch.
  struct ActivateCommitted : boost::statechart::event< ActivateCommitted > {
    epoch_t epoch;
    epoch_t activation_epoch;
    explicit ActivateCommitted(epoch_t e, epoch_t ae)
      : boost::statechart::event< ActivateCommitted >(),
	epoch(e),
	activation_epoch(ae) {}
    void print(std::ostream *out) const {
      *out << "ActivateCommitted from " << activation_epoch
	   << " processed at " << epoch;
    }
  };
public:
  struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
    explicit UnfoundBackfill() {}
    void print(std::ostream *out) const {
      *out << "UnfoundBackfill";
    }
  };
  struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> {
    explicit UnfoundRecovery() {}
    void print(std::ostream *out) const {
      *out << "UnfoundRecovery";
    }
  };

  /// Operator-requested scrub; bools are narrowed into the scrub enums.
  struct RequestScrub : boost::statechart::event<RequestScrub> {
    scrub_level_t deep;
    scrub_type_t repair;
    explicit RequestScrub(bool d, bool r) : deep(scrub_level_t(d)), repair(scrub_type_t(r)) {}
    void print(std::ostream *out) const {
      *out << "RequestScrub(" << ((deep==scrub_level_t::deep) ? "deep" : "shallow")
	   << ((repair==scrub_type_t::do_repair) ? " repair)" : ")");
    }
  };
527
  // Payload-free events (TrivialEvent declares a trivial statechart event
  // struct of the given name).

  // peering / recovery progress
  TrivialEvent(Initialize)
  TrivialEvent(GotInfo)
  TrivialEvent(NeedUpThru)
  TrivialEvent(Backfilled)
  TrivialEvent(LocalBackfillReserved)
  TrivialEvent(RejectTooFullRemoteReservation)
  TrivialEvent(RequestBackfill)
  TrivialEvent(RemoteRecoveryPreempted)
  TrivialEvent(RemoteBackfillPreempted)
  TrivialEvent(BackfillTooFull)
  TrivialEvent(RecoveryTooFull)

  // role / outcome decisions
  TrivialEvent(MakePrimary)
  TrivialEvent(MakeStray)
  TrivialEvent(NeedActingChange)
  TrivialEvent(IsIncomplete)
  TrivialEvent(IsDown)

  // recovery reservation flow
  TrivialEvent(AllReplicasRecovered)
  TrivialEvent(DoRecovery)
  TrivialEvent(LocalRecoveryReserved)
  TrivialEvent(AllRemotesReserved)
  TrivialEvent(AllBackfillsReserved)
  TrivialEvent(GoClean)

  TrivialEvent(AllReplicasActivated)

  TrivialEvent(IntervalFlush)

  // PG deletion
  TrivialEvent(DeleteStart)
  TrivialEvent(DeleteSome)

  // operator force flags
  TrivialEvent(SetForceRecovery)
  TrivialEvent(UnsetForceRecovery)
  TrivialEvent(SetForceBackfill)
  TrivialEvent(UnsetForceBackfill)

  TrivialEvent(DeleteReserved)
  TrivialEvent(DeleteInterrupted)

  TrivialEvent(CheckReadable)

  // event-handling context management; definitions live out of line
  void start_handle(PeeringCtx *new_ctx);
  void end_handle();
  void begin_block_outgoing();
  void end_block_outgoing();
  void clear_blocked_outgoing();
575 private:
576
577 /* States */
578 struct Initial;
579 class PeeringMachine : public boost::statechart::state_machine< PeeringMachine, Initial > {
580 public:
581 PeeringState *state;
582 PGStateHistory *state_history;
583 CephContext *cct;
584 spg_t spgid;
585 DoutPrefixProvider *dpp;
586 PeeringListener *pl;
587
588 utime_t event_time;
589 uint64_t event_count;
590
591 void clear_event_counters() {
592 event_time = utime_t();
593 event_count = 0;
594 }
595
596 void log_enter(const char *state_name);
597 void log_exit(const char *state_name, utime_t duration);
598
599 PeeringMachine(
600 PeeringState *state, CephContext *cct,
601 spg_t spgid,
602 DoutPrefixProvider *dpp,
603 PeeringListener *pl,
604 PGStateHistory *state_history) :
605 state(state),
606 state_history(state_history),
607 cct(cct), spgid(spgid),
608 dpp(dpp), pl(pl),
609 event_count(0) {}
610
611 /* Accessor functions for state methods */
612 ObjectStore::Transaction& get_cur_transaction() {
613 ceph_assert(state->rctx);
614 return state->rctx->transaction;
615 }
616
617 PeeringCtxWrapper &get_recovery_ctx() {
618 assert(state->rctx);
619 return *(state->rctx);
620 }
621
622 void send_notify(int to, const pg_notify_t &n) {
623 ceph_assert(state->rctx);
624 state->rctx->send_notify(to, n);
625 }
626 void send_query(int to, const pg_query_t &query) {
627 state->rctx->send_query(
628 to,
629 spg_t(spgid.pgid, query.to),
630 query);
631 }
632 };
633 friend class PeeringMachine;
634
635 /* States */
636 // Initial
637 // Reset
638 // Start
639 // Started
640 // Primary
641 // WaitActingChange
642 // Peering
643 // GetInfo
644 // GetLog
645 // GetMissing
646 // WaitUpThru
647 // Incomplete
648 // Active
649 // Activating
650 // Clean
651 // Recovered
652 // Backfilling
653 // WaitRemoteBackfillReserved
654 // WaitLocalBackfillReserved
655 // NotBackfilling
656 // NotRecovering
657 // Recovering
658 // WaitRemoteRecoveryReserved
659 // WaitLocalRecoveryReserved
660 // ReplicaActive
661 // RepNotRecovering
662 // RepRecovering
663 // RepWaitBackfillReserved
664 // RepWaitRecoveryReserved
665 // Stray
666 // ToDelete
667 // WaitDeleteReserved
668 // Deleting
669 // Crashed
670
  /// Terminal failure state; any event that is not explicitly handled in
  /// the outer states transitions here (see the event_base transitions).
  struct Crashed : boost::statechart::state< Crashed, PeeringMachine >, NamedState {
    explicit Crashed(my_context ctx);
  };

  struct Reset;

  /// Machine entry state; Initialize moves us to Reset, a few message
  /// events are handled via custom reactions, everything else crashes.
  struct Initial : boost::statechart::state< Initial, PeeringMachine >, NamedState {
    explicit Initial(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::transition< Initialize, Reset >,
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;

    boost::statechart::result react(const MNotifyRec&);
    boost::statechart::result react(const MInfoRec&);
    boost::statechart::result react(const MLogRec&);
    // NullEvt (and anything routed here) is dropped
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };

  /// Interval-change staging state entered on (re)initialization.
  struct Reset : boost::statechart::state< Reset, PeeringMachine >, NamedState {
    explicit Reset(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< ActMap >,
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::custom_reaction< IntervalFlush >,
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const ActMap&);
    boost::statechart::result react(const IntervalFlush&);
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };
717
  struct Start;

  /// Outermost "running" state; hosts Primary/Stray subtrees.  Handles
  /// map and flush events, ignores operator force flags here (they are
  /// handled deeper in the hierarchy), crashes on anything unexpected.
  struct Started : boost::statechart::state< Started, PeeringMachine, Start >, NamedState {
    explicit Started(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< IntervalFlush >,
      // ignored
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::custom_reaction<SetForceRecovery>,
      boost::statechart::custom_reaction<UnsetForceRecovery>,
      boost::statechart::custom_reaction<SetForceBackfill>,
      boost::statechart::custom_reaction<UnsetForceBackfill>,
      boost::statechart::custom_reaction<RequestScrub>,
      boost::statechart::custom_reaction<CheckReadable>,
      // crash
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const IntervalFlush&);
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };

  struct Primary;
  struct Stray;

  /// Initial substate of Started: waits to learn whether this OSD is the
  /// PG's primary (MakePrimary) or a stray (MakeStray).
  struct Start : boost::statechart::state< Start, Started >, NamedState {
    explicit Start(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::transition< MakePrimary, Primary >,
      boost::statechart::transition< MakeStray, Stray >
      > reactions;
  };
761
  struct Peering;
  struct WaitActingChange;
  struct Incomplete;
  struct Down;

  /// This OSD is the PG's primary; begins in the Peering substate.
  /// Handles operator force-recovery/backfill flags and peer notifies.
  struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
    explicit Primary(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< ActMap >,
      boost::statechart::custom_reaction< MNotifyRec >,
      boost::statechart::custom_reaction<SetForceRecovery>,
      boost::statechart::custom_reaction<UnsetForceRecovery>,
      boost::statechart::custom_reaction<SetForceBackfill>,
      boost::statechart::custom_reaction<UnsetForceBackfill>,
      boost::statechart::custom_reaction<RequestScrub>
      > reactions;
    boost::statechart::result react(const ActMap&);
    boost::statechart::result react(const MNotifyRec&);
    boost::statechart::result react(const SetForceRecovery&);
    boost::statechart::result react(const UnsetForceRecovery&);
    boost::statechart::result react(const SetForceBackfill&);
    boost::statechart::result react(const UnsetForceBackfill&);
    boost::statechart::result react(const RequestScrub&);
  };

  /// Waiting for the acting set to change (e.g. pg_temp to take effect)
  /// before peering can proceed.
  struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
			    NamedState {
    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< MLogRec >,
      boost::statechart::custom_reaction< MInfoRec >,
      boost::statechart::custom_reaction< MNotifyRec >
      > reactions;
    explicit WaitActingChange(my_context ctx);
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const MLogRec&);
    boost::statechart::result react(const MInfoRec&);
    boost::statechart::result react(const MNotifyRec&);
    void exit();
  };
808
  struct GetInfo;
  struct Active;

  /// Active peering: gather info/logs from prior-interval OSDs (starting
  /// in GetInfo); on Activate, transition into Active.
  struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
    PastIntervals::PriorSet prior_set;
    bool history_les_bound;     ///< need osd_find_best_info_ignore_history_les

    explicit Peering(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::transition< Activate, Active >,
      boost::statechart::custom_reaction< AdvMap >
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap &advmap);
  };
829
  struct WaitLocalRecoveryReserved;
  struct Activating;
  /// The PG is active on this primary; begins in the Activating substate.
  /// Hosts the recovery/backfill substates; many reservation-related
  /// events are deliberately discarded at this level (see inline reacts).
  struct Active : boost::statechart::state< Active, Primary, Activating >, NamedState {
    explicit Active(my_context ctx);
    void exit();

    /// shards (besides us) that need recovery/backfill reservations
    const std::set<pg_shard_t> remote_shards_to_reserve_recovery;
    const std::set<pg_shard_t> remote_shards_to_reserve_backfill;
    bool all_replicas_activated;

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< ActMap >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< MInfoRec >,
      boost::statechart::custom_reaction< MNotifyRec >,
      boost::statechart::custom_reaction< MLogRec >,
      boost::statechart::custom_reaction< MTrim >,
      boost::statechart::custom_reaction< Backfilled >,
      boost::statechart::custom_reaction< ActivateCommitted >,
      boost::statechart::custom_reaction< AllReplicasActivated >,
      boost::statechart::custom_reaction< DeferRecovery >,
      boost::statechart::custom_reaction< DeferBackfill >,
      boost::statechart::custom_reaction< UnfoundRecovery >,
      boost::statechart::custom_reaction< UnfoundBackfill >,
      boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
      boost::statechart::custom_reaction< RemoteReservationRevoked>,
      boost::statechart::custom_reaction< DoRecovery>,
      boost::statechart::custom_reaction< RenewLease>,
      boost::statechart::custom_reaction< MLeaseAck>,
      boost::statechart::custom_reaction< CheckReadable>
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const ActMap&);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const MInfoRec& infoevt);
    boost::statechart::result react(const MNotifyRec& notevt);
    boost::statechart::result react(const MLogRec& logevt);
    boost::statechart::result react(const MTrim& trimevt);
    boost::statechart::result react(const Backfilled&) {
      return discard_event();
    }
    boost::statechart::result react(const ActivateCommitted&);
    boost::statechart::result react(const AllReplicasActivated&);
    boost::statechart::result react(const RenewLease&);
    boost::statechart::result react(const MLeaseAck&);
    // the defer/unfound/revoked events are meaningful only in the
    // recovery/backfill substates; at this level they are no-ops
    boost::statechart::result react(const DeferRecovery& evt) {
      return discard_event();
    }
    boost::statechart::result react(const DeferBackfill& evt) {
      return discard_event();
    }
    boost::statechart::result react(const UnfoundRecovery& evt) {
      return discard_event();
    }
    boost::statechart::result react(const UnfoundBackfill& evt) {
      return discard_event();
    }
    boost::statechart::result react(const RemoteReservationRevokedTooFull&) {
      return discard_event();
    }
    boost::statechart::result react(const RemoteReservationRevoked&) {
      return discard_event();
    }
    boost::statechart::result react(const DoRecovery&) {
      return discard_event();
    }
    boost::statechart::result react(const CheckReadable&);
    void all_activated_and_committed();
  };
902
903 struct Clean : boost::statechart::state< Clean, Active >, NamedState {
904 typedef boost::mpl::list<
905 boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
906 boost::statechart::custom_reaction<SetForceRecovery>,
907 boost::statechart::custom_reaction<SetForceBackfill>
908 > reactions;
909 explicit Clean(my_context ctx);
910 void exit();
911 boost::statechart::result react(const boost::statechart::event_base&) {
912 return discard_event();
913 }
914 };
915
/**
 * Recovered: substate of Active reached once recovery has finished.
 * GoClean moves to Clean; DoRecovery restarts the reservation pipeline.
 */
struct Recovered : boost::statechart::state< Recovered, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< GoClean, Clean >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::custom_reaction< AllReplicasActivated >
  > reactions;
  explicit Recovered(my_context ctx);
  void exit();
  // Once all replicas are activated we queue GoClean (handled by the
  // transition above) and forward the event to outer states as well.
  boost::statechart::result react(const AllReplicasActivated&) {
    post_event(GoClean());
    return forward_event();
  }
};
929
/**
 * Backfilling: substate of Active while backfill is in progress.
 * Handles completion (Backfilled), deferral, unfound objects, and the
 * various remote-reservation revocation/rejection events.
 */
struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< Backfilled >,
    boost::statechart::custom_reaction< DeferBackfill >,
    boost::statechart::custom_reaction< UnfoundBackfill >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
    boost::statechart::custom_reaction< RemoteReservationRevoked>
  > reactions;
  explicit Backfilling(my_context ctx);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt) {
    // for compat with old peers: translate the legacy rejection event
    // into the modern revocation event and re-dispatch it.
    post_event(RemoteReservationRevokedTooFull());
    return discard_event();
  }
  void backfill_release_reservations();
  boost::statechart::result react(const Backfilled& evt);
  boost::statechart::result react(const RemoteReservationRevokedTooFull& evt);
  boost::statechart::result react(const RemoteReservationRevoked& evt);
  boost::statechart::result react(const DeferBackfill& evt);
  boost::statechart::result react(const UnfoundBackfill& evt);
  void cancel_backfill();
  void exit();
};
954
/**
 * WaitRemoteBackfillReserved: substate of Active; the primary is
 * collecting backfill reservations from remote peers.  When all are
 * reserved (AllBackfillsReserved) we transition to Backfilling.
 */
struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationRevoked >,
    boost::statechart::transition< AllBackfillsReserved, Backfilling >
  > reactions;
  // Iterator into the set of backfill peers tracking which shard we are
  // currently awaiting a reservation from — presumably advanced by
  // react(RemoteBackfillReserved); confirm in PeeringState.cc.
  std::set<pg_shard_t>::const_iterator backfill_osd_it;
  explicit WaitRemoteBackfillReserved(my_context ctx);
  void retry();
  void exit();
  boost::statechart::result react(const RemoteBackfillReserved& evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
  boost::statechart::result react(const RemoteReservationRevoked& evt);
};
970
/**
 * WaitLocalBackfillReserved: substate of Active; waiting for the local
 * backfill reservation before asking remote peers.
 */
struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< LocalBackfillReserved, WaitRemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteBackfillReserved >
  > reactions;
  explicit WaitLocalBackfillReserved(my_context ctx);
  // A remote reservation arriving here is stale; ignore it.
  boost::statechart::result react(const RemoteBackfillReserved& evt) {
    /* no-op */
    return discard_event();
  }
  void exit();
};
983
/**
 * NotBackfilling: substate of Active when no backfill is running.
 * RequestBackfill kicks off the reservation pipeline.
 */
struct NotBackfilling : boost::statechart::state< NotBackfilling, Active>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved>,
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >
  > reactions;
  explicit NotBackfilling(my_context ctx);
  void exit();
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const RemoteBackfillReserved& evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
};
997
/**
 * NotRecovering: substate of Active when no log-based recovery is
 * running.  DoRecovery starts the recovery reservation pipeline.
 */
struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< UnfoundRecovery >
  > reactions;
  explicit NotRecovering(my_context ctx);
  boost::statechart::result react(const QueryUnfound& q);
  // Recovery isn't running, so deferral/unfound events are meaningless
  // here and are dropped.
  boost::statechart::result react(const DeferRecovery& evt) {
    /* no-op */
    return discard_event();
  }
  boost::statechart::result react(const UnfoundRecovery& evt) {
    /* no-op */
    return discard_event();
  }
  void exit();
};
1017
1018 struct ToDelete;
1019 struct RepNotRecovering;
/**
 * ReplicaActive: substate of Started for a non-primary shard; initial
 * inner state is RepNotRecovering.  Handles primary-driven events
 * (Activate, MInfoRec, MLogRec, MLease, MTrim, queries) and discards
 * recovery/backfill bookkeeping events that only matter on the primary.
 * DeleteStart transitions to ToDelete.
 */
struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
  explicit ReplicaActive(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< MQuery >,
    boost::statechart::custom_reaction< MInfoRec >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< MTrim >,
    boost::statechart::custom_reaction< Activate >,
    boost::statechart::custom_reaction< ActivateCommitted >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< DeferBackfill >,
    boost::statechart::custom_reaction< UnfoundRecovery >,
    boost::statechart::custom_reaction< UnfoundBackfill >,
    boost::statechart::custom_reaction< RemoteBackfillPreempted >,
    boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
    boost::statechart::custom_reaction< RecoveryDone >,
    boost::statechart::transition<DeleteStart, ToDelete>,
    boost::statechart::custom_reaction< MLease >
  > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MInfoRec& infoevt);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const MTrim& trimevt);
  boost::statechart::result react(const ActMap&);
  boost::statechart::result react(const MQuery&);
  boost::statechart::result react(const Activate&);
  boost::statechart::result react(const ActivateCommitted&);
  boost::statechart::result react(const MLease&);
  // The events below are no-ops on a replica; they are consumed here so
  // they do not propagate to outer states.
  boost::statechart::result react(const RecoveryDone&) {
    return discard_event();
  }
  boost::statechart::result react(const DeferRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const DeferBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteBackfillPreempted& evt) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteRecoveryPreempted& evt) {
    return discard_event();
  }
};
1076
/**
 * RepRecovering: substate of ReplicaActive while this shard is serving
 * a recovery/backfill reservation for the primary.  RecoveryDone,
 * legacy rejection, or cancellation all return to RepNotRecovering.
 */
struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< RecoveryDone, RepNotRecovering >,
    // for compat with old peers
    boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
    boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
    boost::statechart::custom_reaction< BackfillTooFull >,
    boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
    boost::statechart::custom_reaction< RemoteBackfillPreempted >
  > reactions;
  explicit RepRecovering(my_context ctx);
  boost::statechart::result react(const RemoteRecoveryPreempted &evt);
  boost::statechart::result react(const BackfillTooFull &evt);
  boost::statechart::result react(const RemoteBackfillPreempted &evt);
  void exit();
};
1093
/**
 * RepWaitBackfillReserved: substate of ReplicaActive while the local
 * backfill reservation requested by the primary is pending.
 */
struct RepWaitBackfillReserved : boost::statechart::state< RepWaitBackfillReserved, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationCanceled >
  > reactions;
  explicit RepWaitBackfillReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RemoteBackfillReserved &evt);
  boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull &evt);
  boost::statechart::result react(const RemoteReservationCanceled &evt);
};
1108
/**
 * RepWaitRecoveryReserved: substate of ReplicaActive while the local
 * recovery reservation requested by the primary is pending.
 */
struct RepWaitRecoveryReserved : boost::statechart::state< RepWaitRecoveryReserved, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    // for compat with old peers
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationCanceled >
  > reactions;
  explicit RepWaitRecoveryReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RemoteRecoveryReserved &evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull &evt) {
    // for compat with old peers: translate the legacy rejection into a
    // cancellation and re-dispatch it.
    post_event(RemoteReservationCanceled());
    return discard_event();
  }
  boost::statechart::result react(const RemoteReservationCanceled &evt);
};
1126
/**
 * RepNotRecovering: initial substate of ReplicaActive; no reservation
 * is held.  Reservation-priority requests from the primary move us
 * toward RepWaitRecoveryReserved/RepWaitBackfillReserved (via the
 * custom reactions); stale reservation completions are dropped.
 */
struct RepNotRecovering : boost::statechart::state< RepNotRecovering, ReplicaActive>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RequestRecoveryPrio >,
    boost::statechart::custom_reaction< RequestBackfillPrio >,
    boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
    boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
    boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::transition< RecoveryDone, RepNotRecovering > // for compat with pre-reservation peers
  > reactions;
  explicit RepNotRecovering(my_context ctx);
  boost::statechart::result react(const RequestRecoveryPrio &evt);
  boost::statechart::result react(const RequestBackfillPrio &evt);
  boost::statechart::result react(const RemoteBackfillReserved &evt) {
    // my reservation completion raced with a RELEASE from primary
    return discard_event();
  }
  boost::statechart::result react(const RemoteRecoveryReserved &evt) {
    // my reservation completion raced with a RELEASE from primary
    return discard_event();
  }
  boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
  void exit();
};
1152
/**
 * Recovering: substate of Active while log-based recovery is running
 * on the primary.  Completion, deferral, unfound objects, and a
 * follow-on backfill request are handled via custom reactions.
 */
struct Recovering : boost::statechart::state< Recovering, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< AllReplicasRecovered >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< UnfoundRecovery >,
    boost::statechart::custom_reaction< RequestBackfill >
  > reactions;
  explicit Recovering(my_context ctx);
  void exit();
  // Release recovery reservations held on remote shards; cancel=true
  // presumably also aborts in-flight requests — confirm in PeeringState.cc.
  void release_reservations(bool cancel = false);
  boost::statechart::result react(const AllReplicasRecovered &evt);
  boost::statechart::result react(const DeferRecovery& evt);
  boost::statechart::result react(const UnfoundRecovery& evt);
  boost::statechart::result react(const RequestBackfill &evt);
};
1168
/**
 * WaitRemoteRecoveryReserved: substate of Active; collecting recovery
 * reservations from remote shards.  AllRemotesReserved → Recovering.
 */
struct WaitRemoteRecoveryReserved : boost::statechart::state< WaitRemoteRecoveryReserved, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    boost::statechart::transition< AllRemotesReserved, Recovering >
  > reactions;
  // Iterator tracking which remote shard we are currently awaiting a
  // recovery reservation from — presumably advanced by
  // react(RemoteRecoveryReserved); confirm in PeeringState.cc.
  std::set<pg_shard_t>::const_iterator remote_recovery_reservation_it;
  explicit WaitRemoteRecoveryReserved(my_context ctx);
  boost::statechart::result react(const RemoteRecoveryReserved &evt);
  void exit();
};
1179
/**
 * WaitLocalRecoveryReserved: substate of Active; waiting for the local
 * recovery reservation.  Success → WaitRemoteRecoveryReserved;
 * RecoveryTooFull is handled via a custom reaction.
 */
struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
    boost::statechart::custom_reaction< RecoveryTooFull >
  > reactions;
  explicit WaitLocalRecoveryReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RecoveryTooFull &evt);
};
1189
/**
 * Activating: substate of Active entered while activation is underway;
 * purely transitional — routes to Recovered, recovery, or backfill.
 */
struct Activating : boost::statechart::state< Activating, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition< AllReplicasRecovered, Recovered >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved >
  > reactions;
  explicit Activating(my_context ctx);
  void exit();
};
1199
/**
 * Stray: substate of Started for a shard that is not in the acting set.
 * Responds to primary queries/info/log messages; DeleteStart moves to
 * ToDelete.
 */
struct Stray : boost::statechart::state< Stray, Started >,
	       NamedState {
  explicit Stray(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< MQuery >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< MInfoRec >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< RecoveryDone >,
    boost::statechart::transition<DeleteStart, ToDelete>
  > reactions;
  boost::statechart::result react(const MQuery& query);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const MInfoRec& infoevt);
  boost::statechart::result react(const ActMap&);
  // RecoveryDone is meaningless for a stray shard; drop it.
  boost::statechart::result react(const RecoveryDone&) {
    return discard_event();
  }
};
1221
1222 struct WaitDeleteReserved;
/**
 * ToDelete: substate of Started; this PG instance is slated for
 * removal.  Initial inner state is WaitDeleteReserved.
 */
struct ToDelete : boost::statechart::state<ToDelete, Started, WaitDeleteReserved>, NamedState {
  // Delete reservation priority — presumably derived from
  // get_delete_priority(); confirm in PeeringState.cc.
  unsigned priority = 0;
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< ActivateCommitted >,
    boost::statechart::custom_reaction< DeleteSome >
  > reactions;
  explicit ToDelete(my_context ctx);
  boost::statechart::result react(const ActMap &evt);
  boost::statechart::result react(const DeleteSome &evt) {
    // happens if we drop out of Deleting due to reprioritization etc.
    return discard_event();
  }
  boost::statechart::result react(const ActivateCommitted&) {
    // Can happen if we were activated as a stray but not actually pulled
    // from prior to the pg going clean and sending a delete.
    return discard_event();
  }
  void exit();
};
1243
1244 struct Deleting;
/**
 * WaitDeleteReserved: substate of ToDelete; waiting for a deletion
 * reservation.  DeleteReserved → Deleting.
 */
struct WaitDeleteReserved : boost::statechart::state<WaitDeleteReserved,
						     ToDelete>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition<DeleteReserved, Deleting>
  > reactions;
  explicit WaitDeleteReserved(my_context ctx);
  void exit();
};
1253
/**
 * Deleting: substate of ToDelete; objects are being removed in batches
 * driven by DeleteSome events.  DeleteInterrupted returns to
 * WaitDeleteReserved (e.g. to re-reserve).
 */
struct Deleting : boost::statechart::state<Deleting,
					   ToDelete>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< DeleteSome >,
    boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved>
  > reactions;
  // Cursor into the object listing — presumably the next object to
  // process, advanced by react(DeleteSome); confirm in PeeringState.cc.
  ghobject_t next;
  explicit Deleting(my_context ctx);
  boost::statechart::result react(const DeleteSome &evt);
  void exit();
};
1265
1266 struct GetLog;
1267
/**
 * GetInfo: substate of Peering; the primary gathers pg_info_t from
 * prior-interval peers.  GotInfo → GetLog; IsDown → Down.
 */
struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
  // Peers we have asked for info and not yet heard back from.
  std::set<pg_shard_t> peer_info_requested;

  explicit GetInfo(my_context ctx);
  void exit();
  void get_infos();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< GotInfo, GetLog >,
    boost::statechart::custom_reaction< MNotifyRec >,
    boost::statechart::transition< IsDown, Down >
  > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MNotifyRec& infoevt);
};
1286
/// Internal event consumed by GetLog (see its custom_reaction) once the
/// authoritative log is available.
struct GotLog : boost::statechart::event< GotLog > {
  GotLog() : boost::statechart::event< GotLog >() {}
};
1290
/**
 * GetLog: substate of Peering; obtain the authoritative log from
 * auth_log_shard.  NeedActingChange → WaitActingChange;
 * IsIncomplete → Incomplete.
 */
struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState {
  // Shard chosen as the authoritative log source.
  pg_shard_t auth_log_shard;
  // Log message received from auth_log_shard, held until GotLog is
  // processed.
  boost::intrusive_ptr<MOSDPGLog> msg;

  explicit GetLog(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< GotLog >,
    boost::statechart::custom_reaction< AdvMap >,
    boost::statechart::transition< NeedActingChange, WaitActingChange >,
    boost::statechart::transition< IsIncomplete, Incomplete >
  > reactions;
  boost::statechart::result react(const AdvMap&);
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const GotLog&);
};
1313
1314 struct WaitUpThru;
1315
/**
 * GetMissing: substate of Peering; collect missing sets from peers.
 * NeedUpThru → WaitUpThru.
 */
struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
  // Peers we have asked for their missing set and not yet heard from.
  std::set<pg_shard_t> peer_missing_requested;

  explicit GetMissing(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::transition< NeedUpThru, WaitUpThru >
  > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MLogRec& logevt);
};
1332
/**
 * WaitUpThru: substate of Peering; waiting for an osdmap that reflects
 * our updated up_thru before peering can complete.
 */
struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
  explicit WaitUpThru(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< MLogRec >
  > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const ActMap& am);
  boost::statechart::result react(const MLogRec& logrec);
};
1348
/**
 * Down: substate of Peering; peering cannot proceed (e.g. required
 * prior-interval OSDs are unavailable — entered via IsDown from GetInfo).
 */
struct Down : boost::statechart::state< Down, Peering>, NamedState {
  explicit Down(my_context ctx);
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MNotifyRec >
  > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MNotifyRec& infoevt);
  void exit();
};
1361
/**
 * Incomplete: substate of Peering entered via IsIncomplete from GetLog;
 * we cannot find a usable authoritative log.  New notifies or map
 * changes may let peering retry.
 */
struct Incomplete : boost::statechart::state< Incomplete, Peering>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< AdvMap >,
    boost::statechart::custom_reaction< MNotifyRec >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< QueryState >
  > reactions;
  explicit Incomplete(my_context ctx);
  boost::statechart::result react(const AdvMap &advmap);
  boost::statechart::result react(const MNotifyRec& infoevt);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const QueryState& q);
  void exit();
};
1376
1377 PGStateHistory state_history;
1378 CephContext* cct;
1379 spg_t spgid;
1380 DoutPrefixProvider *dpp;
1381 PeeringListener *pl;
1382
1383 /// context passed in by state machine caller
1384 PeeringCtx *orig_ctx;
1385
1386 /// populated if we are buffering messages pending a flush
1387 std::optional<BufferedRecoveryMessages> messages_pending_flush;
1388
1389 /**
1390 * populated between start_handle() and end_handle(), points into
1391 * the message lists for messages_pending_flush while blocking messages
1392 * or into orig_ctx otherwise
1393 */
1394 std::optional<PeeringCtxWrapper> rctx;
1395
1396 /**
1397 * OSDMap state
1398 */
1399 OSDMapRef osdmap_ref; ///< Reference to current OSDMap
1400 PGPool pool; ///< Current pool state
1401 epoch_t last_persisted_osdmap = 0; ///< Last osdmap epoch persisted
1402
1403
1404 /**
1405 * Peering state information
1406 */
1407 int role = -1; ///< 0 = primary, 1 = replica, -1=none.
1408 uint64_t state = 0; ///< PG_STATE_*
1409
1410 pg_shard_t primary; ///< id/shard of primary
1411 pg_shard_t pg_whoami; ///< my id/shard
1412 pg_shard_t up_primary; ///< id/shard of primary of up set
1413 std::vector<int> up; ///< crush mapping without temp pgs
1414 std::set<pg_shard_t> upset; ///< up in set form
1415 std::vector<int> acting; ///< actual acting set for the current interval
1416 std::set<pg_shard_t> actingset; ///< acting in set form
1417
1418 /// union of acting, recovery, and backfill targets
1419 std::set<pg_shard_t> acting_recovery_backfill;
1420
1421 std::vector<HeartbeatStampsRef> hb_stamps;
1422
1423 ceph::signedspan readable_interval = ceph::signedspan::zero();
1424
1425 /// how long we can service reads in this interval
1426 ceph::signedspan readable_until = ceph::signedspan::zero();
1427
1428 /// upper bound on any acting OSDs' readable_until in this interval
1429 ceph::signedspan readable_until_ub = ceph::signedspan::zero();
1430
1431 /// upper bound from prior interval(s)
1432 ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero();
1433
1434 /// pg instances from prior interval(s) that may still be readable
1435 std::set<int> prior_readable_down_osds;
1436
1437 /// [replica] upper bound we got from the primary (primary's clock)
1438 ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
1439
1440 /// [primary] last upper bound shared by primary to replicas
1441 ceph::signedspan readable_until_ub_sent = ceph::signedspan::zero();
1442
1443 /// [primary] readable ub acked by acting set members
1444 std::vector<ceph::signedspan> acting_readable_until_ub;
1445
1446 bool send_notify = false; ///< True if a notify needs to be sent to the primary
1447
bool dirty_info = false; ///< small info structure on disk out of date
1449 bool dirty_big_info = false; ///< big info structure on disk out of date
1450
1451 pg_info_t info; ///< current pg info
1452 pg_info_t last_written_info; ///< last written info
1453 PastIntervals past_intervals; ///< information about prior pg mappings
1454 PGLog pg_log; ///< pg log
1455
1456 epoch_t last_peering_reset = 0; ///< epoch of last peering reset
1457
1458 /// last_update that has committed; ONLY DEFINED WHEN is_active()
1459 eversion_t last_update_ondisk;
1460 eversion_t last_complete_ondisk; ///< last_complete that has committed.
1461 eversion_t last_update_applied; ///< last_update readable
1462 /// last version to which rollback_info trimming has been applied
1463 eversion_t last_rollback_info_trimmed_to_applied;
1464
1465 /// Counter to determine when pending flushes have completed
1466 unsigned flushes_in_progress = 0;
1467
1468 /**
1469 * Primary state
1470 */
1471 std::set<pg_shard_t> stray_set; ///< non-acting osds that have PG data.
1472 std::map<pg_shard_t, pg_info_t> peer_info; ///< info from peers (stray or prior)
1473 std::map<pg_shard_t, int64_t> peer_bytes; ///< Peer's num_bytes from peer_info
1474 std::set<pg_shard_t> peer_purged; ///< peers purged
1475 std::map<pg_shard_t, pg_missing_t> peer_missing; ///< peer missing sets
1476 std::set<pg_shard_t> peer_log_requested; ///< logs i've requested (and start stamps)
1477 std::set<pg_shard_t> peer_missing_requested; ///< missing sets requested
1478
1479 /// features supported by all peers
1480 uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
1481 /// features supported by acting set
1482 uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
1483 /// features supported by up and acting
1484 uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
1485
1486 /// most recently consumed osdmap's require_osd_version
1487 ceph_release_t last_require_osd_release;
1488
1489 std::vector<int> want_acting; ///< non-empty while peering needs a new acting set
1490
1491 // acting_recovery_backfill contains shards that are acting,
1492 // async recovery targets, or backfill targets.
1493 std::map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
1494
1495 /// up: min over last_complete_ondisk, peer_last_complete_ondisk
1496 eversion_t min_last_complete_ondisk;
1497 /// point to which the log should be trimmed
1498 eversion_t pg_trim_to;
1499
1500 std::set<int> blocked_by; ///< osds we are blocked by (for pg stats)
1501
1502 bool need_up_thru = false; ///< true if osdmap with updated up_thru needed
1503
1504 /// I deleted these strays; ignore racing PGInfo from them
1505 std::set<pg_shard_t> peer_activated;
1506
1507 std::set<pg_shard_t> backfill_targets; ///< osds to be backfilled
1508 std::set<pg_shard_t> async_recovery_targets; ///< osds to be async recovered
1509
1510 /// osds which might have objects on them which are unfound on the primary
1511 std::set<pg_shard_t> might_have_unfound;
1512
bool deleting = false; ///< true while in removing or OSD is shutting down
std::atomic<bool> deleted = {false}; ///< true once deletion complete
1515
1516 MissingLoc missing_loc; ///< information about missing objects
1517
1518 bool backfill_reserved = false;
1519 bool backfill_reserving = false;
1520
1521 PeeringMachine machine;
1522
/// Replace the cached OSDMap reference with @p newmap (takes ownership
/// of the caller's reference via move).
void update_osdmap_ref(OSDMapRef newmap) {
  osdmap_ref = std::move(newmap);
}
1526
1527 void update_heartbeat_peers();
1528 void query_unfound(Formatter *f, std::string state);
1529 bool proc_replica_info(
1530 pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch);
1531 void remove_down_peer_info(const OSDMapRef &osdmap);
1532 void check_recovery_sources(const OSDMapRef& map);
1533 void set_last_peering_reset();
1534 void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap);
1535 bool should_restart_peering(
1536 int newupprimary,
1537 int newactingprimary,
1538 const std::vector<int>& newup,
1539 const std::vector<int>& newacting,
1540 OSDMapRef lastmap,
1541 OSDMapRef osdmap);
1542 void start_peering_interval(
1543 const OSDMapRef lastmap,
1544 const std::vector<int>& newup, int up_primary,
1545 const std::vector<int>& newacting, int acting_primary,
1546 ObjectStore::Transaction &t);
1547 void on_new_interval();
1548 void clear_recovery_state();
1549 void clear_primary_state();
1550 void check_past_interval_bounds() const;
1551 bool set_force_recovery(bool b);
1552 bool set_force_backfill(bool b);
1553
1554 /// clip calculated priority to reasonable range
1555 int clamp_recovery_priority(int prio, int pool_recovery_prio, int max);
1556 /// get log recovery reservation priority
1557 unsigned get_recovery_priority();
1558 /// get backfill reservation priority
1559 unsigned get_backfill_priority();
1560 /// get priority for pg deletion
1561 unsigned get_delete_priority();
1562
1563 bool check_prior_readable_down_osds(const OSDMapRef& map);
1564
1565 bool adjust_need_up_thru(const OSDMapRef osdmap);
1566 PastIntervals::PriorSet build_prior();
1567
1568 void reject_reservation();
1569
1570 // acting std::set
1571 std::map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
1572 const std::map<pg_shard_t, pg_info_t> &infos,
1573 bool restrict_to_up_acting,
1574 bool *history_les_bound) const;
1575
1576 static void calc_ec_acting(
1577 std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
1578 unsigned size,
1579 const std::vector<int> &acting,
1580 const std::vector<int> &up,
1581 const std::map<pg_shard_t, pg_info_t> &all_info,
1582 bool restrict_to_up_acting,
1583 std::vector<int> *want,
1584 std::set<pg_shard_t> *backfill,
1585 std::set<pg_shard_t> *acting_backfill,
1586 std::ostream &ss);
1587
1588 static std::pair<std::map<pg_shard_t, pg_info_t>::const_iterator, eversion_t>
1589 select_replicated_primary(
1590 std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
1591 uint64_t force_auth_primary_missing_objects,
1592 const std::vector<int> &up,
1593 pg_shard_t up_primary,
1594 const std::map<pg_shard_t, pg_info_t> &all_info,
1595 const OSDMapRef osdmap,
1596 std::ostream &ss);
1597
1598 static void calc_replicated_acting(
1599 std::map<pg_shard_t, pg_info_t>::const_iterator primary_shard,
1600 eversion_t oldest_auth_log_entry,
1601 unsigned size,
1602 const std::vector<int> &acting,
1603 const std::vector<int> &up,
1604 pg_shard_t up_primary,
1605 const std::map<pg_shard_t, pg_info_t> &all_info,
1606 bool restrict_to_up_acting,
1607 std::vector<int> *want,
1608 std::set<pg_shard_t> *backfill,
1609 std::set<pg_shard_t> *acting_backfill,
1610 const OSDMapRef osdmap,
1611 const PGPool& pool,
1612 std::ostream &ss);
1613 static void calc_replicated_acting_stretch(
1614 std::map<pg_shard_t, pg_info_t>::const_iterator primary_shard,
1615 eversion_t oldest_auth_log_entry,
1616 unsigned size,
1617 const std::vector<int> &acting,
1618 const std::vector<int> &up,
1619 pg_shard_t up_primary,
1620 const std::map<pg_shard_t, pg_info_t> &all_info,
1621 bool restrict_to_up_acting,
1622 std::vector<int> *want,
1623 std::set<pg_shard_t> *backfill,
1624 std::set<pg_shard_t> *acting_backfill,
1625 const OSDMapRef osdmap,
1626 const PGPool& pool,
1627 std::ostream &ss);
1628
1629 void choose_async_recovery_ec(
1630 const std::map<pg_shard_t, pg_info_t> &all_info,
1631 const pg_info_t &auth_info,
1632 std::vector<int> *want,
1633 std::set<pg_shard_t> *async_recovery,
1634 const OSDMapRef osdmap) const;
1635 void choose_async_recovery_replicated(
1636 const std::map<pg_shard_t, pg_info_t> &all_info,
1637 const pg_info_t &auth_info,
1638 std::vector<int> *want,
1639 std::set<pg_shard_t> *async_recovery,
1640 const OSDMapRef osdmap) const;
1641
1642 bool recoverable(const std::vector<int> &want) const;
1643 bool choose_acting(pg_shard_t &auth_log_shard,
1644 bool restrict_to_up_acting,
1645 bool *history_les_bound,
1646 bool request_pg_temp_change_only = false);
1647
1648 bool search_for_missing(
1649 const pg_info_t &oinfo, const pg_missing_t &omissing,
1650 pg_shard_t fromosd,
1651 PeeringCtxWrapper &rctx);
1652 void build_might_have_unfound();
1653 void log_weirdness();
1654 void activate(
1655 ObjectStore::Transaction& t,
1656 epoch_t activation_epoch,
1657 PeeringCtxWrapper &ctx);
1658
1659 void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
1660 void merge_log(
1661 ObjectStore::Transaction& t, pg_info_t &oinfo,
1662 pg_log_t&& olog, pg_shard_t from);
1663
1664 void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);
1665 void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo,
1666 pg_log_t&& olog, pg_missing_t&& omissing,
1667 pg_shard_t from);
1668 void proc_replica_log(pg_info_t &oinfo, const pg_log_t &olog,
1669 pg_missing_t&& omissing, pg_shard_t from);
1670
1671 void calc_min_last_complete_ondisk() {
1672 eversion_t min = last_complete_ondisk;
1673 ceph_assert(!acting_recovery_backfill.empty());
1674 for (std::set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
1675 i != acting_recovery_backfill.end();
1676 ++i) {
1677 if (*i == get_primary()) continue;
1678 if (peer_last_complete_ondisk.count(*i) == 0)
1679 return; // we don't have complete info
1680 eversion_t a = peer_last_complete_ondisk[*i];
1681 if (a < min)
1682 min = a;
1683 }
1684 if (min == min_last_complete_ondisk)
1685 return;
1686 min_last_complete_ondisk = min;
1687 return;
1688 }
1689
  /// Fill notify_info with our info in response to query from peer `from`
  void fulfill_info(
    pg_shard_t from, const pg_query_t &query,
    std::pair<pg_shard_t, pg_info_t> &notify_info);
  /// Reply to a log query from peer `from`
  void fulfill_log(
    pg_shard_t from, const pg_query_t &query, epoch_t query_epoch);
  /// Dispatch query q to fulfill_info/fulfill_log as appropriate
  void fulfill_query(const MQuery& q, PeeringCtxWrapper &rctx);

  void try_mark_clean();

  void update_blocked_by();
  void update_calc_stats();

  void add_log_entry(const pg_log_entry_t& e, bool applied);

  /// Compute pg_trim_to (see also calc_trim_to_aggressive)
  void calc_trim_to();
  /// Compute pg_trim_to under the pglog hard-limit policy
  void calc_trim_to_aggressive();

public:
  PeeringState(
    CephContext *cct,
    pg_shard_t pg_whoami,
    spg_t spgid,
    const PGPool &pool,
    OSDMapRef curmap,
    DoutPrefixProvider *dpp,
    PeeringListener *pl);

  /// Process evt through the state machine, bracketed by
  /// start_handle/end_handle so rctx is available to the handlers
  void handle_event(const boost::statechart::event_base &evt,
		    PeeringCtx *rctx) {
    start_handle(rctx);
    machine.process_event(evt);
    end_handle();
  }

  /// Process evt (PGPeeringEventRef overload)
  void handle_event(PGPeeringEventRef evt,
		    PeeringCtx *rctx) {
    start_handle(rctx);
    machine.process_event(evt->get_event());
    end_handle();
  }

  /// Init fresh instance of PG
  void init(
    int role,
    const std::vector<int>& newup, int new_up_primary,
    const std::vector<int>& newacting, int new_acting_primary,
    const pg_history_t& history,
    const PastIntervals& pi,
    ObjectStore::Transaction &t);
1741
  /// Init pg instance from disk state; pg_log_init is called with our
  /// PGLog and its return value is forwarded to the caller
  template <typename F>
  auto init_from_disk_state(
    pg_info_t &&info_from_disk,
    PastIntervals &&past_intervals_from_disk,
    F &&pg_log_init) {
    info = std::move(info_from_disk);
    last_written_info = info;
    past_intervals = std::move(past_intervals_from_disk);
    auto ret = pg_log_init(pg_log);
    log_weirdness();
    return ret;
  }

  /// Set initial primary/acting
  void init_primary_up_acting(
    const std::vector<int> &newup,
    const std::vector<int> &newacting,
    int new_up_primary,
    int new_acting_primary);
  void init_hb_stamps();

  /// Set initial role
  void set_role(int r) {
    role = r;
  }

  /// Set predicates used for determining readable and recoverable
  void set_backend_predicates(
    IsPGReadablePredicate *is_readable,
    IsPGRecoverablePredicate *is_recoverable) {
    missing_loc.set_backend_predicates(is_readable, is_recoverable);
  }

  /// Send current pg_info to peers
  void share_pg_info();

  /// Get stats for child pgs
  void start_split_stats(
    const std::set<spg_t>& childpgs, std::vector<object_stat_sum_t> *out);

  /// Update new child with stats
  void finish_split_stats(
    const object_stat_sum_t& stats, ObjectStore::Transaction &t);

  /// Split state for child_pgid into *child
  void split_into(
    pg_t child_pgid, PeeringState *child, unsigned split_bits);

  /// Merge state from sources
  void merge_from(
    std::map<spg_t,PeeringState *>& sources,
    PeeringCtx &rctx,
    unsigned split_bits,
    const pg_merge_meta_t& last_pg_merge_meta);

  /// Permit stray replicas to purge now unnecessary state
  void purge_strays();

  /**
   * update_stats
   *
   * Mechanism for updating stats and/or history.  Pass t to mark
   * dirty and write out.  f should return true if stats should be
   * published to the osd.
   */
  void update_stats(
    std::function<bool(pg_history_t &, pg_stat_t &)> f,
    ObjectStore::Transaction *t = nullptr);

  /// As update_stats but without rescheduling (f returns void)
  void update_stats_wo_resched(
    std::function<void(pg_history_t &, pg_stat_t &)> f);

  /**
   * adjust_purged_snaps
   *
   * Mechanism for updating purged_snaps.  Marks dirty_info, big_dirty_info.
   */
  void adjust_purged_snaps(
    std::function<void(interval_set<snapid_t> &snaps)> f);

  /// Updates info.hit_set to hset_history, does not dirty
  void update_hset(const pg_hit_set_history_t &hset_history);

  /// Get all pg_shards that need recovery
  std::vector<pg_shard_t> get_replica_recovery_order() const;

  /**
   * update_history
   *
   * Merges new_history into info.history clearing past_intervals and
   * dirtying as needed.
   *
   * Calls PeeringListener::on_info_history_change()
   */
  void update_history(const pg_history_t& new_history);

  /**
   * prepare_stats_for_publish
   *
   * Returns updated pg_stat_t if stats have changed since
   * pg_stats_publish adding in unstable_stats.
   *
   * @param pg_stats_publish the latest pg_stat possessed by caller
   * @param unstable_stats additional stats which should be included in the
   *        returned stats
   * @return the up to date stats if it is different from the specified
   * @c pg_stats_publish
   */
  std::optional<pg_stat_t> prepare_stats_for_publish(
    const std::optional<pg_stat_t> &pg_stats_publish,
    const object_stat_collection_t &unstable_stats);

  /**
   * Merge entries updating missing as necessary on all
   * acting_recovery_backfill logs and missings (also missing_loc)
   */
  bool append_log_entries_update_missing(
    const mempool::osd_pglog::list<pg_log_entry_t> &entries,
    ObjectStore::Transaction &t,
    std::optional<eversion_t> trim_to,
    std::optional<eversion_t> roll_forward_to);

  /// Append log entries after first refreshing pg_trim_to (update_trim_to)
  void append_log_with_trim_to_updated(
    std::vector<pg_log_entry_t>&& log_entries,
    eversion_t roll_forward_to,
    ObjectStore::Transaction &t,
    bool transaction_applied,
    bool async) {
    update_trim_to();
    append_log(std::move(log_entries), pg_trim_to, roll_forward_to,
	       min_last_complete_ondisk, t, transaction_applied, async);
  }
1875
  /**
   * Updates local log to reflect new write from primary.
   */
  void append_log(
    std::vector<pg_log_entry_t>&& logv,
    eversion_t trim_to,
    eversion_t roll_forward_to,
    eversion_t min_last_complete_ondisk,
    ObjectStore::Transaction &t,
    bool transaction_applied,
    bool async);

  /**
   * retrieve the min last_backfill among backfill targets
   */
  hobject_t earliest_backfill() const;

  /**
   * Updates local log/missing to reflect new oob log update from primary
   */
  void merge_new_log_entries(
    const mempool::osd_pglog::list<pg_log_entry_t> &entries,
    ObjectStore::Transaction &t,
    std::optional<eversion_t> trim_to,
    std::optional<eversion_t> roll_forward_to);

  /// Update missing set to reflect e (TODOSAM: not sure why this is needed)
  void add_local_next_event(const pg_log_entry_t& e) {
    pg_log.missing_add_next_entry(e);
  }
1907
1908 /// Update log trim boundary
1909 void update_trim_to() {
1910 bool hard_limit = (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT));
1911 if (hard_limit)
1912 calc_trim_to_aggressive();
1913 else
1914 calc_trim_to();
1915 }
1916
  /// Pre-process pending update on hoid represented by logv
  void pre_submit_op(
    const hobject_t &hoid,
    const std::vector<pg_log_entry_t>& logv,
    eversion_t at_version);

  /// Signal that oid has been locally recovered to version v
  void recover_got(
    const hobject_t &oid, eversion_t v,
    bool is_delete,
    ObjectStore::Transaction &t);

  /// Signal that soid has been recovered on peer to version
  void on_peer_recover(
    pg_shard_t peer,
    const hobject_t &soid,
    const eversion_t &version);

  /// Notify that soid is being recovered on peer
  void begin_peer_recover(
    pg_shard_t peer,
    const hobject_t soid);

  /// Pull missing sets from all candidate peers
  bool discover_all_missing(
    BufferedRecoveryMessages &rctx);
1943
1944 /// Notify that hoid has been fully recocovered
1945 void object_recovered(
1946 const hobject_t &hoid,
1947 const object_stat_sum_t &stat_diff) {
1948 info.stats.stats.sum.add(stat_diff);
1949 missing_loc.recovered(hoid);
1950 }
1951
  /// Update info/stats to reflect backfill progress
  void update_backfill_progress(
    const hobject_t &updated_backfill,
    const pg_stat_t &updated_stats,
    bool preserve_local_num_bytes,
    ObjectStore::Transaction &t);

  /// Update info/stats to reflect completed backfill on hoid
  void update_complete_backfill_object_stats(
    const hobject_t &hoid,
    const pg_stat_t &stats);

  /// Update last_backfill for peer to new_last_backfill
  void update_peer_last_backfill(
    pg_shard_t peer,
    const hobject_t &new_last_backfill);

  /// Update info.stats with delta_stats for operation on soid
  void apply_op_stats(
    const hobject_t &soid,
    const object_stat_sum_t &delta_stats);

  /**
   * force_object_missing
   *
   * Force oid on peer to be missing at version.  If the object does not
   * currently need recovery, either candidates if provided or the remainder
   * of the acting set will be deemed to have the object.
   */
  void force_object_missing(
    const pg_shard_t &peer,
    const hobject_t &oid,
    eversion_t version) {
    // single-shard convenience wrapper around the set overload below
    force_object_missing(std::set<pg_shard_t>{peer}, oid, version);
  }
  void force_object_missing(
    const std::set<pg_shard_t> &peer,
    const hobject_t &oid,
    eversion_t version);

  /// Update state prior to backfilling soid on targets
  void prepare_backfill_for_missing(
    const hobject_t &soid,
    const eversion_t &version,
    const std::vector<pg_shard_t> &targets);

  /// Set targets with the right version for revert (see recover_primary)
  void set_revert_with_targets(
    const hobject_t &soid,
    const std::set<pg_shard_t> &good_peers);
2002
  /// Update lcod (last_complete_ondisk) for fromosd
  void update_peer_last_complete_ondisk(
    pg_shard_t fromosd,
    eversion_t lcod) {
    peer_last_complete_ondisk[fromosd] = lcod;
  }

  /// Update our own lcod
  void update_last_complete_ondisk(
    eversion_t lcod) {
    last_complete_ondisk = lcod;
  }

  /// Update state to reflect recovery up to version
  void recovery_committed_to(eversion_t version);

  /// Mark recovery complete (last_complete catches up to last_update)
  void local_recovery_complete() {
    info.last_complete = info.last_update;
  }

  /// Update last_requested pointer to v
  void set_last_requested(version_t v) {
    pg_log.set_last_requested(v);
  }

  /// Write dirty state to t
  void write_if_dirty(ObjectStore::Transaction& t);

  /// Mark write completed to v with persisted lc
  void complete_write(eversion_t v, eversion_t lc);

  /// Update local write applied pointer
  void local_write_applied(eversion_t v) {
    last_update_applied = v;
  }

  /// Updates peering state with new map
  void advance_map(
    OSDMapRef osdmap,       ///< [in] new osdmap
    OSDMapRef lastmap,      ///< [in] prev osdmap
    std::vector<int>& newup, ///< [in] new up set
    int up_primary,         ///< [in] new up primary
    std::vector<int>& newacting, ///< [in] new acting
    int acting_primary,     ///< [in] new acting primary
    PeeringCtx &rctx        ///< [out] recovery context
    );

  /// Activates most recently updated map
  void activate_map(
    PeeringCtx &rctx        ///< [out] recovery context
    );

  /// resets last_persisted_osdmap and marks (big) info dirty
  void reset_last_persisted() {
    last_persisted_osdmap = 0;
    dirty_info = true;
    dirty_big_info = true;
  }

  /// Signal shutdown beginning
  void shutdown() {
    deleting = true;
  }

  /// Signal shutdown complete
  void set_delete_complete() {
    deleted = true;
  }

  /// Dirty info and write out
  void force_write_state(ObjectStore::Transaction &t) {
    dirty_info = true;
    dirty_big_info = true;
    write_if_dirty(t);
  }

  /// Get current interval's readable_until
  ceph::signedspan get_readable_until() const {
    return readable_until;
  }

  /// Get prior intervals' readable_until upper bound
  ceph::signedspan get_prior_readable_until_ub() const {
    return prior_readable_until_ub;
  }

  /// Get prior intervals' readable_until down OSDs of note
  const std::set<int>& get_prior_readable_down_osds() const {
    return prior_readable_down_osds;
  }

  /// Reset prior intervals' readable_until upper bound (e.g., bc it passed)
  void clear_prior_readable_until_ub() {
    prior_readable_until_ub = ceph::signedspan::zero();
    prior_readable_down_osds.clear();
    info.history.prior_readable_until_ub = ceph::signedspan::zero();
  }
2101
2102 void renew_lease(ceph::signedspan now) {
2103 bool was_min = (readable_until_ub == readable_until);
2104 readable_until_ub_sent = now + readable_interval;
2105 if (was_min) {
2106 recalc_readable_until();
2107 }
2108 }
2109
  void send_lease();
  void schedule_renew_lease();

  /// Build a pg_lease_t from the current readable_until state
  pg_lease_t get_lease() {
    return pg_lease_t(readable_until, readable_until_ub_sent, readable_interval);
  }

  void proc_lease(const pg_lease_t& l);
  void proc_lease_ack(int from, const pg_lease_ack_t& la);
  void proc_renew_lease();

  /// Build an ack carrying the upper bound received from the primary
  pg_lease_ack_t get_lease_ack() {
    return pg_lease_ack_t(readable_until_ub_from_primary);
  }

  /// [primary] recalc readable_until[_ub] for the current interval
  void recalc_readable_until();

  //============================ const helpers ================================
  const char *get_current_state() const {
    return state_history.get_current_state();
  }
  epoch_t get_last_peering_reset() const {
    return last_peering_reset;
  }
  eversion_t get_last_rollback_info_trimmed_to_applied() const {
    return last_rollback_info_trimmed_to_applied;
  }
  /// Returns stable reference to internal pool structure
  const PGPool &get_pool() const {
    return pool;
  }
  /// Returns reference to current osdmap (asserts one is set)
  const OSDMapRef &get_osdmap() const {
    ceph_assert(osdmap_ref);
    return osdmap_ref;
  }
  /// Returns epoch of current osdmap
  epoch_t get_osdmap_epoch() const {
    return get_osdmap()->get_epoch();
  }

  bool is_ec_pg() const override {
    return pool.info.is_erasure();
  }
  int get_pg_size() const override {
    return pool.info.size;
  }
  bool is_deleting() const {
    return deleting;
  }
  bool is_deleted() const {
    return deleted;
  }
  const std::set<pg_shard_t> &get_upset() const override {
    return upset;
  }
  bool is_acting_recovery_backfill(pg_shard_t osd) const {
    return acting_recovery_backfill.count(osd);
  }
  bool is_acting(pg_shard_t osd) const {
    return has_shard(pool.info.is_erasure(), acting, osd);
  }
  bool is_up(pg_shard_t osd) const {
    return has_shard(pool.info.is_erasure(), up, osd);
  }
2175 }
2176 static bool has_shard(bool ec, const std::vector<int>& v, pg_shard_t osd) {
2177 if (ec) {
2178 return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd;
2179 } else {
2180 return std::find(v.begin(), v.end(), osd.osd) != v.end();
2181 }
2182 }
  const PastIntervals& get_past_intervals() const {
    return past_intervals;
  }
  /// acting osd that is not the primary
  bool is_nonprimary() const {
    return role >= 0 && pg_whoami != primary;
  }
  /// primary osd
  bool is_primary() const {
    return pg_whoami == primary;
  }
  /// true if deleted, or if peering was reset after epoch e
  bool pg_has_reset_since(epoch_t e) const {
    return deleted || e < get_last_peering_reset();
  }

  int get_role() const {
    return role;
  }
  const std::vector<int> &get_acting() const {
    return acting;
  }
  const std::set<pg_shard_t> &get_actingset() const {
    return actingset;
  }
  int get_acting_primary() const {
    return primary.osd;
  }
  pg_shard_t get_primary() const {
    return primary;
  }
  const std::vector<int> &get_up() const {
    return up;
  }
  int get_up_primary() const {
    return up_primary.osd;
  }

  bool is_backfill_target(pg_shard_t osd) const {
    return backfill_targets.count(osd);
  }
  const std::set<pg_shard_t> &get_backfill_targets() const {
    return backfill_targets;
  }
  bool is_async_recovery_target(pg_shard_t peer) const {
    return async_recovery_targets.count(peer);
  }
  const std::set<pg_shard_t> &get_async_recovery_targets() const {
    return async_recovery_targets;
  }
  const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
    return acting_recovery_backfill;
  }

  const PGLog &get_pg_log() const {
    return pg_log;
  }

  // PG_STATE_* bitmask helpers
  bool state_test(uint64_t m) const { return (state & m) != 0; }
  void state_set(uint64_t m) { state |= m; }
  void state_clear(uint64_t m) { state &= ~m; }

  bool is_complete() const { return info.last_complete == info.last_update; }
  bool should_send_notify() const { return send_notify; }

  uint64_t get_state() const { return state; }
  bool is_active() const { return state_test(PG_STATE_ACTIVE); }
  bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
  bool is_peering() const { return state_test(PG_STATE_PEERING); }
  bool is_down() const { return state_test(PG_STATE_DOWN); }
  bool is_recovery_unfound() const {
    return state_test(PG_STATE_RECOVERY_UNFOUND);
  }
  bool is_backfilling() const {
    return state_test(PG_STATE_BACKFILLING);
  }
  bool is_backfill_unfound() const {
    return state_test(PG_STATE_BACKFILL_UNFOUND);
  }
  bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
  bool is_clean() const { return state_test(PG_STATE_CLEAN); }
  bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
  bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
  bool is_remapped() const { return state_test(PG_STATE_REMAPPED); }
  bool is_peered() const {
    return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
  }
  bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
  bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
  bool is_repair() const { return state_test(PG_STATE_REPAIR); }
  /// true if our log is empty (no updates recorded)
  bool is_empty() const { return info.last_update == eversion_t(0,0); }

  bool get_need_up_thru() const {
    return need_up_thru;
  }

  bool is_forced_recovery_or_backfill() const {
    return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL);
  }

  bool is_backfill_reserved() const {
    return backfill_reserved;
  }

  bool is_backfill_reserving() const {
    return backfill_reserving;
  }

  ceph_release_t get_last_require_osd_release() const {
    return last_require_osd_release;
  }

  const pg_info_t &get_info() const {
    return info;
  }

  const decltype(peer_info) &get_peer_info() const {
    return peer_info;
  }
  const decltype(peer_missing) &get_peer_missing() const {
    return peer_missing;
  }
2304 const pg_missing_const_i &get_peer_missing(const pg_shard_t &peer) const {
2305 if (peer == pg_whoami) {
2306 return pg_log.get_missing();
2307 } else {
2308 assert(peer_missing.count(peer));
2309 return peer_missing.find(peer)->second;
2310 }
2311 }
2312 const pg_info_t&get_peer_info(pg_shard_t peer) const {
2313 assert(peer_info.count(peer));
2314 return peer_info.find(peer)->second;
2315 }
  bool has_peer_info(pg_shard_t peer) const {
    return peer_info.count(peer);
  }

  bool needs_recovery() const;
  bool needs_backfill() const;

  /**
   * Returns whether a particular object can be safely read on this replica
   */
  bool can_serve_replica_read(const hobject_t &hoid) {
    ceph_assert(!is_primary());
    // safe iff there has been no write to hoid since the minimum
    // last_complete_ondisk
    return !pg_log.get_log().has_write_since(
      hoid, get_min_last_complete_ondisk());
  }

  /**
   * Returns whether the current acting set is able to go active
   * and serve writes. It needs to satisfy min_size and any
   * applicable stretch cluster constraints.
   */
  bool acting_set_writeable() {
    return (actingset.size() >= pool.info.min_size) &&
      (pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL));
  }

  /**
   * Returns whether all peers which might have unfound objects have been
   * queried or marked lost.
   */
  bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
2347 bool all_missing_unfound() const {
2348 const auto& missing = pg_log.get_missing();
2349 if (!missing.have_missing())
2350 return false;
2351 for (auto& m : missing.get_items()) {
2352 if (!missing_loc.is_unfound(m.first))
2353 return false;
2354 }
2355 return true;
2356 }
2357
  /// Deletes are performed during peering when the osdmap lacks the
  /// RECOVERY_DELETES flag
  bool perform_deletes_during_peering() const {
    return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES));
  }


  bool have_unfound() const {
    return missing_loc.have_unfound();
  }
  uint64_t get_num_unfound() const {
    return missing_loc.num_unfound();
  }

  bool have_missing() const {
    return pg_log.get_missing().num_missing() > 0;
  }
  unsigned int get_num_missing() const {
    return pg_log.get_missing().num_missing();
  }

  const MissingLoc &get_missing_loc() const {
    return missing_loc;
  }

  const MissingLoc::missing_by_count_t &get_missing_by_count() const {
    return missing_loc.get_missing_by_count();
  }

  eversion_t get_min_last_complete_ondisk() const {
    return min_last_complete_ondisk;
  }

  eversion_t get_pg_trim_to() const {
    return pg_trim_to;
  }

  eversion_t get_last_update_applied() const {
    return last_update_applied;
  }

  eversion_t get_last_update_ondisk() const {
    return last_update_ondisk;
  }

  bool debug_has_dirty_state() const {
    return dirty_info || dirty_big_info;
  }

  std::string get_pg_state_string() const {
    return pg_state_string(state);
  }

  /// Dump representation of past_intervals to out
  void print_past_intervals(std::ostream &out) const {
    out << "[" << past_intervals.get_bounds()
	<< ")/" << past_intervals.size();
  }

  void dump_history(ceph::Formatter *f) const {
    state_history.dump(f);
  }

  /// Dump formatted peering status
  void dump_peering_state(ceph::Formatter *f);

private:
  /// Mask feature vector with feature set from new peer
  void apply_peer_features(uint64_t f) { peer_features &= f; }

  /// Reset feature vector to default
  void reset_min_peer_features() {
    peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
  }
public:
  /// Get feature vector common to all known peers with this pg
  uint64_t get_min_peer_features() const { return peer_features; }

  /// Get feature vector common to acting set
  uint64_t get_min_acting_features() const { return acting_features; }

  /// Get feature vector common to up/acting set
  uint64_t get_min_upacting_features() const { return upacting_features; }


  // Flush control interface
private:
  /**
   * Start additional flush (blocks needs_flush/activation until
   * complete_flush is called once for each start_flush call as
   * required by start_flush_on_transaction).
   */
  void start_flush(ObjectStore::Transaction &t) {
    flushes_in_progress++;
    pl->start_flush_on_transaction(t);
  }
public:
  /// True if there are outstanding flushes
  bool needs_flush() const {
    return flushes_in_progress > 0;
  }
  /// Must be called once per start_flush
  void complete_flush();

  friend std::ostream &operator<<(std::ostream &out, const PeeringState &ps);
2461 };
2462
/// Stream a human-readable summary of ps
std::ostream &operator<<(std::ostream &out, const PeeringState &ps);