1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include <boost/statechart/custom_reaction.hpp>
7 #include <boost/statechart/event.hpp>
8 #include <boost/statechart/simple_state.hpp>
9 #include <boost/statechart/state.hpp>
10 #include <boost/statechart/state_machine.hpp>
11 #include <boost/statechart/transition.hpp>
12 #include <boost/statechart/event_base.hpp>
16 #include "include/ceph_assert.h"
17 #include "include/common_fwd.h"
20 #include "PGStateUtils.h"
21 #include "PGPeeringEvent.h"
22 #include "osd_types.h"
23 #include "osd_types_fmt.h"
24 #include "os/ObjectStore.h"
26 #include "MissingLoc.h"
27 #include "osd/osd_perf_counters.h"
28 #include "common/ostream_temp.h"
// NOTE(review): this extraction is garbled — original source line numbers are
// fused into the text and several lines are elided (the enclosing
// "struct PGPool {" header, member declarations such as the `v` read below,
// constructor initializers, and closing braces are missing from this view).
// Comments annotate the visible fragments only; code is left byte-identical.
// Cached snap context for the pool (seeded from pg_pool_t in the ctor below).
36 SnapContext snapc
; // the default pool snapc, ready to go.
// Constructor fragment: takes the OSDMap, pool id, pool info, and pool name;
// caches the map epoch and copies the pool's snap context into `snapc`.
38 PGPool(OSDMapRef map
, int64_t i
, const pg_pool_t
& info
,
39 const std::string
& name
)
40 : cached_epoch(map
->get_epoch()),
44 snapc
= info
.get_snap_context();
// Refresh cached state from a newer OSDMap (defined out of line).
47 void update(OSDMapRef map
);
// Read-lease interval: the pool-level READ_LEASE_INTERVAL option wins when
// set; otherwise fall back to osd_heartbeat_grace scaled by
// osd_pool_default_read_lease_ratio from the config proxy.
// NOTE(review): the declaration of `v` (presumably double/int64_t) is elided
// from this view — confirm against the full header.
49 ceph::timespan
get_readable_interval(ConfigProxy
&conf
) const {
51 if (info
.opts
.get(pool_opts_t::READ_LEASE_INTERVAL
, &v
)) {
52 return ceph::make_timespan(v
);
// Fallback path: heartbeat grace * default read-lease ratio.
54 auto hbi
= conf
->osd_heartbeat_grace
;
55 auto fac
= conf
->osd_pool_default_read_lease_ratio
;
56 return ceph::make_timespan(hbi
* fac
);
// fmt library formatter specialization for PGPool (lets PGPool be used with
// fmt::format / osd_types_fmt.h-style logging).
// NOTE(review): the leading "template <>" line and the format() body/closers
// are elided from this extraction.
62 struct fmt::formatter
<PGPool
> {
// parse(): no custom format spec is consumed — returns the context begin.
63 template <typename ParseContext
>
64 constexpr auto parse(ParseContext
& ctx
) { return ctx
.begin(); }
// format(): writes the PGPool representation into the output iterator via
// fmt::format_to; the format string arguments are elided from this view.
66 template <typename FormatContext
>
67 auto format(const PGPool
& pool
, FormatContext
& ctx
)
69 return fmt::format_to(ctx
.out(),
79 // [primary only] content recovery state
// Buffers outgoing per-OSD messages during peering so they can be sent in a
// batch. The message pointer type differs by build: MessageURef under
// crimson/seastar, MessageRef for classic ceph-osd.
// NOTE(review): the "#else" (and trailing "#endif") separating the two
// message_map declarations is elided from this extraction.
80 struct BufferedRecoveryMessages
{
81 #if defined(WITH_SEASTAR)
82 std::map
<int, std::vector
<MessageURef
>> message_map
;
84 std::map
<int, std::vector
<MessageRef
>> message_map
;
87 BufferedRecoveryMessages() = default;
88 BufferedRecoveryMessages(PeeringCtx
&ctx
);
// Merge another buffer into this one; the incoming buffer's messages are
// placed ahead of ours for each target (moved, not copied).
90 void accept_buffered_messages(BufferedRecoveryMessages
&m
) {
91 for (auto &[target
, ls
] : m
.message_map
) {
92 auto &ovec
= message_map
[target
];
93 // put buffered messages in front
94 ls
.reserve(ls
.size() + ovec
.size())
;
95 ls
.insert(ls
.end(), std::make_move_iterator(ovec
.begin()), std::make_move_iterator(ovec
.end()));
// Queue a message for the given OSD rank (perfect-forwarded into the buffer).
101 template <class MsgT
> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
102 void send_osd_message(int target
, MsgT
&& m
) {
103 message_map
[target
].emplace_back(std::forward
<MsgT
>(m
))
;
// Typed helpers that wrap send_osd_message (defined out of line).
105 void send_notify(int to
, const pg_notify_t
&n
);
106 void send_query(int to
, spg_t spgid
, const pg_query_t
&q
);
107 void send_info(int to
, spg_t to_spgid
,
108 epoch_t min_epoch
, epoch_t cur_epoch
,
109 const pg_info_t
&info
,
110 std::optional
<pg_lease_t
> lease
= {},
111 std::optional
<pg_lease_ack_t
> lease_ack
= {});
// Per-peer heartbeat clock bookkeeping: maintains [lb, ub] bounds on the
// delta between the peer OSD's mono clock and ours, updated from ping /
// ping-reply timestamps. All mutators take `lock`.
// NOTE(review): declarations of the `osd` and `up_from` members printed and
// compared below are elided from this extraction, as are several closing
// braces and early-return bodies.
114 struct HeartbeatStamps
: public RefCountedObject
{
115 mutable ceph::mutex lock
= ceph::make_mutex("HeartbeatStamps::lock");
119 // we maintain an upper and lower bound on the delta between our local
120 // mono_clock time (minus the startup_time) to the peer OSD's mono_clock
121 // time (minus its startup_time).
123 // delta is (remote_clock_time - local_clock_time), so that
124 // local_time + delta -> peer_time, and peer_time - delta -> local_time.
126 // we have an upper and lower bound value on this delta, meaning the
127 // value of the remote clock is somewhere between [my_time + lb, my_time + ub]
129 // conversely, if we have a remote timestamp T, then that is
130 // [T - ub, T - lb] in terms of the local clock. i.e., if you are
131 // substracting the delta, then take care that you swap the role of the
134 /// lower bound on peer clock - local clock
135 std::optional
<ceph::signedspan
> peer_clock_delta_lb
;
137 /// upper bound on peer clock - local clock
138 std::optional
<ceph::signedspan
> peer_clock_delta_ub
;
140 /// highest up_from we've seen from this rank
// Debug dump of osd rank, up_from, and the delta bounds (under lock).
143 void print(std::ostream
& out
) const {
144 std::lock_guard
l(lock
);
145 out
<< "hbstamp(osd." << osd
<< " up_from " << up_from
146 << " peer_clock_delta [";
147 if (peer_clock_delta_lb
) {
148 out
<< *peer_clock_delta_lb
;
151 if (peer_clock_delta_ub
) {
152 out
<< *peer_clock_delta_ub
;
// Called when we send a ping: report our best upper bound on the peer's
// delta (the negation of our lower bound), if we have one.
157 void sent_ping(std::optional
<ceph::signedspan
> *delta_ub
) {
158 std::lock_guard
l(lock
);
159 // the non-primaries need a lower bound on remote clock - local clock. if
160 // we assume the transit for the last ping_reply was
161 // instantaneous, that would be (the negative of) our last
162 // peer_clock_delta_lb value.
163 if (peer_clock_delta_lb
) {
164 *delta_ub
= - *peer_clock_delta_lb
;
// Called when a ping arrives: ignore stale senders (this_up_from < up_from;
// the early-return body is elided here), advance up_from, and refresh both
// delta bounds assuming instantaneous transit for the lower bound.
168 void got_ping(epoch_t this_up_from
,
169 ceph::signedspan now
,
170 ceph::signedspan peer_send_stamp
,
171 std::optional
<ceph::signedspan
> delta_ub
,
172 ceph::signedspan
*out_delta_ub
) {
173 std::lock_guard
l(lock
);
174 if (this_up_from
< up_from
) {
177 if (this_up_from
> up_from
) {
178 up_from
= this_up_from
;
180 peer_clock_delta_lb
= peer_send_stamp
- now
;
181 peer_clock_delta_ub
= delta_ub
;
182 *out_delta_ub
= - *peer_clock_delta_lb
;
// Called when a ping reply arrives: refresh both delta bounds.
185 void got_ping_reply(ceph::signedspan now
,
186 ceph::signedspan peer_send_stamp
,
187 std::optional
<ceph::signedspan
> delta_ub
) {
188 std::lock_guard
l(lock
);
189 peer_clock_delta_lb
= peer_send_stamp
- now
;
190 peer_clock_delta_ub
= delta_ub
;
// Construction goes through make_ref<> (see FRIEND_MAKE_REF); ctor takes the
// peer's osd rank. Initializer list continues on elided lines.
194 FRIEND_MAKE_REF(HeartbeatStamps
);
195 HeartbeatStamps(int o
)
196 : RefCountedObject(NULL
),
// Ref-counted handle for HeartbeatStamps (pairs with FRIEND_MAKE_REF above).
199 using HeartbeatStampsRef
= ceph::ref_t
<HeartbeatStamps
>;
// Stream-insertion operator for HeartbeatStamps; the body is elided from this
// extraction (presumably delegates to HeartbeatStamps::print — confirm
// against the full header).
201 inline std::ostream
& operator<<(std::ostream
& out
, const HeartbeatStamps
& hb
)
// Peering context: the buffered outgoing messages (via the base class) plus
// the transaction to populate and an optional heartbeat handle. Move-only —
// copying the embedded ObjectStore::Transaction is deliberately disallowed.
// NOTE(review): the closing braces of reset_transaction() and the struct are
// elided from this extraction.
208 struct PeeringCtx
: BufferedRecoveryMessages
{
209 ObjectStore::Transaction transaction
;
210 HBHandle
* handle
= nullptr;
212 PeeringCtx() = default;
214 PeeringCtx(const PeeringCtx
&) = delete;
215 PeeringCtx
&operator=(const PeeringCtx
&) = delete;
217 PeeringCtx(PeeringCtx
&&) = default;
218 PeeringCtx
&operator=(PeeringCtx
&&) = default;
// Discard any accumulated transaction state by swapping in a fresh one.
220 void reset_transaction() {
221 transaction
= ObjectStore::Transaction();
226 * Wraps PeeringCtx to hide the difference between buffering messages to
227 * be sent after flush or immediately.
// Holds references into a PeeringCtx (or a caller-supplied message buffer)
// and forwards all send_* calls to `msgs`. Non-owning; move-constructible.
// NOTE(review): the first constructor's "msgs(wrapped)," initializer line and
// the second constructor's ":" line are elided from this extraction, as are
// the struct's closing braces and the tail of send_info's forwarding call.
229 struct PeeringCtxWrapper
{
231 BufferedRecoveryMessages
&msgs
;
232 ObjectStore::Transaction
&transaction
;
233 HBHandle
* const handle
= nullptr;
// Wrap a PeeringCtx directly: messages buffer into the ctx itself.
235 PeeringCtxWrapper(PeeringCtx
&wrapped
) :
237 transaction(wrapped
.transaction
),
238 handle(wrapped
.handle
) {}
// Wrap with a separate message buffer (e.g. for immediate sending) while
// still sharing the ctx's transaction and handle.
240 PeeringCtxWrapper(BufferedRecoveryMessages
&buf
, PeeringCtx
&wrapped
)
242 transaction(wrapped
.transaction
),
243 handle(wrapped
.handle
) {}
245 PeeringCtxWrapper(PeeringCtxWrapper
&&ctx
) = default;
// Forwarders — identical signatures to BufferedRecoveryMessages.
247 template <class MsgT
> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
248 void send_osd_message(int target
, MsgT
&& m
) {
249 msgs
.send_osd_message(target
, std::forward
<MsgT
>(m
));
251 void send_notify(int to
, const pg_notify_t
&n
) {
252 msgs
.send_notify(to
, n
);
254 void send_query(int to
, spg_t spgid
, const pg_query_t
&q
) {
255 msgs
.send_query(to
, spgid
, q
);
257 void send_info(int to
, spg_t to_spgid
,
258 epoch_t min_epoch
, epoch_t cur_epoch
,
259 const pg_info_t
&info
,
260 std::optional
<pg_lease_t
> lease
= {},
261 std::optional
<pg_lease_ack_t
> lease_ack
= {}) {
262 msgs
.send_info(to
, to_spgid
, min_epoch
, cur_epoch
, info
,
267 /* Encapsulates PG recovery process */
268 class PeeringState
: public MissingLoc::MappingInfo
{
270 struct PeeringListener
: public EpochSource
{
271 /// Prepare t with written information
272 virtual void prepare_write(
274 pg_info_t
&last_written_info
,
275 PastIntervals
&past_intervals
,
279 bool need_write_epoch
,
280 ObjectStore::Transaction
&t
) = 0;
282 /// Notify that info/history changed (generally to update scrub registration)
283 virtual void on_info_history_change() = 0;
285 /// Notify PG that Primary/Replica status has changed (to update scrub registration)
286 virtual void on_primary_status_change(bool was_primary
, bool now_primary
) = 0;
288 /// Need to reschedule next scrub. Assuming no change in role
289 virtual void reschedule_scrub() = 0;
291 /// Notify that a scrub has been requested
292 virtual void scrub_requested(scrub_level_t scrub_level
, scrub_type_t scrub_type
) = 0;
294 /// Return current snap_trimq size
295 virtual uint64_t get_snap_trimq_size() const = 0;
297 /// Send cluster message to osd
298 #if defined(WITH_SEASTAR)
299 virtual void send_cluster_message(
300 int osd
, MessageURef m
, epoch_t epoch
, bool share_map_update
=false) = 0;
302 virtual void send_cluster_message(
303 int osd
, MessageRef m
, epoch_t epoch
, bool share_map_update
=false) = 0;
305 /// Send pg_created to mon
306 virtual void send_pg_created(pg_t pgid
) = 0;
308 virtual ceph::signedspan
get_mnow() const = 0;
309 virtual HeartbeatStampsRef
get_hb_stamps(int peer
) = 0;
310 virtual void schedule_renew_lease(epoch_t plr
, ceph::timespan delay
) = 0;
311 virtual void queue_check_readable(epoch_t lpr
, ceph::timespan delay
) = 0;
312 virtual void recheck_readable() = 0;
314 virtual unsigned get_target_pg_log_entries() const = 0;
316 // ============ Flush state ==================
318 * try_flush_or_schedule_async()
320 * If true, caller may assume all past operations on this pg
321 * have been flushed. Else, caller will receive an on_flushed()
322 * call once the flush has completed.
324 virtual bool try_flush_or_schedule_async() = 0;
325 /// Arranges for a commit on t to call on_flushed() once flushed.
326 virtual void start_flush_on_transaction(
327 ObjectStore::Transaction
&t
) = 0;
328 /// Notification that all outstanding flushes for interval have completed
329 virtual void on_flushed() = 0;
331 //============= Recovery ====================
332 /// Arrange for even to be queued after delay
333 virtual void schedule_event_after(
334 PGPeeringEventRef event
,
337 * request_local_background_io_reservation
339 * Request reservation at priority with on_grant queued on grant
340 * and on_preempt on preempt
342 virtual void request_local_background_io_reservation(
344 PGPeeringEventURef on_grant
,
345 PGPeeringEventURef on_preempt
) = 0;
346 /// Modify pending local background reservation request priority
347 virtual void update_local_background_io_priority(
348 unsigned priority
) = 0;
349 /// Cancel pending local background reservation request
350 virtual void cancel_local_background_io_reservation() = 0;
353 * request_remote_background_io_reservation
355 * Request reservation at priority with on_grant queued on grant
356 * and on_preempt on preempt
358 virtual void request_remote_recovery_reservation(
360 PGPeeringEventURef on_grant
,
361 PGPeeringEventURef on_preempt
) = 0;
362 /// Cancel pending remote background reservation request
363 virtual void cancel_remote_recovery_reservation() = 0;
365 /// Arrange for on_commit to be queued upon commit of t
366 virtual void schedule_event_on_commit(
367 ObjectStore::Transaction
&t
,
368 PGPeeringEventRef on_commit
) = 0;
370 //============================ HB =============================
371 /// Update hb set to peers
372 virtual void update_heartbeat_peers(std::set
<int> peers
) = 0;
374 /// Std::set targets being probed in this interval
375 virtual void set_probe_targets(const std::set
<pg_shard_t
> &probe_set
) = 0;
376 /// Clear targets being probed in this interval
377 virtual void clear_probe_targets() = 0;
379 /// Queue for a pg_temp of wanted
380 virtual void queue_want_pg_temp(const std::vector
<int> &wanted
) = 0;
381 /// Clear queue for a pg_temp of wanted
382 virtual void clear_want_pg_temp() = 0;
384 /// Arrange for stats to be shipped to mon to be updated for this pg
385 virtual void publish_stats_to_osd() = 0;
386 /// Clear stats to be shipped to mon for this pg
387 virtual void clear_publish_stats() = 0;
389 /// Notification to check outstanding operation targets
390 virtual void check_recovery_sources(const OSDMapRef
& newmap
) = 0;
391 /// Notification to check outstanding blocklist
392 virtual void check_blocklisted_watchers() = 0;
393 /// Notification to clear state associated with primary
394 virtual void clear_primary_state() = 0;
396 // =================== Event notification ====================
397 virtual void on_pool_change() = 0;
398 virtual void on_role_change() = 0;
399 virtual void on_change(ObjectStore::Transaction
&t
) = 0;
400 virtual void on_activate(interval_set
<snapid_t
> to_trim
) = 0;
401 virtual void on_activate_complete() = 0;
402 virtual void on_new_interval() = 0;
403 virtual Context
*on_clean() = 0;
404 virtual void on_activate_committed() = 0;
405 virtual void on_active_exit() = 0;
407 // ====================== PG deletion =======================
408 /// Notification of removal complete, t must be populated to complete removal
409 virtual void on_removal(ObjectStore::Transaction
&t
) = 0;
410 /// Perform incremental removal work
411 virtual std::pair
<ghobject_t
, bool> do_delete_work(
412 ObjectStore::Transaction
&t
, ghobject_t _next
) = 0;
414 // ======================= PG Merge =========================
415 virtual void clear_ready_to_merge() = 0;
416 virtual void set_not_ready_to_merge_target(pg_t pgid
, pg_t src
) = 0;
417 virtual void set_not_ready_to_merge_source(pg_t pgid
) = 0;
418 virtual void set_ready_to_merge_target(eversion_t lu
, epoch_t les
, epoch_t lec
) = 0;
419 virtual void set_ready_to_merge_source(eversion_t lu
) = 0;
421 // ==================== Std::map notifications ===================
422 virtual void on_active_actmap() = 0;
423 virtual void on_active_advmap(const OSDMapRef
&osdmap
) = 0;
424 virtual epoch_t
cluster_osdmap_trim_lower_bound() = 0;
426 // ============ recovery reservation notifications ==========
427 virtual void on_backfill_reserved() = 0;
428 virtual void on_backfill_canceled() = 0;
429 virtual void on_recovery_reserved() = 0;
431 // ================recovery space accounting ================
432 virtual bool try_reserve_recovery_space(
433 int64_t primary_num_bytes
, int64_t local_num_bytes
) = 0;
434 virtual void unreserve_recovery_space() = 0;
436 // ================== Peering log events ====================
437 /// Get handler for rolling forward/back log entries
438 virtual PGLog::LogEntryHandlerRef
get_log_handler(
439 ObjectStore::Transaction
&t
) = 0;
441 // ============ On disk representation changes ==============
442 virtual void rebuild_missing_set_with_deletes(PGLog
&pglog
) = 0;
444 // ======================= Logging ==========================
445 virtual PerfCounters
&get_peering_perf() = 0;
446 virtual PerfCounters
&get_perf_logger() = 0;
447 virtual void log_state_enter(const char *state
) = 0;
448 virtual void log_state_exit(
449 const char *state_name
, utime_t enter_time
,
450 uint64_t events
, utime_t event_dur
) = 0;
451 virtual void dump_recovery_info(ceph::Formatter
*f
) const = 0;
453 virtual OstreamTemp
get_clog_info() = 0;
454 virtual OstreamTemp
get_clog_error() = 0;
455 virtual OstreamTemp
get_clog_debug() = 0;
457 virtual ~PeeringListener() {}
460 struct QueryState
: boost::statechart::event
< QueryState
> {
462 explicit QueryState(ceph::Formatter
*f
) : f(f
) {}
463 void print(std::ostream
*out
) const {
468 struct QueryUnfound
: boost::statechart::event
< QueryUnfound
> {
470 explicit QueryUnfound(ceph::Formatter
*f
) : f(f
) {}
471 void print(std::ostream
*out
) const {
472 *out
<< "QueryUnfound";
476 struct AdvMap
: boost::statechart::event
< AdvMap
> {
479 std::vector
<int> newup
, newacting
;
480 int up_primary
, acting_primary
;
482 OSDMapRef osdmap
, OSDMapRef lastmap
,
483 std::vector
<int>& newup
, int up_primary
,
484 std::vector
<int>& newacting
, int acting_primary
):
485 osdmap(osdmap
), lastmap(lastmap
),
487 newacting(newacting
),
488 up_primary(up_primary
),
489 acting_primary(acting_primary
) {}
490 void print(std::ostream
*out
) const {
495 struct ActMap
: boost::statechart::event
< ActMap
> {
496 ActMap() : boost::statechart::event
< ActMap
>() {}
497 void print(std::ostream
*out
) const {
501 struct Activate
: boost::statechart::event
< Activate
> {
502 epoch_t activation_epoch
;
503 explicit Activate(epoch_t q
) : boost::statechart::event
< Activate
>(),
504 activation_epoch(q
) {}
505 void print(std::ostream
*out
) const {
506 *out
<< "Activate from " << activation_epoch
;
509 struct ActivateCommitted
: boost::statechart::event
< ActivateCommitted
> {
511 epoch_t activation_epoch
;
512 explicit ActivateCommitted(epoch_t e
, epoch_t ae
)
513 : boost::statechart::event
< ActivateCommitted
>(),
515 activation_epoch(ae
) {}
516 void print(std::ostream
*out
) const {
517 *out
<< "ActivateCommitted from " << activation_epoch
518 << " processed at " << epoch
;
522 struct UnfoundBackfill
: boost::statechart::event
<UnfoundBackfill
> {
523 explicit UnfoundBackfill() {}
524 void print(std::ostream
*out
) const {
525 *out
<< "UnfoundBackfill";
528 struct UnfoundRecovery
: boost::statechart::event
<UnfoundRecovery
> {
529 explicit UnfoundRecovery() {}
530 void print(std::ostream
*out
) const {
531 *out
<< "UnfoundRecovery";
535 struct RequestScrub
: boost::statechart::event
<RequestScrub
> {
538 explicit RequestScrub(bool d
, bool r
) : deep(scrub_level_t(d
)), repair(scrub_type_t(r
)) {}
539 void print(std::ostream
*out
) const {
540 *out
<< "RequestScrub(" << ((deep
==scrub_level_t::deep
) ? "deep" : "shallow")
541 << ((repair
==scrub_type_t::do_repair
) ? " repair)" : ")");
545 TrivialEvent(Initialize
)
546 TrivialEvent(GotInfo
)
547 TrivialEvent(NeedUpThru
)
548 TrivialEvent(Backfilled
)
549 TrivialEvent(LocalBackfillReserved
)
550 TrivialEvent(RejectTooFullRemoteReservation
)
551 TrivialEvent(RequestBackfill
)
552 TrivialEvent(RemoteRecoveryPreempted
)
553 TrivialEvent(RemoteBackfillPreempted
)
554 TrivialEvent(BackfillTooFull
)
555 TrivialEvent(RecoveryTooFull
)
557 TrivialEvent(MakePrimary
)
558 TrivialEvent(MakeStray
)
559 TrivialEvent(NeedActingChange
)
560 TrivialEvent(IsIncomplete
)
563 TrivialEvent(AllReplicasRecovered
)
564 TrivialEvent(DoRecovery
)
565 TrivialEvent(LocalRecoveryReserved
)
566 TrivialEvent(AllRemotesReserved
)
567 TrivialEvent(AllBackfillsReserved
)
568 TrivialEvent(GoClean
)
570 TrivialEvent(AllReplicasActivated
)
572 TrivialEvent(IntervalFlush
)
574 TrivialEvent(DeleteStart
)
575 TrivialEvent(DeleteSome
)
577 TrivialEvent(SetForceRecovery
)
578 TrivialEvent(UnsetForceRecovery
)
579 TrivialEvent(SetForceBackfill
)
580 TrivialEvent(UnsetForceBackfill
)
582 TrivialEvent(DeleteReserved
)
583 TrivialEvent(DeleteInterrupted
)
585 TrivialEvent(CheckReadable
)
587 void start_handle(PeeringCtx
*new_ctx
);
589 void begin_block_outgoing();
590 void end_block_outgoing();
591 void clear_blocked_outgoing();
596 class PeeringMachine
: public boost::statechart::state_machine
< PeeringMachine
, Initial
> {
599 PGStateHistory
*state_history
;
602 DoutPrefixProvider
*dpp
;
606 uint64_t event_count
;
608 void clear_event_counters() {
609 event_time
= utime_t();
613 void log_enter(const char *state_name
);
614 void log_exit(const char *state_name
, utime_t duration
);
617 PeeringState
*state
, CephContext
*cct
,
619 DoutPrefixProvider
*dpp
,
621 PGStateHistory
*state_history
) :
623 state_history(state_history
),
624 cct(cct
), spgid(spgid
),
628 /* Accessor functions for state methods */
629 ObjectStore::Transaction
& get_cur_transaction() {
630 ceph_assert(state
->rctx
);
631 return state
->rctx
->transaction
;
634 PeeringCtxWrapper
&get_recovery_ctx() {
636 return *(state
->rctx
);
639 void send_notify(int to
, const pg_notify_t
&n
) {
640 ceph_assert(state
->rctx
);
641 state
->rctx
->send_notify(to
, n
);
643 void send_query(int to
, const pg_query_t
&query
) {
644 state
->rctx
->send_query(
646 spg_t(spgid
.pgid
, query
.to
),
650 friend class PeeringMachine
;
670 // WaitRemoteBackfillReserved
671 // WaitLocalBackfillReserved
675 // WaitRemoteRecoveryReserved
676 // WaitLocalRecoveryReserved
680 // RepWaitBackfillReserved
681 // RepWaitRecoveryReserved
684 // WaitDeleteReserved
688 struct Crashed
: boost::statechart::state
< Crashed
, PeeringMachine
>, NamedState
{
689 explicit Crashed(my_context ctx
);
694 struct Initial
: boost::statechart::state
< Initial
, PeeringMachine
>, NamedState
{
695 explicit Initial(my_context ctx
);
698 typedef boost::mpl::list
<
699 boost::statechart::transition
< Initialize
, Reset
>,
700 boost::statechart::custom_reaction
< NullEvt
>,
701 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
704 boost::statechart::result
react(const MNotifyRec
&);
705 boost::statechart::result
react(const MInfoRec
&);
706 boost::statechart::result
react(const MLogRec
&);
707 boost::statechart::result
react(const boost::statechart::event_base
&) {
708 return discard_event();
712 struct Reset
: boost::statechart::state
< Reset
, PeeringMachine
>, NamedState
{
713 explicit Reset(my_context ctx
);
716 typedef boost::mpl::list
<
717 boost::statechart::custom_reaction
< QueryState
>,
718 boost::statechart::custom_reaction
< QueryUnfound
>,
719 boost::statechart::custom_reaction
< AdvMap
>,
720 boost::statechart::custom_reaction
< ActMap
>,
721 boost::statechart::custom_reaction
< NullEvt
>,
722 boost::statechart::custom_reaction
< IntervalFlush
>,
723 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
725 boost::statechart::result
react(const QueryState
& q
);
726 boost::statechart::result
react(const QueryUnfound
& q
);
727 boost::statechart::result
react(const AdvMap
&);
728 boost::statechart::result
react(const ActMap
&);
729 boost::statechart::result
react(const IntervalFlush
&);
730 boost::statechart::result
react(const boost::statechart::event_base
&) {
731 return discard_event();
737 struct Started
: boost::statechart::state
< Started
, PeeringMachine
, Start
>, NamedState
{
738 explicit Started(my_context ctx
);
741 typedef boost::mpl::list
<
742 boost::statechart::custom_reaction
< QueryState
>,
743 boost::statechart::custom_reaction
< QueryUnfound
>,
744 boost::statechart::custom_reaction
< AdvMap
>,
745 boost::statechart::custom_reaction
< IntervalFlush
>,
747 boost::statechart::custom_reaction
< NullEvt
>,
748 boost::statechart::custom_reaction
<SetForceRecovery
>,
749 boost::statechart::custom_reaction
<UnsetForceRecovery
>,
750 boost::statechart::custom_reaction
<SetForceBackfill
>,
751 boost::statechart::custom_reaction
<UnsetForceBackfill
>,
752 boost::statechart::custom_reaction
<RequestScrub
>,
753 boost::statechart::custom_reaction
<CheckReadable
>,
755 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
757 boost::statechart::result
react(const QueryState
& q
);
758 boost::statechart::result
react(const QueryUnfound
& q
);
759 boost::statechart::result
react(const AdvMap
&);
760 boost::statechart::result
react(const IntervalFlush
&);
761 boost::statechart::result
react(const boost::statechart::event_base
&) {
762 return discard_event();
769 struct Start
: boost::statechart::state
< Start
, Started
>, NamedState
{
770 explicit Start(my_context ctx
);
773 typedef boost::mpl::list
<
774 boost::statechart::transition
< MakePrimary
, Primary
>,
775 boost::statechart::transition
< MakeStray
, Stray
>
780 struct WaitActingChange
;
784 struct Primary
: boost::statechart::state
< Primary
, Started
, Peering
>, NamedState
{
785 explicit Primary(my_context ctx
);
788 typedef boost::mpl::list
<
789 boost::statechart::custom_reaction
< ActMap
>,
790 boost::statechart::custom_reaction
< MNotifyRec
>,
791 boost::statechart::custom_reaction
<SetForceRecovery
>,
792 boost::statechart::custom_reaction
<UnsetForceRecovery
>,
793 boost::statechart::custom_reaction
<SetForceBackfill
>,
794 boost::statechart::custom_reaction
<UnsetForceBackfill
>,
795 boost::statechart::custom_reaction
<RequestScrub
>
797 boost::statechart::result
react(const ActMap
&);
798 boost::statechart::result
react(const MNotifyRec
&);
799 boost::statechart::result
react(const SetForceRecovery
&);
800 boost::statechart::result
react(const UnsetForceRecovery
&);
801 boost::statechart::result
react(const SetForceBackfill
&);
802 boost::statechart::result
react(const UnsetForceBackfill
&);
803 boost::statechart::result
react(const RequestScrub
&);
806 struct WaitActingChange
: boost::statechart::state
< WaitActingChange
, Primary
>,
808 typedef boost::mpl::list
<
809 boost::statechart::custom_reaction
< QueryState
>,
810 boost::statechart::custom_reaction
< QueryUnfound
>,
811 boost::statechart::custom_reaction
< AdvMap
>,
812 boost::statechart::custom_reaction
< MLogRec
>,
813 boost::statechart::custom_reaction
< MInfoRec
>,
814 boost::statechart::custom_reaction
< MNotifyRec
>
816 explicit WaitActingChange(my_context ctx
);
817 boost::statechart::result
react(const QueryState
& q
);
818 boost::statechart::result
react(const QueryUnfound
& q
);
819 boost::statechart::result
react(const AdvMap
&);
820 boost::statechart::result
react(const MLogRec
&);
821 boost::statechart::result
react(const MInfoRec
&);
822 boost::statechart::result
react(const MNotifyRec
&);
829 struct Peering
: boost::statechart::state
< Peering
, Primary
, GetInfo
>, NamedState
{
830 PastIntervals::PriorSet prior_set
;
831 bool history_les_bound
; //< need osd_find_best_info_ignore_history_les
833 explicit Peering(my_context ctx
);
836 typedef boost::mpl::list
<
837 boost::statechart::custom_reaction
< QueryState
>,
838 boost::statechart::custom_reaction
< QueryUnfound
>,
839 boost::statechart::transition
< Activate
, Active
>,
840 boost::statechart::custom_reaction
< AdvMap
>
842 boost::statechart::result
react(const QueryState
& q
);
843 boost::statechart::result
react(const QueryUnfound
& q
);
844 boost::statechart::result
react(const AdvMap
&advmap
);
847 struct WaitLocalRecoveryReserved
;
849 struct Active
: boost::statechart::state
< Active
, Primary
, Activating
>, NamedState
{
850 explicit Active(my_context ctx
);
853 const std::set
<pg_shard_t
> remote_shards_to_reserve_recovery
;
854 const std::set
<pg_shard_t
> remote_shards_to_reserve_backfill
;
855 bool all_replicas_activated
;
857 typedef boost::mpl::list
<
858 boost::statechart::custom_reaction
< QueryState
>,
859 boost::statechart::custom_reaction
< QueryUnfound
>,
860 boost::statechart::custom_reaction
< ActMap
>,
861 boost::statechart::custom_reaction
< AdvMap
>,
862 boost::statechart::custom_reaction
< MInfoRec
>,
863 boost::statechart::custom_reaction
< MNotifyRec
>,
864 boost::statechart::custom_reaction
< MLogRec
>,
865 boost::statechart::custom_reaction
< MTrim
>,
866 boost::statechart::custom_reaction
< Backfilled
>,
867 boost::statechart::custom_reaction
< ActivateCommitted
>,
868 boost::statechart::custom_reaction
< AllReplicasActivated
>,
869 boost::statechart::custom_reaction
< DeferRecovery
>,
870 boost::statechart::custom_reaction
< DeferBackfill
>,
871 boost::statechart::custom_reaction
< UnfoundRecovery
>,
872 boost::statechart::custom_reaction
< UnfoundBackfill
>,
873 boost::statechart::custom_reaction
< RemoteReservationRevokedTooFull
>,
874 boost::statechart::custom_reaction
< RemoteReservationRevoked
>,
875 boost::statechart::custom_reaction
< DoRecovery
>,
876 boost::statechart::custom_reaction
< RenewLease
>,
877 boost::statechart::custom_reaction
< MLeaseAck
>,
878 boost::statechart::custom_reaction
< CheckReadable
>
880 boost::statechart::result
react(const QueryState
& q
);
881 boost::statechart::result
react(const QueryUnfound
& q
);
882 boost::statechart::result
react(const ActMap
&);
883 boost::statechart::result
react(const AdvMap
&);
884 boost::statechart::result
react(const MInfoRec
& infoevt
);
885 boost::statechart::result
react(const MNotifyRec
& notevt
);
886 boost::statechart::result
react(const MLogRec
& logevt
);
887 boost::statechart::result
react(const MTrim
& trimevt
);
888 boost::statechart::result
react(const Backfilled
&) {
889 return discard_event();
891 boost::statechart::result
react(const ActivateCommitted
&);
892 boost::statechart::result
react(const AllReplicasActivated
&);
893 boost::statechart::result
react(const RenewLease
&);
894 boost::statechart::result
react(const MLeaseAck
&);
895 boost::statechart::result
react(const DeferRecovery
& evt
) {
896 return discard_event();
898 boost::statechart::result
react(const DeferBackfill
& evt
) {
899 return discard_event();
901 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
902 return discard_event();
904 boost::statechart::result
react(const UnfoundBackfill
& evt
) {
905 return discard_event();
907 boost::statechart::result
react(const RemoteReservationRevokedTooFull
&) {
908 return discard_event();
910 boost::statechart::result
react(const RemoteReservationRevoked
&) {
911 return discard_event();
913 boost::statechart::result
react(const DoRecovery
&) {
914 return discard_event();
916 boost::statechart::result
react(const CheckReadable
&);
917 void all_activated_and_committed();
920 struct Clean
: boost::statechart::state
< Clean
, Active
>, NamedState
{
921 typedef boost::mpl::list
<
922 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
923 boost::statechart::custom_reaction
<SetForceRecovery
>,
924 boost::statechart::custom_reaction
<SetForceBackfill
>
926 explicit Clean(my_context ctx
);
928 boost::statechart::result
react(const boost::statechart::event_base
&) {
929 return discard_event();
933 struct Recovered
: boost::statechart::state
< Recovered
, Active
>, NamedState
{
934 typedef boost::mpl::list
<
935 boost::statechart::transition
< GoClean
, Clean
>,
936 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
937 boost::statechart::custom_reaction
< AllReplicasActivated
>
939 explicit Recovered(my_context ctx
);
941 boost::statechart::result
react(const AllReplicasActivated
&) {
942 post_event(GoClean());
943 return forward_event();
947 struct Backfilling
: boost::statechart::state
< Backfilling
, Active
>, NamedState
{
948 typedef boost::mpl::list
<
949 boost::statechart::custom_reaction
< Backfilled
>,
950 boost::statechart::custom_reaction
< DeferBackfill
>,
951 boost::statechart::custom_reaction
< UnfoundBackfill
>,
952 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
953 boost::statechart::custom_reaction
< RemoteReservationRevokedTooFull
>,
954 boost::statechart::custom_reaction
< RemoteReservationRevoked
>
956 explicit Backfilling(my_context ctx
);
957 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
) {
958 // for compat with old peers
959 post_event(RemoteReservationRevokedTooFull());
960 return discard_event();
962 void backfill_release_reservations();
963 boost::statechart::result
react(const Backfilled
& evt
);
964 boost::statechart::result
react(const RemoteReservationRevokedTooFull
& evt
);
965 boost::statechart::result
react(const RemoteReservationRevoked
& evt
);
966 boost::statechart::result
react(const DeferBackfill
& evt
);
967 boost::statechart::result
react(const UnfoundBackfill
& evt
);
968 void cancel_backfill();
972 struct WaitRemoteBackfillReserved
: boost::statechart::state
< WaitRemoteBackfillReserved
, Active
>, NamedState
{
973 typedef boost::mpl::list
<
974 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
975 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
976 boost::statechart::custom_reaction
< RemoteReservationRevoked
>,
977 boost::statechart::transition
< AllBackfillsReserved
, Backfilling
>
979 std::set
<pg_shard_t
>::const_iterator backfill_osd_it
;
980 explicit WaitRemoteBackfillReserved(my_context ctx
);
983 boost::statechart::result
react(const RemoteBackfillReserved
& evt
);
984 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
);
985 boost::statechart::result
react(const RemoteReservationRevoked
& evt
);
988 struct WaitLocalBackfillReserved
: boost::statechart::state
< WaitLocalBackfillReserved
, Active
>, NamedState
{
989 typedef boost::mpl::list
<
990 boost::statechart::transition
< LocalBackfillReserved
, WaitRemoteBackfillReserved
>,
991 boost::statechart::custom_reaction
< RemoteBackfillReserved
>
993 explicit WaitLocalBackfillReserved(my_context ctx
);
994 boost::statechart::result
react(const RemoteBackfillReserved
& evt
) {
996 return discard_event();
1001 struct NotBackfilling
: boost::statechart::state
< NotBackfilling
, Active
>, NamedState
{
1002 typedef boost::mpl::list
<
1003 boost::statechart::custom_reaction
< QueryUnfound
>,
1004 boost::statechart::transition
< RequestBackfill
, WaitLocalBackfillReserved
>,
1005 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
1006 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>
1008 explicit NotBackfilling(my_context ctx
);
1010 boost::statechart::result
react(const QueryUnfound
& q
);
1011 boost::statechart::result
react(const RemoteBackfillReserved
& evt
);
1012 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
);
1015 struct NotRecovering
: boost::statechart::state
< NotRecovering
, Active
>, NamedState
{
1016 typedef boost::mpl::list
<
1017 boost::statechart::custom_reaction
< QueryUnfound
>,
1018 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
1019 boost::statechart::custom_reaction
< DeferRecovery
>,
1020 boost::statechart::custom_reaction
< UnfoundRecovery
>
1022 explicit NotRecovering(my_context ctx
);
1023 boost::statechart::result
react(const QueryUnfound
& q
);
1024 boost::statechart::result
react(const DeferRecovery
& evt
) {
1026 return discard_event();
1028 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
1030 return discard_event();
1036 struct RepNotRecovering
;
1037 struct ReplicaActive
: boost::statechart::state
< ReplicaActive
, Started
, RepNotRecovering
>, NamedState
{
1038 explicit ReplicaActive(my_context ctx
);
1041 typedef boost::mpl::list
<
1042 boost::statechart::custom_reaction
< QueryState
>,
1043 boost::statechart::custom_reaction
< QueryUnfound
>,
1044 boost::statechart::custom_reaction
< ActMap
>,
1045 boost::statechart::custom_reaction
< MQuery
>,
1046 boost::statechart::custom_reaction
< MInfoRec
>,
1047 boost::statechart::custom_reaction
< MLogRec
>,
1048 boost::statechart::custom_reaction
< MTrim
>,
1049 boost::statechart::custom_reaction
< Activate
>,
1050 boost::statechart::custom_reaction
< ActivateCommitted
>,
1051 boost::statechart::custom_reaction
< DeferRecovery
>,
1052 boost::statechart::custom_reaction
< DeferBackfill
>,
1053 boost::statechart::custom_reaction
< UnfoundRecovery
>,
1054 boost::statechart::custom_reaction
< UnfoundBackfill
>,
1055 boost::statechart::custom_reaction
< RemoteBackfillPreempted
>,
1056 boost::statechart::custom_reaction
< RemoteRecoveryPreempted
>,
1057 boost::statechart::custom_reaction
< RecoveryDone
>,
1058 boost::statechart::transition
<DeleteStart
, ToDelete
>,
1059 boost::statechart::custom_reaction
< MLease
>
1061 boost::statechart::result
react(const QueryState
& q
);
1062 boost::statechart::result
react(const QueryUnfound
& q
);
1063 boost::statechart::result
react(const MInfoRec
& infoevt
);
1064 boost::statechart::result
react(const MLogRec
& logevt
);
1065 boost::statechart::result
react(const MTrim
& trimevt
);
1066 boost::statechart::result
react(const ActMap
&);
1067 boost::statechart::result
react(const MQuery
&);
1068 boost::statechart::result
react(const Activate
&);
1069 boost::statechart::result
react(const ActivateCommitted
&);
1070 boost::statechart::result
react(const MLease
&);
1071 boost::statechart::result
react(const RecoveryDone
&) {
1072 return discard_event();
1074 boost::statechart::result
react(const DeferRecovery
& evt
) {
1075 return discard_event();
1077 boost::statechart::result
react(const DeferBackfill
& evt
) {
1078 return discard_event();
1080 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
1081 return discard_event();
1083 boost::statechart::result
react(const UnfoundBackfill
& evt
) {
1084 return discard_event();
1086 boost::statechart::result
react(const RemoteBackfillPreempted
& evt
) {
1087 return discard_event();
1089 boost::statechart::result
react(const RemoteRecoveryPreempted
& evt
) {
1090 return discard_event();
1094 struct RepRecovering
: boost::statechart::state
< RepRecovering
, ReplicaActive
>, NamedState
{
1095 typedef boost::mpl::list
<
1096 boost::statechart::transition
< RecoveryDone
, RepNotRecovering
>,
1097 // for compat with old peers
1098 boost::statechart::transition
< RemoteReservationRejectedTooFull
, RepNotRecovering
>,
1099 boost::statechart::transition
< RemoteReservationCanceled
, RepNotRecovering
>,
1100 boost::statechart::custom_reaction
< BackfillTooFull
>,
1101 boost::statechart::custom_reaction
< RemoteRecoveryPreempted
>,
1102 boost::statechart::custom_reaction
< RemoteBackfillPreempted
>
1104 explicit RepRecovering(my_context ctx
);
1105 boost::statechart::result
react(const RemoteRecoveryPreempted
&evt
);
1106 boost::statechart::result
react(const BackfillTooFull
&evt
);
1107 boost::statechart::result
react(const RemoteBackfillPreempted
&evt
);
1111 struct RepWaitBackfillReserved
: boost::statechart::state
< RepWaitBackfillReserved
, ReplicaActive
>, NamedState
{
1112 typedef boost::mpl::list
<
1113 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
1114 boost::statechart::custom_reaction
< RejectTooFullRemoteReservation
>,
1115 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
1116 boost::statechart::custom_reaction
< RemoteReservationCanceled
>
1118 explicit RepWaitBackfillReserved(my_context ctx
);
1120 boost::statechart::result
react(const RemoteBackfillReserved
&evt
);
1121 boost::statechart::result
react(const RejectTooFullRemoteReservation
&evt
);
1122 boost::statechart::result
react(const RemoteReservationRejectedTooFull
&evt
);
1123 boost::statechart::result
react(const RemoteReservationCanceled
&evt
);
1126 struct RepWaitRecoveryReserved
: boost::statechart::state
< RepWaitRecoveryReserved
, ReplicaActive
>, NamedState
{
1127 typedef boost::mpl::list
<
1128 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1129 // for compat with old peers
1130 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
1131 boost::statechart::custom_reaction
< RemoteReservationCanceled
>
1133 explicit RepWaitRecoveryReserved(my_context ctx
);
1135 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
);
1136 boost::statechart::result
react(const RemoteReservationRejectedTooFull
&evt
) {
1137 // for compat with old peers
1138 post_event(RemoteReservationCanceled());
1139 return discard_event();
1141 boost::statechart::result
react(const RemoteReservationCanceled
&evt
);
1144 struct RepNotRecovering
: boost::statechart::state
< RepNotRecovering
, ReplicaActive
>, NamedState
{
1145 typedef boost::mpl::list
<
1146 boost::statechart::custom_reaction
< RequestRecoveryPrio
>,
1147 boost::statechart::custom_reaction
< RequestBackfillPrio
>,
1148 boost::statechart::custom_reaction
< RejectTooFullRemoteReservation
>,
1149 boost::statechart::transition
< RemoteReservationRejectedTooFull
, RepNotRecovering
>,
1150 boost::statechart::transition
< RemoteReservationCanceled
, RepNotRecovering
>,
1151 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1152 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
1153 boost::statechart::transition
< RecoveryDone
, RepNotRecovering
> // for compat with pre-reservation peers
1155 explicit RepNotRecovering(my_context ctx
);
1156 boost::statechart::result
react(const RequestRecoveryPrio
&evt
);
1157 boost::statechart::result
react(const RequestBackfillPrio
&evt
);
1158 boost::statechart::result
react(const RemoteBackfillReserved
&evt
) {
1159 // my reservation completion raced with a RELEASE from primary
1160 return discard_event();
1162 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
) {
1163 // my reservation completion raced with a RELEASE from primary
1164 return discard_event();
1166 boost::statechart::result
react(const RejectTooFullRemoteReservation
&evt
);
1170 struct Recovering
: boost::statechart::state
< Recovering
, Active
>, NamedState
{
1171 typedef boost::mpl::list
<
1172 boost::statechart::custom_reaction
< AllReplicasRecovered
>,
1173 boost::statechart::custom_reaction
< DeferRecovery
>,
1174 boost::statechart::custom_reaction
< UnfoundRecovery
>,
1175 boost::statechart::custom_reaction
< RequestBackfill
>
1177 explicit Recovering(my_context ctx
);
1179 void release_reservations(bool cancel
= false);
1180 boost::statechart::result
react(const AllReplicasRecovered
&evt
);
1181 boost::statechart::result
react(const DeferRecovery
& evt
);
1182 boost::statechart::result
react(const UnfoundRecovery
& evt
);
1183 boost::statechart::result
react(const RequestBackfill
&evt
);
1186 struct WaitRemoteRecoveryReserved
: boost::statechart::state
< WaitRemoteRecoveryReserved
, Active
>, NamedState
{
1187 typedef boost::mpl::list
<
1188 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1189 boost::statechart::transition
< AllRemotesReserved
, Recovering
>
1191 std::set
<pg_shard_t
>::const_iterator remote_recovery_reservation_it
;
1192 explicit WaitRemoteRecoveryReserved(my_context ctx
);
1193 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
);
1197 struct WaitLocalRecoveryReserved
: boost::statechart::state
< WaitLocalRecoveryReserved
, Active
>, NamedState
{
1198 typedef boost::mpl::list
<
1199 boost::statechart::transition
< LocalRecoveryReserved
, WaitRemoteRecoveryReserved
>,
1200 boost::statechart::custom_reaction
< RecoveryTooFull
>
1202 explicit WaitLocalRecoveryReserved(my_context ctx
);
1204 boost::statechart::result
react(const RecoveryTooFull
&evt
);
1207 struct Activating
: boost::statechart::state
< Activating
, Active
>, NamedState
{
1208 typedef boost::mpl::list
<
1209 boost::statechart::transition
< AllReplicasRecovered
, Recovered
>,
1210 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
1211 boost::statechart::transition
< RequestBackfill
, WaitLocalBackfillReserved
>
1213 explicit Activating(my_context ctx
);
1217 struct Stray
: boost::statechart::state
< Stray
, Started
>,
1219 explicit Stray(my_context ctx
);
1222 typedef boost::mpl::list
<
1223 boost::statechart::custom_reaction
< MQuery
>,
1224 boost::statechart::custom_reaction
< MLogRec
>,
1225 boost::statechart::custom_reaction
< MInfoRec
>,
1226 boost::statechart::custom_reaction
< ActMap
>,
1227 boost::statechart::custom_reaction
< RecoveryDone
>,
1228 boost::statechart::transition
<DeleteStart
, ToDelete
>
1230 boost::statechart::result
react(const MQuery
& query
);
1231 boost::statechart::result
react(const MLogRec
& logevt
);
1232 boost::statechart::result
react(const MInfoRec
& infoevt
);
1233 boost::statechart::result
react(const ActMap
&);
1234 boost::statechart::result
react(const RecoveryDone
&) {
1235 return discard_event();
1239 struct WaitDeleteReserved
;
1240 struct ToDelete
: boost::statechart::state
<ToDelete
, Started
, WaitDeleteReserved
>, NamedState
{
1241 unsigned priority
= 0;
1242 typedef boost::mpl::list
<
1243 boost::statechart::custom_reaction
< ActMap
>,
1244 boost::statechart::custom_reaction
< ActivateCommitted
>,
1245 boost::statechart::custom_reaction
< DeleteSome
>
1247 explicit ToDelete(my_context ctx
);
1248 boost::statechart::result
react(const ActMap
&evt
);
1249 boost::statechart::result
react(const DeleteSome
&evt
) {
1250 // happens if we drop out of Deleting due to reprioritization etc.
1251 return discard_event();
1253 boost::statechart::result
react(const ActivateCommitted
&) {
1254 // Can happens if we were activated as a stray but not actually pulled
1255 // from prior to the pg going clean and sending a delete.
1256 return discard_event();
1262 struct WaitDeleteReserved
: boost::statechart::state
<WaitDeleteReserved
,
1263 ToDelete
>, NamedState
{
1264 typedef boost::mpl::list
<
1265 boost::statechart::transition
<DeleteReserved
, Deleting
>
1267 explicit WaitDeleteReserved(my_context ctx
);
1271 struct Deleting
: boost::statechart::state
<Deleting
,
1272 ToDelete
>, NamedState
{
1273 typedef boost::mpl::list
<
1274 boost::statechart::custom_reaction
< DeleteSome
>,
1275 boost::statechart::transition
<DeleteInterrupted
, WaitDeleteReserved
>
1278 explicit Deleting(my_context ctx
);
1279 boost::statechart::result
react(const DeleteSome
&evt
);
1285 struct GetInfo
: boost::statechart::state
< GetInfo
, Peering
>, NamedState
{
1286 std::set
<pg_shard_t
> peer_info_requested
;
1288 explicit GetInfo(my_context ctx
);
1292 typedef boost::mpl::list
<
1293 boost::statechart::custom_reaction
< QueryState
>,
1294 boost::statechart::custom_reaction
< QueryUnfound
>,
1295 boost::statechart::transition
< GotInfo
, GetLog
>,
1296 boost::statechart::custom_reaction
< MNotifyRec
>,
1297 boost::statechart::transition
< IsDown
, Down
>
1299 boost::statechart::result
react(const QueryState
& q
);
1300 boost::statechart::result
react(const QueryUnfound
& q
);
1301 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1304 struct GotLog
: boost::statechart::event
< GotLog
> {
1305 GotLog() : boost::statechart::event
< GotLog
>() {}
1308 struct GetLog
: boost::statechart::state
< GetLog
, Peering
>, NamedState
{
1309 pg_shard_t auth_log_shard
;
1310 boost::intrusive_ptr
<MOSDPGLog
> msg
;
1312 explicit GetLog(my_context ctx
);
1315 typedef boost::mpl::list
<
1316 boost::statechart::custom_reaction
< QueryState
>,
1317 boost::statechart::custom_reaction
< QueryUnfound
>,
1318 boost::statechart::custom_reaction
< MLogRec
>,
1319 boost::statechart::custom_reaction
< GotLog
>,
1320 boost::statechart::custom_reaction
< AdvMap
>,
1321 boost::statechart::transition
< NeedActingChange
, WaitActingChange
>,
1322 boost::statechart::transition
< IsIncomplete
, Incomplete
>
1324 boost::statechart::result
react(const AdvMap
&);
1325 boost::statechart::result
react(const QueryState
& q
);
1326 boost::statechart::result
react(const QueryUnfound
& q
);
1327 boost::statechart::result
react(const MLogRec
& logevt
);
1328 boost::statechart::result
react(const GotLog
&);
1333 struct GetMissing
: boost::statechart::state
< GetMissing
, Peering
>, NamedState
{
1334 std::set
<pg_shard_t
> peer_missing_requested
;
1336 explicit GetMissing(my_context ctx
);
1339 typedef boost::mpl::list
<
1340 boost::statechart::custom_reaction
< QueryState
>,
1341 boost::statechart::custom_reaction
< QueryUnfound
>,
1342 boost::statechart::custom_reaction
< MLogRec
>,
1343 boost::statechart::transition
< NeedUpThru
, WaitUpThru
>
1345 boost::statechart::result
react(const QueryState
& q
);
1346 boost::statechart::result
react(const QueryUnfound
& q
);
1347 boost::statechart::result
react(const MLogRec
& logevt
);
1350 struct WaitUpThru
: boost::statechart::state
< WaitUpThru
, Peering
>, NamedState
{
1351 explicit WaitUpThru(my_context ctx
);
1354 typedef boost::mpl::list
<
1355 boost::statechart::custom_reaction
< QueryState
>,
1356 boost::statechart::custom_reaction
< QueryUnfound
>,
1357 boost::statechart::custom_reaction
< ActMap
>,
1358 boost::statechart::custom_reaction
< MLogRec
>
1360 boost::statechart::result
react(const QueryState
& q
);
1361 boost::statechart::result
react(const QueryUnfound
& q
);
1362 boost::statechart::result
react(const ActMap
& am
);
1363 boost::statechart::result
react(const MLogRec
& logrec
);
1366 struct Down
: boost::statechart::state
< Down
, Peering
>, NamedState
{
1367 explicit Down(my_context ctx
);
1368 typedef boost::mpl::list
<
1369 boost::statechart::custom_reaction
< QueryState
>,
1370 boost::statechart::custom_reaction
< QueryUnfound
>,
1371 boost::statechart::custom_reaction
< MNotifyRec
>
1373 boost::statechart::result
react(const QueryState
& q
);
1374 boost::statechart::result
react(const QueryUnfound
& q
);
1375 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1379 struct Incomplete
: boost::statechart::state
< Incomplete
, Peering
>, NamedState
{
1380 typedef boost::mpl::list
<
1381 boost::statechart::custom_reaction
< AdvMap
>,
1382 boost::statechart::custom_reaction
< MNotifyRec
>,
1383 boost::statechart::custom_reaction
< QueryUnfound
>,
1384 boost::statechart::custom_reaction
< QueryState
>
1386 explicit Incomplete(my_context ctx
);
1387 boost::statechart::result
react(const AdvMap
&advmap
);
1388 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1389 boost::statechart::result
react(const QueryUnfound
& q
);
1390 boost::statechart::result
react(const QueryState
& q
);
1394 PGStateHistory state_history
;
1397 DoutPrefixProvider
*dpp
;
1398 PeeringListener
*pl
;
1400 /// context passed in by state machine caller
1401 PeeringCtx
*orig_ctx
;
1403 /// populated if we are buffering messages pending a flush
1404 std::optional
<BufferedRecoveryMessages
> messages_pending_flush
;
1407 * populated between start_handle() and end_handle(), points into
1408 * the message lists for messages_pending_flush while blocking messages
1409 * or into orig_ctx otherwise
1411 std::optional
<PeeringCtxWrapper
> rctx
;
1416 OSDMapRef osdmap_ref
; ///< Reference to current OSDMap
1417 PGPool pool
; ///< Current pool state
1418 epoch_t last_persisted_osdmap
= 0; ///< Last osdmap epoch persisted
1422 * Peering state information
1424 int role
= -1; ///< 0 = primary, 1 = replica, -1=none.
1425 uint64_t state
= 0; ///< PG_STATE_*
1427 pg_shard_t primary
; ///< id/shard of primary
1428 pg_shard_t pg_whoami
; ///< my id/shard
1429 pg_shard_t up_primary
; ///< id/shard of primary of up set
1430 std::vector
<int> up
; ///< crush mapping without temp pgs
1431 std::set
<pg_shard_t
> upset
; ///< up in set form
1432 std::vector
<int> acting
; ///< actual acting set for the current interval
1433 std::set
<pg_shard_t
> actingset
; ///< acting in set form
1435 /// union of acting, recovery, and backfill targets
1436 std::set
<pg_shard_t
> acting_recovery_backfill
;
1438 std::vector
<HeartbeatStampsRef
> hb_stamps
;
1440 ceph::signedspan readable_interval
= ceph::signedspan::zero();
1442 /// how long we can service reads in this interval
1443 ceph::signedspan readable_until
= ceph::signedspan::zero();
1445 /// upper bound on any acting OSDs' readable_until in this interval
1446 ceph::signedspan readable_until_ub
= ceph::signedspan::zero();
1448 /// upper bound from prior interval(s)
1449 ceph::signedspan prior_readable_until_ub
= ceph::signedspan::zero();
1451 /// pg instances from prior interval(s) that may still be readable
1452 std::set
<int> prior_readable_down_osds
;
1454 /// [replica] upper bound we got from the primary (primary's clock)
1455 ceph::signedspan readable_until_ub_from_primary
= ceph::signedspan::zero();
1457 /// [primary] last upper bound shared by primary to replicas
1458 ceph::signedspan readable_until_ub_sent
= ceph::signedspan::zero();
1460 /// [primary] readable ub acked by acting set members
1461 std::vector
<ceph::signedspan
> acting_readable_until_ub
;
1463 bool send_notify
= false; ///< True if a notify needs to be sent to the primary
1465 bool dirty_info
= false; ///< small info structu on disk out of date
1466 bool dirty_big_info
= false; ///< big info structure on disk out of date
1468 pg_info_t info
; ///< current pg info
1469 pg_info_t last_written_info
; ///< last written info
1470 PastIntervals past_intervals
; ///< information about prior pg mappings
1471 PGLog pg_log
; ///< pg log
1473 epoch_t last_peering_reset
= 0; ///< epoch of last peering reset
1475 /// last_update that has committed; ONLY DEFINED WHEN is_active()
1476 eversion_t last_update_ondisk
;
1477 eversion_t last_complete_ondisk
; ///< last_complete that has committed.
1478 eversion_t last_update_applied
; ///< last_update readable
1479 /// last version to which rollback_info trimming has been applied
1480 eversion_t last_rollback_info_trimmed_to_applied
;
1482 /// Counter to determine when pending flushes have completed
1483 unsigned flushes_in_progress
= 0;
1488 std::set
<pg_shard_t
> stray_set
; ///< non-acting osds that have PG data.
1489 std::map
<pg_shard_t
, pg_info_t
> peer_info
; ///< info from peers (stray or prior)
1490 std::map
<pg_shard_t
, int64_t> peer_bytes
; ///< Peer's num_bytes from peer_info
1491 std::set
<pg_shard_t
> peer_purged
; ///< peers purged
1492 std::map
<pg_shard_t
, pg_missing_t
> peer_missing
; ///< peer missing sets
1493 std::set
<pg_shard_t
> peer_log_requested
; ///< logs i've requested (and start stamps)
1494 std::set
<pg_shard_t
> peer_missing_requested
; ///< missing sets requested
1496 /// features supported by all peers
1497 uint64_t peer_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1498 /// features supported by acting set
1499 uint64_t acting_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1500 /// features supported by up and acting
1501 uint64_t upacting_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1503 /// most recently consumed osdmap's require_osd_version
1504 ceph_release_t last_require_osd_release
;
1506 std::vector
<int> want_acting
; ///< non-empty while peering needs a new acting set
1508 // acting_recovery_backfill contains shards that are acting,
1509 // async recovery targets, or backfill targets.
1510 std::map
<pg_shard_t
,eversion_t
> peer_last_complete_ondisk
;
1512 /// up: min over last_complete_ondisk, peer_last_complete_ondisk
1513 eversion_t min_last_complete_ondisk
;
1514 /// point to which the log should be trimmed
1515 eversion_t pg_trim_to
;
1517 std::set
<int> blocked_by
; ///< osds we are blocked by (for pg stats)
1519 bool need_up_thru
= false; ///< true if osdmap with updated up_thru needed
1521 /// I deleted these strays; ignore racing PGInfo from them
1522 std::set
<pg_shard_t
> peer_activated
;
1524 std::set
<pg_shard_t
> backfill_targets
; ///< osds to be backfilled
1525 std::set
<pg_shard_t
> async_recovery_targets
; ///< osds to be async recovered
1527 /// osds which might have objects on them which are unfound on the primary
1528 std::set
<pg_shard_t
> might_have_unfound
;
1530 bool deleting
= false; /// true while in removing or OSD is shutting down
1531 std::atomic
<bool> deleted
= {false}; /// true once deletion complete
1533 MissingLoc missing_loc
; ///< information about missing objects
1535 bool backfill_reserved
= false;
1536 bool backfill_reserving
= false;
1538 PeeringMachine machine
;
1540 void update_osdmap_ref(OSDMapRef newmap
) {
1541 osdmap_ref
= std::move(newmap
);
1544 void update_heartbeat_peers();
1545 void query_unfound(Formatter
*f
, std::string state
);
1546 bool proc_replica_info(
1547 pg_shard_t from
, const pg_info_t
&oinfo
, epoch_t send_epoch
);
1548 void remove_down_peer_info(const OSDMapRef
&osdmap
);
1549 void check_recovery_sources(const OSDMapRef
& map
);
1550 void set_last_peering_reset();
1551 void check_full_transition(OSDMapRef lastmap
, OSDMapRef osdmap
);
1552 bool should_restart_peering(
1554 int newactingprimary
,
1555 const std::vector
<int>& newup
,
1556 const std::vector
<int>& newacting
,
1559 void start_peering_interval(
1560 const OSDMapRef lastmap
,
1561 const std::vector
<int>& newup
, int up_primary
,
1562 const std::vector
<int>& newacting
, int acting_primary
,
1563 ObjectStore::Transaction
&t
);
1564 void on_new_interval();
1565 void clear_recovery_state();
1566 void clear_primary_state();
1567 void check_past_interval_bounds() const;
1568 bool set_force_recovery(bool b
);
1569 bool set_force_backfill(bool b
);
1571 /// clip calculated priority to reasonable range
1572 int clamp_recovery_priority(int prio
, int pool_recovery_prio
, int max
);
1573 /// get log recovery reservation priority
1574 unsigned get_recovery_priority();
1575 /// get backfill reservation priority
1576 unsigned get_backfill_priority();
1577 /// get priority for pg deletion
1578 unsigned get_delete_priority();
1582 * recovery_msg_priority_t
1584 * Defines priority values for use with recovery messages. The values are
1585 * chosen to be reasonable for wpq during an upgrade scenarios, but are
1586 * actually translated into a class in PGRecoveryMsg::get_scheduler_class()
1588 enum recovery_msg_priority_t
: int {
1595 /// get message priority for recovery messages
1596 int get_recovery_op_priority() const {
1597 if (cct
->_conf
->osd_op_queue
== "mclock_scheduler") {
1598 /* For mclock, we use special priority values which will be
1599 * translated into op classes within PGRecoveryMsg::get_scheduler_class
1601 if (is_forced_recovery_or_backfill()) {
1602 return recovery_msg_priority_t::FORCED
;
1603 } else if (is_undersized()) {
1604 return recovery_msg_priority_t::UNDERSIZED
;
1605 } else if (is_degraded()) {
1606 return recovery_msg_priority_t::DEGRADED
;
1608 return recovery_msg_priority_t::BEST_EFFORT
;
1611 /* For WeightedPriorityQueue, we use pool or osd config settings to
1612 * statically set the priority for recovery messages. This special
1613 * handling should probably be removed after Reef */
1615 pool
.info
.opts
.get(pool_opts_t::RECOVERY_OP_PRIORITY
, &pri
);
1616 return pri
> 0 ? pri
: cct
->_conf
->osd_recovery_op_priority
;
1621 bool check_prior_readable_down_osds(const OSDMapRef
& map
);
1623 bool adjust_need_up_thru(const OSDMapRef osdmap
);
1624 PastIntervals::PriorSet
build_prior();
1626 void reject_reservation();
1629 std::map
<pg_shard_t
, pg_info_t
>::const_iterator
find_best_info(
1630 const std::map
<pg_shard_t
, pg_info_t
> &infos
,
1631 bool restrict_to_up_acting
,
1632 bool *history_les_bound
) const;
1634 static void calc_ec_acting(
1635 std::map
<pg_shard_t
, pg_info_t
>::const_iterator auth_log_shard
,
1637 const std::vector
<int> &acting
,
1638 const std::vector
<int> &up
,
1639 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1640 bool restrict_to_up_acting
,
1641 std::vector
<int> *want
,
1642 std::set
<pg_shard_t
> *backfill
,
1643 std::set
<pg_shard_t
> *acting_backfill
,
1646 static std::pair
<std::map
<pg_shard_t
, pg_info_t
>::const_iterator
, eversion_t
>
1647 select_replicated_primary(
1648 std::map
<pg_shard_t
, pg_info_t
>::const_iterator auth_log_shard
,
1649 uint64_t force_auth_primary_missing_objects
,
1650 const std::vector
<int> &up
,
1651 pg_shard_t up_primary
,
1652 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1653 const OSDMapRef osdmap
,
1656 static void calc_replicated_acting(
1657 std::map
<pg_shard_t
, pg_info_t
>::const_iterator primary_shard
,
1658 eversion_t oldest_auth_log_entry
,
1660 const std::vector
<int> &acting
,
1661 const std::vector
<int> &up
,
1662 pg_shard_t up_primary
,
1663 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1664 bool restrict_to_up_acting
,
1665 std::vector
<int> *want
,
1666 std::set
<pg_shard_t
> *backfill
,
1667 std::set
<pg_shard_t
> *acting_backfill
,
1668 const OSDMapRef osdmap
,
1671 static void calc_replicated_acting_stretch(
1672 std::map
<pg_shard_t
, pg_info_t
>::const_iterator primary_shard
,
1673 eversion_t oldest_auth_log_entry
,
1675 const std::vector
<int> &acting
,
1676 const std::vector
<int> &up
,
1677 pg_shard_t up_primary
,
1678 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1679 bool restrict_to_up_acting
,
1680 std::vector
<int> *want
,
1681 std::set
<pg_shard_t
> *backfill
,
1682 std::set
<pg_shard_t
> *acting_backfill
,
1683 const OSDMapRef osdmap
,
1687 void choose_async_recovery_ec(
1688 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1689 const pg_info_t
&auth_info
,
1690 std::vector
<int> *want
,
1691 std::set
<pg_shard_t
> *async_recovery
,
1692 const OSDMapRef osdmap
) const;
1693 void choose_async_recovery_replicated(
1694 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1695 const pg_info_t
&auth_info
,
1696 std::vector
<int> *want
,
1697 std::set
<pg_shard_t
> *async_recovery
,
1698 const OSDMapRef osdmap
) const;
1700 bool recoverable(const std::vector
<int> &want
) const;
1701 bool choose_acting(pg_shard_t
&auth_log_shard
,
1702 bool restrict_to_up_acting
,
1703 bool *history_les_bound
,
1704 bool request_pg_temp_change_only
= false);
1706 bool search_for_missing(
1707 const pg_info_t
&oinfo
, const pg_missing_t
&omissing
,
1709 PeeringCtxWrapper
&rctx
);
1710 void build_might_have_unfound();
1711 void log_weirdness();
1713 ObjectStore::Transaction
& t
,
1714 epoch_t activation_epoch
,
1715 PeeringCtxWrapper
&ctx
);
1717 void rewind_divergent_log(ObjectStore::Transaction
& t
, eversion_t newhead
);
1719 ObjectStore::Transaction
& t
, pg_info_t
&oinfo
,
1720 pg_log_t
&& olog
, pg_shard_t from
);
1722 void proc_primary_info(ObjectStore::Transaction
&t
, const pg_info_t
&info
);
1723 void proc_master_log(ObjectStore::Transaction
& t
, pg_info_t
&oinfo
,
1724 pg_log_t
&& olog
, pg_missing_t
&& omissing
,
1726 void proc_replica_log(pg_info_t
&oinfo
, const pg_log_t
&olog
,
1727 pg_missing_t
&& omissing
, pg_shard_t from
);
1729 void calc_min_last_complete_ondisk() {
1730 eversion_t min
= last_complete_ondisk
;
1731 ceph_assert(!acting_recovery_backfill
.empty());
1732 for (std::set
<pg_shard_t
>::iterator i
= acting_recovery_backfill
.begin();
1733 i
!= acting_recovery_backfill
.end();
1735 if (*i
== get_primary()) continue;
1736 if (peer_last_complete_ondisk
.count(*i
) == 0)
1737 return; // we don't have complete info
1738 eversion_t a
= peer_last_complete_ondisk
[*i
];
1742 if (min
== min_last_complete_ondisk
)
1744 min_last_complete_ondisk
= min
;
1749 pg_shard_t from
, const pg_query_t
&query
,
1750 std::pair
<pg_shard_t
, pg_info_t
> ¬ify_info
);
1752 pg_shard_t from
, const pg_query_t
&query
, epoch_t query_epoch
);
1753 void fulfill_query(const MQuery
& q
, PeeringCtxWrapper
&rctx
);
1755 void try_mark_clean();
1757 void update_blocked_by();
1758 void update_calc_stats();
1760 void add_log_entry(const pg_log_entry_t
& e
, bool applied
);
1762 void calc_trim_to();
1763 void calc_trim_to_aggressive();
1768 pg_shard_t pg_whoami
,
1772 DoutPrefixProvider
*dpp
,
1773 PeeringListener
*pl
);
1776 void handle_event(const boost::statechart::event_base
&evt
,
1779 machine
.process_event(evt
);
1784 void handle_event(PGPeeringEventRef evt
,
1787 machine
.process_event(evt
->get_event());
1791 /// Init fresh instance of PG
1794 const std::vector
<int>& newup
, int new_up_primary
,
1795 const std::vector
<int>& newacting
, int new_acting_primary
,
1796 const pg_history_t
& history
,
1797 const PastIntervals
& pi
,
1798 ObjectStore::Transaction
&t
);
1800 /// Init pg instance from disk state
1801 template <typename F
>
1802 auto init_from_disk_state(
1803 pg_info_t
&&info_from_disk
,
1804 PastIntervals
&&past_intervals_from_disk
,
1806 info
= std::move(info_from_disk
);
1807 last_written_info
= info
;
1808 past_intervals
= std::move(past_intervals_from_disk
);
1809 auto ret
= pg_log_init(pg_log
);
1814 /// Std::set initial primary/acting
1815 void init_primary_up_acting(
1816 const std::vector
<int> &newup
,
1817 const std::vector
<int> &newacting
,
1819 int new_acting_primary
);
1820 void init_hb_stamps();
1822 /// Std::set initial role
1823 void set_role(int r
) {
1827 /// Std::set predicates used for determining readable and recoverable
1828 void set_backend_predicates(
1829 IsPGReadablePredicate
*is_readable
,
1830 IsPGRecoverablePredicate
*is_recoverable
) {
1831 missing_loc
.set_backend_predicates(is_readable
, is_recoverable
);
1834 /// Send current pg_info to peers
1835 void share_pg_info();
1837 /// Get stats for child pgs
1838 void start_split_stats(
1839 const std::set
<spg_t
>& childpgs
, std::vector
<object_stat_sum_t
> *out
);
1841 /// Update new child with stats
1842 void finish_split_stats(
1843 const object_stat_sum_t
& stats
, ObjectStore::Transaction
&t
);
1845 /// Split state for child_pgid into *child
1847 pg_t child_pgid
, PeeringState
*child
, unsigned split_bits
);
1849 /// Merge state from sources
1851 std::map
<spg_t
,PeeringState
*>& sources
,
1853 unsigned split_bits
,
1854 const pg_merge_meta_t
& last_pg_merge_meta
);
1856 /// Permit stray replicas to purge now unnecessary state
1857 void purge_strays();
1862 * Mechanism for updating stats and/or history. Pass t to mark
1863 * dirty and write out. Return true if stats should be published
1867 std::function
<bool(pg_history_t
&, pg_stat_t
&)> f
,
1868 ObjectStore::Transaction
*t
= nullptr);
1870 void update_stats_wo_resched(
1871 std::function
<void(pg_history_t
&, pg_stat_t
&)> f
);
1874 * adjust_purged_snaps
1876 * Mechanism for updating purged_snaps. Marks dirty_info, big_dirty_info.
1878 void adjust_purged_snaps(
1879 std::function
<void(interval_set
<snapid_t
> &snaps
)> f
);
1881 /// Updates info.hit_set to hset_history, does not dirty
1882 void update_hset(const pg_hit_set_history_t
&hset_history
);
1884 /// Get all pg_shards that needs recovery
1885 std::vector
<pg_shard_t
> get_replica_recovery_order() const;
1890 * Merges new_history into info.history clearing past_intervals and
1891 * dirtying as needed.
1893 * Calls PeeringListener::on_info_history_change()
1895 void update_history(const pg_history_t
& new_history
);
1898 * prepare_stats_for_publish
1900 * Returns updated pg_stat_t if stats have changed since
1901 * pg_stats_publish adding in unstable_stats.
1903 * @param pg_stats_publish the latest pg_stat possessed by caller
1904 * @param unstable_stats additional stats which should be included in the
1906 * @return the up-to-date stats if it is different from the specified
1907 * @c pg_stats_publish
1909 std::optional
<pg_stat_t
> prepare_stats_for_publish(
1910 const std::optional
<pg_stat_t
> &pg_stats_publish
,
1911 const object_stat_collection_t
&unstable_stats
);
1914 * Merge entries updating missing as necessary on all
1915 * acting_recovery_backfill logs and missings (also missing_loc)
1917 bool append_log_entries_update_missing(
1918 const mempool::osd_pglog::list
<pg_log_entry_t
> &entries
,
1919 ObjectStore::Transaction
&t
,
1920 std::optional
<eversion_t
> trim_to
,
1921 std::optional
<eversion_t
> roll_forward_to
);
1923 void append_log_with_trim_to_updated(
1924 std::vector
<pg_log_entry_t
>&& log_entries
,
1925 eversion_t roll_forward_to
,
1926 ObjectStore::Transaction
&t
,
1927 bool transaction_applied
,
1930 append_log(std::move(log_entries
), pg_trim_to
, roll_forward_to
,
1931 min_last_complete_ondisk
, t
, transaction_applied
, async
);
1935 * Updates local log to reflect new write from primary.
1938 std::vector
<pg_log_entry_t
>&& logv
,
1940 eversion_t roll_forward_to
,
1941 eversion_t min_last_complete_ondisk
,
1942 ObjectStore::Transaction
&t
,
1943 bool transaction_applied
,
1947 * retrieve the min last_backfill among backfill targets
1949 hobject_t
earliest_backfill() const;
1953 * Updates local log/missing to reflect new oob log update from primary
1955 void merge_new_log_entries(
1956 const mempool::osd_pglog::list
<pg_log_entry_t
> &entries
,
1957 ObjectStore::Transaction
&t
,
1958 std::optional
<eversion_t
> trim_to
,
1959 std::optional
<eversion_t
> roll_forward_to
);
1961 /// Update missing set to reflect e (TODOSAM: not sure why this is needed)
1962 void add_local_next_event(const pg_log_entry_t
& e
) {
1963 pg_log
.missing_add_next_entry(e
);
1966 /// Update log trim boundary
1967 void update_trim_to() {
1968 bool hard_limit
= (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT
));
1970 calc_trim_to_aggressive();
1975 /// Pre-process pending update on hoid represented by logv
1977 const hobject_t
&hoid
,
1978 const std::vector
<pg_log_entry_t
>& logv
,
1979 eversion_t at_version
);
1981 /// Signal that oid has been locally recovered to version v
1983 const hobject_t
&oid
, eversion_t v
,
1985 ObjectStore::Transaction
&t
);
1987 /// Signal that oid has been recovered on peer to version
1988 void on_peer_recover(
1990 const hobject_t
&soid
,
1991 const eversion_t
&version
);
1993 /// Notify that soid is being recovered on peer
1994 void begin_peer_recover(
1996 const hobject_t soid
);
1998 /// Pull missing sets from all candidate peers
1999 bool discover_all_missing(
2000 BufferedRecoveryMessages
&rctx
);
2002 /// Notify that hoid has been fully recovered
2003 void object_recovered(
2004 const hobject_t
&hoid
,
2005 const object_stat_sum_t
&stat_diff
) {
2006 info
.stats
.stats
.sum
.add(stat_diff
);
2007 missing_loc
.recovered(hoid
);
2010 /// Update info/stats to reflect backfill progress
2011 void update_backfill_progress(
2012 const hobject_t
&updated_backfill
,
2013 const pg_stat_t
&updated_stats
,
2014 bool preserve_local_num_bytes
,
2015 ObjectStore::Transaction
&t
);
2017 /// Update info/stats to reflect completed backfill on hoid
2018 void update_complete_backfill_object_stats(
2019 const hobject_t
&hoid
,
2020 const pg_stat_t
&stats
);
2022 /// Update last_backfill for peer to new_last_backfill
2023 void update_peer_last_backfill(
2025 const hobject_t
&new_last_backfill
);
2027 /// Update info.stats with delta_stats for operation on soid
2028 void apply_op_stats(
2029 const hobject_t
&soid
,
2030 const object_stat_sum_t
&delta_stats
);
2033 * force_object_missing
2035 * Force oid on peer to be missing at version. If the object does not
2036 * currently need recovery, either candidates if provided or the remainder
2037 * of the acting set will be deemed to have the object.
2039 void force_object_missing(
2040 const pg_shard_t
&peer
,
2041 const hobject_t
&oid
,
2042 eversion_t version
) {
2043 force_object_missing(std::set
<pg_shard_t
>{peer
}, oid
, version
);
2045 void force_object_missing(
2046 const std::set
<pg_shard_t
> &peer
,
2047 const hobject_t
&oid
,
2048 eversion_t version
);
2050 /// Update state prior to backfilling soid on targets
2051 void prepare_backfill_for_missing(
2052 const hobject_t
&soid
,
2053 const eversion_t
&version
,
2054 const std::vector
<pg_shard_t
> &targets
);
2056 /// Set targets with the right version for revert (see recover_primary)
2057 void set_revert_with_targets(
2058 const hobject_t
&soid
,
2059 const std::set
<pg_shard_t
> &good_peers
);
2061 /// Update lcod for fromosd
2062 void update_peer_last_complete_ondisk(
2065 peer_last_complete_ondisk
[fromosd
] = lcod
;
2069 void update_last_complete_ondisk(
2071 last_complete_ondisk
= lcod
;
2074 /// Update state to reflect recovery up to version
2075 void recovery_committed_to(eversion_t version
);
2077 /// Mark recovery complete
2078 void local_recovery_complete() {
2079 info
.last_complete
= info
.last_update
;
2082 /// Update last_requested pointer to v
2083 void set_last_requested(version_t v
) {
2084 pg_log
.set_last_requested(v
);
2087 /// Write dirty state to t
2088 void write_if_dirty(ObjectStore::Transaction
& t
);
2090 /// Mark write completed to v with persisted lc
2091 void complete_write(eversion_t v
, eversion_t lc
);
2093 /// Update local write applied pointer
2094 void local_write_applied(eversion_t v
) {
2095 last_update_applied
= v
;
2098 /// Updates peering state with new map
2100 OSDMapRef osdmap
, ///< [in] new osdmap
2101 OSDMapRef lastmap
, ///< [in] prev osdmap
2102 std::vector
<int>& newup
, ///< [in] new up set
2103 int up_primary
, ///< [in] new up primary
2104 std::vector
<int>& newacting
, ///< [in] new acting
2105 int acting_primary
, ///< [in] new acting primary
2106 PeeringCtx
&rctx
///< [out] recovery context
2109 /// Activates most recently updated map
2111 PeeringCtx
&rctx
///< [out] recovery context
2114 /// resets last_persisted_osdmap
2115 void reset_last_persisted() {
2116 last_persisted_osdmap
= 0;
2118 dirty_big_info
= true;
2121 /// Signal shutdown beginning
2126 /// Signal shutdown complete
2127 void set_delete_complete() {
2131 /// Dirty info and write out
2132 void force_write_state(ObjectStore::Transaction
&t
) {
2134 dirty_big_info
= true;
2138 /// Get current interval's readable_until
2139 ceph::signedspan
get_readable_until() const {
2140 return readable_until
;
2143 /// Get prior intervals' readable_until upper bound
2144 ceph::signedspan
get_prior_readable_until_ub() const {
2145 return prior_readable_until_ub
;
2148 /// Get prior intervals' readable_until down OSDs of note
2149 const std::set
<int>& get_prior_readable_down_osds() const {
2150 return prior_readable_down_osds
;
2153 /// Reset prior intervals' readable_until upper bound (e.g., bc it passed)
2154 void clear_prior_readable_until_ub() {
2155 prior_readable_until_ub
= ceph::signedspan::zero();
2156 prior_readable_down_osds
.clear();
2157 info
.history
.prior_readable_until_ub
= ceph::signedspan::zero();
2160 void renew_lease(ceph::signedspan now
) {
2161 bool was_min
= (readable_until_ub
== readable_until
);
2162 readable_until_ub_sent
= now
+ readable_interval
;
2164 recalc_readable_until();
2169 void schedule_renew_lease();
2171 pg_lease_t
get_lease() {
2172 return pg_lease_t(readable_until
, readable_until_ub_sent
, readable_interval
);
2175 void proc_lease(const pg_lease_t
& l
);
2176 void proc_lease_ack(int from
, const pg_lease_ack_t
& la
);
2177 void proc_renew_lease();
2179 pg_lease_ack_t
get_lease_ack() {
2180 return pg_lease_ack_t(readable_until_ub_from_primary
);
2183 /// [primary] recalc readable_until[_ub] for the current interval
2184 void recalc_readable_until();
2186 //============================ const helpers ================================
2187 const char *get_current_state() const {
2188 return state_history
.get_current_state();
2190 epoch_t
get_last_peering_reset() const {
2191 return last_peering_reset
;
2193 eversion_t
get_last_rollback_info_trimmed_to_applied() const {
2194 return last_rollback_info_trimmed_to_applied
;
2196 /// Returns stable reference to internal pool structure
2197 const PGPool
&get_pgpool() const {
2200 /// Returns reference to current osdmap
2201 const OSDMapRef
&get_osdmap() const {
2202 ceph_assert(osdmap_ref
);
2205 /// Returns epoch of current osdmap
2206 epoch_t
get_osdmap_epoch() const {
2207 return get_osdmap()->get_epoch();
2210 bool is_ec_pg() const override
{
2211 return pool
.info
.is_erasure();
2213 int get_pg_size() const override
{
2214 return pool
.info
.size
;
2216 bool is_deleting() const {
2219 bool is_deleted() const {
2222 const std::set
<pg_shard_t
> &get_upset() const override
{
2225 bool is_acting_recovery_backfill(pg_shard_t osd
) const {
2226 return acting_recovery_backfill
.count(osd
);
2228 bool is_acting(pg_shard_t osd
) const {
2229 return has_shard(pool
.info
.is_erasure(), acting
, osd
);
2231 bool is_up(pg_shard_t osd
) const {
2232 return has_shard(pool
.info
.is_erasure(), up
, osd
);
2234 static bool has_shard(bool ec
, const std::vector
<int>& v
, pg_shard_t osd
) {
2236 return v
.size() > (unsigned)osd
.shard
&& v
[osd
.shard
] == osd
.osd
;
2238 return std::find(v
.begin(), v
.end(), osd
.osd
) != v
.end();
2241 const PastIntervals
& get_past_intervals() const {
2242 return past_intervals
;
2244 /// acting osd that is not the primary
2245 bool is_nonprimary() const {
2246 return role
>= 0 && pg_whoami
!= primary
;
2249 bool is_primary() const {
2250 return pg_whoami
== primary
;
2252 bool pg_has_reset_since(epoch_t e
) const {
2253 return deleted
|| e
< get_last_peering_reset();
2256 int get_role() const {
2259 const std::vector
<int> &get_acting() const {
2262 const std::set
<pg_shard_t
> &get_actingset() const {
2265 int get_acting_primary() const {
2268 pg_shard_t
get_primary() const {
2271 const std::vector
<int> &get_up() const {
2274 int get_up_primary() const {
2275 return up_primary
.osd
;
2278 bool is_backfill_target(pg_shard_t osd
) const {
2279 return backfill_targets
.count(osd
);
2281 const std::set
<pg_shard_t
> &get_backfill_targets() const {
2282 return backfill_targets
;
2284 bool is_async_recovery_target(pg_shard_t peer
) const {
2285 return async_recovery_targets
.count(peer
);
2287 const std::set
<pg_shard_t
> &get_async_recovery_targets() const {
2288 return async_recovery_targets
;
2290 const std::set
<pg_shard_t
> &get_acting_recovery_backfill() const {
2291 return acting_recovery_backfill
;
2294 const PGLog
&get_pg_log() const {
2298 bool state_test(uint64_t m
) const { return (state
& m
) != 0; }
2299 void state_set(uint64_t m
) { state
|= m
; }
2300 void state_clear(uint64_t m
) { state
&= ~m
; }
2302 bool is_complete() const { return info
.last_complete
== info
.last_update
; }
2303 bool should_send_notify() const { return send_notify
; }
2305 uint64_t get_state() const { return state
; }
2306 bool is_active() const { return state_test(PG_STATE_ACTIVE
); }
2307 bool is_activating() const { return state_test(PG_STATE_ACTIVATING
); }
2308 bool is_peering() const { return state_test(PG_STATE_PEERING
); }
2309 bool is_down() const { return state_test(PG_STATE_DOWN
); }
2310 bool is_recovery_unfound() const {
2311 return state_test(PG_STATE_RECOVERY_UNFOUND
);
2313 bool is_backfilling() const {
2314 return state_test(PG_STATE_BACKFILLING
);
2316 bool is_backfill_unfound() const {
2317 return state_test(PG_STATE_BACKFILL_UNFOUND
);
2319 bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE
); }
2320 bool is_clean() const { return state_test(PG_STATE_CLEAN
); }
2321 bool is_degraded() const { return state_test(PG_STATE_DEGRADED
); }
2322 bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED
); }
2323 bool is_remapped() const { return state_test(PG_STATE_REMAPPED
); }
2324 bool is_peered() const {
2325 return state_test(PG_STATE_ACTIVE
) || state_test(PG_STATE_PEERED
);
2327 bool is_recovering() const { return state_test(PG_STATE_RECOVERING
); }
2328 bool is_premerge() const { return state_test(PG_STATE_PREMERGE
); }
2329 bool is_repair() const { return state_test(PG_STATE_REPAIR
); }
2330 bool is_empty() const { return info
.last_update
== eversion_t(0,0); }
2332 bool get_need_up_thru() const {
2333 return need_up_thru
;
2336 bool is_forced_recovery_or_backfill() const {
2337 return get_state() & (PG_STATE_FORCED_RECOVERY
| PG_STATE_FORCED_BACKFILL
);
2340 bool is_backfill_reserved() const {
2341 return backfill_reserved
;
2344 bool is_backfill_reserving() const {
2345 return backfill_reserving
;
2348 ceph_release_t
get_last_require_osd_release() const {
2349 return last_require_osd_release
;
2352 const pg_info_t
&get_info() const {
2356 const decltype(peer_info
) &get_peer_info() const {
2359 const decltype(peer_missing
) &get_peer_missing() const {
2360 return peer_missing
;
2362 const pg_missing_const_i
&get_peer_missing(const pg_shard_t
&peer
) const {
2363 if (peer
== pg_whoami
) {
2364 return pg_log
.get_missing();
2366 assert(peer_missing
.count(peer
));
2367 return peer_missing
.find(peer
)->second
;
2370 const pg_info_t
&get_peer_info(pg_shard_t peer
) const {
2371 assert(peer_info
.count(peer
));
2372 return peer_info
.find(peer
)->second
;
2374 bool has_peer_info(pg_shard_t peer
) const {
2375 return peer_info
.count(peer
);
2378 bool needs_recovery() const;
2379 bool needs_backfill() const;
2382 * Returns whether a particular object can be safely read on this replica
2384 bool can_serve_replica_read(const hobject_t
&hoid
) {
2385 ceph_assert(!is_primary());
2386 return !pg_log
.get_log().has_write_since(
2387 hoid
, get_min_last_complete_ondisk());
2391 * Returns whether the current acting set is able to go active
2392 * and serve writes. It needs to satisfy min_size and any
2393 * applicable stretch cluster constraints.
2395 bool acting_set_writeable() {
2396 return (actingset
.size() >= pool
.info
.min_size
) &&
2397 (pool
.info
.stretch_set_can_peer(acting
, *get_osdmap(), NULL
));
2401 * Returns whether all peers which might have unfound objects have been
2402 * queried or marked lost.
2404 bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap
) const;
2405 bool all_missing_unfound() const {
2406 const auto& missing
= pg_log
.get_missing();
2407 if (!missing
.have_missing())
2409 for (auto& m
: missing
.get_items()) {
2410 if (!missing_loc
.is_unfound(m
.first
))
2416 bool perform_deletes_during_peering() const {
2417 return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES
));
2421 bool have_unfound() const {
2422 return missing_loc
.have_unfound();
2424 uint64_t get_num_unfound() const {
2425 return missing_loc
.num_unfound();
2428 bool have_missing() const {
2429 return pg_log
.get_missing().num_missing() > 0;
2431 unsigned int get_num_missing() const {
2432 return pg_log
.get_missing().num_missing();
2435 const MissingLoc
&get_missing_loc() const {
2439 const MissingLoc::missing_by_count_t
&get_missing_by_count() const {
2440 return missing_loc
.get_missing_by_count();
2443 eversion_t
get_min_last_complete_ondisk() const {
2444 return min_last_complete_ondisk
;
2447 eversion_t
get_pg_trim_to() const {
2451 eversion_t
get_last_update_applied() const {
2452 return last_update_applied
;
2455 eversion_t
get_last_update_ondisk() const {
2456 return last_update_ondisk
;
2459 bool debug_has_dirty_state() const {
2460 return dirty_info
|| dirty_big_info
;
2463 std::string
get_pg_state_string() const {
2464 return pg_state_string(state
);
2467 /// Dump representation of past_intervals to out
2468 void print_past_intervals(std::ostream
&out
) const {
2469 out
<< "[" << past_intervals
.get_bounds()
2470 << ")/" << past_intervals
.size();
2473 void dump_history(ceph::Formatter
*f
) const {
2474 state_history
.dump(f
);
2477 /// Dump formatted peering status
2478 void dump_peering_state(ceph::Formatter
*f
);
2481 /// Mask feature vector with feature set from new peer
2482 void apply_peer_features(uint64_t f
) { peer_features
&= f
; }
2484 /// Reset feature vector to default
2485 void reset_min_peer_features() {
2486 peer_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
2489 /// Get feature vector common to all known peers with this pg
2490 uint64_t get_min_peer_features() const { return peer_features
; }
2492 /// Get feature vector common to acting set
2493 uint64_t get_min_acting_features() const { return acting_features
; }
2495 /// Get feature vector common to up/acting set
2496 uint64_t get_min_upacting_features() const { return upacting_features
; }
2499 // Flush control interface
2502 * Start additional flush (blocks needs_flush/activation until
2503 * complete_flush is called once for each start_flush call as
2504 * required by start_flush_on_transaction).
2506 void start_flush(ObjectStore::Transaction
&t
) {
2507 flushes_in_progress
++;
2508 pl
->start_flush_on_transaction(t
);
2511 /// True if there are outstanding flushes
2512 bool needs_flush() const {
2513 return flushes_in_progress
> 0;
2515 /// Must be called once per start_flush
2516 void complete_flush();
2518 friend std::ostream
&operator<<(std::ostream
&out
, const PeeringState
&ps
);
2521 std::ostream
&operator<<(std::ostream
&out
, const PeeringState
&ps
);