1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include <boost/statechart/custom_reaction.hpp>
7 #include <boost/statechart/event.hpp>
8 #include <boost/statechart/simple_state.hpp>
9 #include <boost/statechart/state.hpp>
10 #include <boost/statechart/state_machine.hpp>
11 #include <boost/statechart/transition.hpp>
12 #include <boost/statechart/event_base.hpp>
16 #include "include/ceph_assert.h"
17 #include "include/common_fwd.h"
20 #include "PGStateUtils.h"
21 #include "PGPeeringEvent.h"
22 #include "osd_types.h"
23 #include "os/ObjectStore.h"
25 #include "MissingLoc.h"
26 #include "osd/osd_perf_counters.h"
27 #include "common/ostream_temp.h"
35 SnapContext snapc
; // the default pool snapc, ready to go.
37 PGPool(OSDMapRef map
, int64_t i
, const pg_pool_t
& info
,
38 const std::string
& name
)
39 : cached_epoch(map
->get_epoch()),
43 snapc
= info
.get_snap_context();
46 void update(OSDMapRef map
);
48 ceph::timespan
get_readable_interval(ConfigProxy
&conf
) const {
50 if (info
.opts
.get(pool_opts_t::READ_LEASE_INTERVAL
, &v
)) {
51 return ceph::make_timespan(v
);
53 auto hbi
= conf
->osd_heartbeat_grace
;
54 auto fac
= conf
->osd_pool_default_read_lease_ratio
;
55 return ceph::make_timespan(hbi
* fac
);
62 // [primary only] content recovery state
63 struct BufferedRecoveryMessages
{
64 #if defined(WITH_SEASTAR)
65 std::map
<int, std::vector
<MessageURef
>> message_map
;
67 std::map
<int, std::vector
<MessageRef
>> message_map
;
70 BufferedRecoveryMessages() = default;
71 BufferedRecoveryMessages(PeeringCtx
&ctx
);
73 void accept_buffered_messages(BufferedRecoveryMessages
&m
) {
74 for (auto &[target
, ls
] : m
.message_map
) {
75 auto &ovec
= message_map
[target
];
76 // put buffered messages in front
77 ls
.reserve(ls
.size() + ovec
.size());
78 ls
.insert(ls
.end(), std::make_move_iterator(ovec
.begin()), std::make_move_iterator(ovec
.end()));
84 template <class MsgT
> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
85 void send_osd_message(int target
, MsgT
&& m
) {
86 message_map
[target
].emplace_back(std::forward
<MsgT
>(m
));
88 void send_notify(int to
, const pg_notify_t
&n
);
89 void send_query(int to
, spg_t spgid
, const pg_query_t
&q
);
90 void send_info(int to
, spg_t to_spgid
,
91 epoch_t min_epoch
, epoch_t cur_epoch
,
92 const pg_info_t
&info
,
93 std::optional
<pg_lease_t
> lease
= {},
94 std::optional
<pg_lease_ack_t
> lease_ack
= {});
97 struct HeartbeatStamps
: public RefCountedObject
{
98 mutable ceph::mutex lock
= ceph::make_mutex("HeartbeatStamps::lock");
102 // we maintain an upper and lower bound on the delta between our local
103 // mono_clock time (minus the startup_time) to the peer OSD's mono_clock
104 // time (minus its startup_time).
106 // delta is (remote_clock_time - local_clock_time), so that
107 // local_time + delta -> peer_time, and peer_time - delta -> local_time.
109 // we have an upper and lower bound value on this delta, meaning the
110 // value of the remote clock is somewhere between [my_time + lb, my_time + ub]
112 // conversely, if we have a remote timestamp T, then that is
113 // [T - ub, T - lb] in terms of the local clock. i.e., if you are
114 subtracting the delta, then take care that you swap the role of the
117 /// lower bound on peer clock - local clock
118 std::optional
<ceph::signedspan
> peer_clock_delta_lb
;
120 /// upper bound on peer clock - local clock
121 std::optional
<ceph::signedspan
> peer_clock_delta_ub
;
123 /// highest up_from we've seen from this rank
126 void print(std::ostream
& out
) const {
127 std::lock_guard
l(lock
);
128 out
<< "hbstamp(osd." << osd
<< " up_from " << up_from
129 << " peer_clock_delta [";
130 if (peer_clock_delta_lb
) {
131 out
<< *peer_clock_delta_lb
;
134 if (peer_clock_delta_ub
) {
135 out
<< *peer_clock_delta_ub
;
140 void sent_ping(std::optional
<ceph::signedspan
> *delta_ub
) {
141 std::lock_guard
l(lock
);
142 // the non-primaries need a lower bound on remote clock - local clock. if
143 // we assume the transit for the last ping_reply was
144 // instantaneous, that would be (the negative of) our last
145 // peer_clock_delta_lb value.
146 if (peer_clock_delta_lb
) {
147 *delta_ub
= - *peer_clock_delta_lb
;
151 void got_ping(epoch_t this_up_from
,
152 ceph::signedspan now
,
153 ceph::signedspan peer_send_stamp
,
154 std::optional
<ceph::signedspan
> delta_ub
,
155 ceph::signedspan
*out_delta_ub
) {
156 std::lock_guard
l(lock
);
157 if (this_up_from
< up_from
) {
160 if (this_up_from
> up_from
) {
161 up_from
= this_up_from
;
163 peer_clock_delta_lb
= peer_send_stamp
- now
;
164 peer_clock_delta_ub
= delta_ub
;
165 *out_delta_ub
= - *peer_clock_delta_lb
;
168 void got_ping_reply(ceph::signedspan now
,
169 ceph::signedspan peer_send_stamp
,
170 std::optional
<ceph::signedspan
> delta_ub
) {
171 std::lock_guard
l(lock
);
172 peer_clock_delta_lb
= peer_send_stamp
- now
;
173 peer_clock_delta_ub
= delta_ub
;
177 FRIEND_MAKE_REF(HeartbeatStamps
);
178 HeartbeatStamps(int o
)
179 : RefCountedObject(NULL
),
182 using HeartbeatStampsRef
= ceph::ref_t
<HeartbeatStamps
>;
184 inline std::ostream
& operator<<(std::ostream
& out
, const HeartbeatStamps
& hb
)
191 struct PeeringCtx
: BufferedRecoveryMessages
{
192 ObjectStore::Transaction transaction
;
193 HBHandle
* handle
= nullptr;
195 PeeringCtx() = default;
197 PeeringCtx(const PeeringCtx
&) = delete;
198 PeeringCtx
&operator=(const PeeringCtx
&) = delete;
200 PeeringCtx(PeeringCtx
&&) = default;
201 PeeringCtx
&operator=(PeeringCtx
&&) = default;
203 void reset_transaction() {
204 transaction
= ObjectStore::Transaction();
209 * Wraps PeeringCtx to hide the difference between buffering messages to
210 * be sent after flush or immediately.
212 struct PeeringCtxWrapper
{
214 BufferedRecoveryMessages
&msgs
;
215 ObjectStore::Transaction
&transaction
;
216 HBHandle
* const handle
= nullptr;
218 PeeringCtxWrapper(PeeringCtx
&wrapped
) :
220 transaction(wrapped
.transaction
),
221 handle(wrapped
.handle
) {}
223 PeeringCtxWrapper(BufferedRecoveryMessages
&buf
, PeeringCtx
&wrapped
)
225 transaction(wrapped
.transaction
),
226 handle(wrapped
.handle
) {}
228 PeeringCtxWrapper(PeeringCtxWrapper
&&ctx
) = default;
230 template <class MsgT
> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd
231 void send_osd_message(int target
, MsgT
&& m
) {
232 msgs
.send_osd_message(target
, std::forward
<MsgT
>(m
));
234 void send_notify(int to
, const pg_notify_t
&n
) {
235 msgs
.send_notify(to
, n
);
237 void send_query(int to
, spg_t spgid
, const pg_query_t
&q
) {
238 msgs
.send_query(to
, spgid
, q
);
240 void send_info(int to
, spg_t to_spgid
,
241 epoch_t min_epoch
, epoch_t cur_epoch
,
242 const pg_info_t
&info
,
243 std::optional
<pg_lease_t
> lease
= {},
244 std::optional
<pg_lease_ack_t
> lease_ack
= {}) {
245 msgs
.send_info(to
, to_spgid
, min_epoch
, cur_epoch
, info
,
250 /* Encapsulates PG recovery process */
251 class PeeringState
: public MissingLoc::MappingInfo
{
253 struct PeeringListener
: public EpochSource
{
254 /// Prepare t with written information
255 virtual void prepare_write(
257 pg_info_t
&last_written_info
,
258 PastIntervals
&past_intervals
,
262 bool need_write_epoch
,
263 ObjectStore::Transaction
&t
) = 0;
265 /// Notify that info/history changed (generally to update scrub registration)
266 virtual void on_info_history_change() = 0;
268 /// Notify PG that Primary/Replica status has changed (to update scrub registration)
269 virtual void on_primary_status_change(bool was_primary
, bool now_primary
) = 0;
271 /// Need to reschedule next scrub. Assuming no change in role
272 virtual void reschedule_scrub() = 0;
274 /// Notify that a scrub has been requested
275 virtual void scrub_requested(scrub_level_t scrub_level
, scrub_type_t scrub_type
) = 0;
277 /// Return current snap_trimq size
278 virtual uint64_t get_snap_trimq_size() const = 0;
280 /// Send cluster message to osd
281 #if defined(WITH_SEASTAR)
282 virtual void send_cluster_message(
283 int osd
, MessageURef m
, epoch_t epoch
, bool share_map_update
=false) = 0;
285 virtual void send_cluster_message(
286 int osd
, MessageRef m
, epoch_t epoch
, bool share_map_update
=false) = 0;
288 /// Send pg_created to mon
289 virtual void send_pg_created(pg_t pgid
) = 0;
291 virtual ceph::signedspan
get_mnow() = 0;
292 virtual HeartbeatStampsRef
get_hb_stamps(int peer
) = 0;
293 virtual void schedule_renew_lease(epoch_t plr
, ceph::timespan delay
) = 0;
294 virtual void queue_check_readable(epoch_t lpr
, ceph::timespan delay
) = 0;
295 virtual void recheck_readable() = 0;
297 virtual unsigned get_target_pg_log_entries() const = 0;
299 // ============ Flush state ==================
301 * try_flush_or_schedule_async()
303 * If true, caller may assume all past operations on this pg
304 * have been flushed. Else, caller will receive an on_flushed()
305 * call once the flush has completed.
307 virtual bool try_flush_or_schedule_async() = 0;
308 /// Arranges for a commit on t to call on_flushed() once flushed.
309 virtual void start_flush_on_transaction(
310 ObjectStore::Transaction
&t
) = 0;
311 /// Notification that all outstanding flushes for interval have completed
312 virtual void on_flushed() = 0;
314 //============= Recovery ====================
315 /// Arrange for even to be queued after delay
316 virtual void schedule_event_after(
317 PGPeeringEventRef event
,
320 * request_local_background_io_reservation
322 * Request reservation at priority with on_grant queued on grant
323 * and on_preempt on preempt
325 virtual void request_local_background_io_reservation(
327 PGPeeringEventURef on_grant
,
328 PGPeeringEventURef on_preempt
) = 0;
329 /// Modify pending local background reservation request priority
330 virtual void update_local_background_io_priority(
331 unsigned priority
) = 0;
332 /// Cancel pending local background reservation request
333 virtual void cancel_local_background_io_reservation() = 0;
336 * request_remote_background_io_reservation
338 * Request reservation at priority with on_grant queued on grant
339 * and on_preempt on preempt
341 virtual void request_remote_recovery_reservation(
343 PGPeeringEventURef on_grant
,
344 PGPeeringEventURef on_preempt
) = 0;
345 /// Cancel pending remote background reservation request
346 virtual void cancel_remote_recovery_reservation() = 0;
348 /// Arrange for on_commit to be queued upon commit of t
349 virtual void schedule_event_on_commit(
350 ObjectStore::Transaction
&t
,
351 PGPeeringEventRef on_commit
) = 0;
353 //============================ HB =============================
354 /// Update hb set to peers
355 virtual void update_heartbeat_peers(std::set
<int> peers
) = 0;
357 /// Std::set targets being probed in this interval
358 virtual void set_probe_targets(const std::set
<pg_shard_t
> &probe_set
) = 0;
359 /// Clear targets being probed in this interval
360 virtual void clear_probe_targets() = 0;
362 /// Queue for a pg_temp of wanted
363 virtual void queue_want_pg_temp(const std::vector
<int> &wanted
) = 0;
364 /// Clear queue for a pg_temp of wanted
365 virtual void clear_want_pg_temp() = 0;
367 /// Arrange for stats to be shipped to mon to be updated for this pg
368 virtual void publish_stats_to_osd() = 0;
369 /// Clear stats to be shipped to mon for this pg
370 virtual void clear_publish_stats() = 0;
372 /// Notification to check outstanding operation targets
373 virtual void check_recovery_sources(const OSDMapRef
& newmap
) = 0;
374 /// Notification to check outstanding blocklist
375 virtual void check_blocklisted_watchers() = 0;
376 /// Notification to clear state associated with primary
377 virtual void clear_primary_state() = 0;
379 // =================== Event notification ====================
380 virtual void on_pool_change() = 0;
381 virtual void on_role_change() = 0;
382 virtual void on_change(ObjectStore::Transaction
&t
) = 0;
383 virtual void on_activate(interval_set
<snapid_t
> to_trim
) = 0;
384 virtual void on_activate_complete() = 0;
385 virtual void on_new_interval() = 0;
386 virtual Context
*on_clean() = 0;
387 virtual void on_activate_committed() = 0;
388 virtual void on_active_exit() = 0;
390 // ====================== PG deletion =======================
391 /// Notification of removal complete, t must be populated to complete removal
392 virtual void on_removal(ObjectStore::Transaction
&t
) = 0;
393 /// Perform incremental removal work
394 virtual std::pair
<ghobject_t
, bool> do_delete_work(
395 ObjectStore::Transaction
&t
, ghobject_t _next
) = 0;
397 // ======================= PG Merge =========================
398 virtual void clear_ready_to_merge() = 0;
399 virtual void set_not_ready_to_merge_target(pg_t pgid
, pg_t src
) = 0;
400 virtual void set_not_ready_to_merge_source(pg_t pgid
) = 0;
401 virtual void set_ready_to_merge_target(eversion_t lu
, epoch_t les
, epoch_t lec
) = 0;
402 virtual void set_ready_to_merge_source(eversion_t lu
) = 0;
404 // ==================== Std::map notifications ===================
405 virtual void on_active_actmap() = 0;
406 virtual void on_active_advmap(const OSDMapRef
&osdmap
) = 0;
407 virtual epoch_t
oldest_stored_osdmap() = 0;
409 // ============ recovery reservation notifications ==========
410 virtual void on_backfill_reserved() = 0;
411 virtual void on_backfill_canceled() = 0;
412 virtual void on_recovery_reserved() = 0;
414 // ================recovery space accounting ================
415 virtual bool try_reserve_recovery_space(
416 int64_t primary_num_bytes
, int64_t local_num_bytes
) = 0;
417 virtual void unreserve_recovery_space() = 0;
419 // ================== Peering log events ====================
420 /// Get handler for rolling forward/back log entries
421 virtual PGLog::LogEntryHandlerRef
get_log_handler(
422 ObjectStore::Transaction
&t
) = 0;
424 // ============ On disk representation changes ==============
425 virtual void rebuild_missing_set_with_deletes(PGLog
&pglog
) = 0;
427 // ======================= Logging ==========================
428 virtual PerfCounters
&get_peering_perf() = 0;
429 virtual PerfCounters
&get_perf_logger() = 0;
430 virtual void log_state_enter(const char *state
) = 0;
431 virtual void log_state_exit(
432 const char *state_name
, utime_t enter_time
,
433 uint64_t events
, utime_t event_dur
) = 0;
434 virtual void dump_recovery_info(ceph::Formatter
*f
) const = 0;
436 virtual OstreamTemp
get_clog_info() = 0;
437 virtual OstreamTemp
get_clog_error() = 0;
438 virtual OstreamTemp
get_clog_debug() = 0;
440 virtual ~PeeringListener() {}
443 struct QueryState
: boost::statechart::event
< QueryState
> {
445 explicit QueryState(ceph::Formatter
*f
) : f(f
) {}
446 void print(std::ostream
*out
) const {
451 struct QueryUnfound
: boost::statechart::event
< QueryUnfound
> {
453 explicit QueryUnfound(ceph::Formatter
*f
) : f(f
) {}
454 void print(std::ostream
*out
) const {
455 *out
<< "QueryUnfound";
459 struct AdvMap
: boost::statechart::event
< AdvMap
> {
462 std::vector
<int> newup
, newacting
;
463 int up_primary
, acting_primary
;
465 OSDMapRef osdmap
, OSDMapRef lastmap
,
466 std::vector
<int>& newup
, int up_primary
,
467 std::vector
<int>& newacting
, int acting_primary
):
468 osdmap(osdmap
), lastmap(lastmap
),
470 newacting(newacting
),
471 up_primary(up_primary
),
472 acting_primary(acting_primary
) {}
473 void print(std::ostream
*out
) const {
478 struct ActMap
: boost::statechart::event
< ActMap
> {
479 ActMap() : boost::statechart::event
< ActMap
>() {}
480 void print(std::ostream
*out
) const {
484 struct Activate
: boost::statechart::event
< Activate
> {
485 epoch_t activation_epoch
;
486 explicit Activate(epoch_t q
) : boost::statechart::event
< Activate
>(),
487 activation_epoch(q
) {}
488 void print(std::ostream
*out
) const {
489 *out
<< "Activate from " << activation_epoch
;
492 struct ActivateCommitted
: boost::statechart::event
< ActivateCommitted
> {
494 epoch_t activation_epoch
;
495 explicit ActivateCommitted(epoch_t e
, epoch_t ae
)
496 : boost::statechart::event
< ActivateCommitted
>(),
498 activation_epoch(ae
) {}
499 void print(std::ostream
*out
) const {
500 *out
<< "ActivateCommitted from " << activation_epoch
501 << " processed at " << epoch
;
505 struct UnfoundBackfill
: boost::statechart::event
<UnfoundBackfill
> {
506 explicit UnfoundBackfill() {}
507 void print(std::ostream
*out
) const {
508 *out
<< "UnfoundBackfill";
511 struct UnfoundRecovery
: boost::statechart::event
<UnfoundRecovery
> {
512 explicit UnfoundRecovery() {}
513 void print(std::ostream
*out
) const {
514 *out
<< "UnfoundRecovery";
518 struct RequestScrub
: boost::statechart::event
<RequestScrub
> {
521 explicit RequestScrub(bool d
, bool r
) : deep(scrub_level_t(d
)), repair(scrub_type_t(r
)) {}
522 void print(std::ostream
*out
) const {
523 *out
<< "RequestScrub(" << ((deep
==scrub_level_t::deep
) ? "deep" : "shallow")
524 << ((repair
==scrub_type_t::do_repair
) ? " repair)" : ")");
528 TrivialEvent(Initialize
)
529 TrivialEvent(GotInfo
)
530 TrivialEvent(NeedUpThru
)
531 TrivialEvent(Backfilled
)
532 TrivialEvent(LocalBackfillReserved
)
533 TrivialEvent(RejectTooFullRemoteReservation
)
534 TrivialEvent(RequestBackfill
)
535 TrivialEvent(RemoteRecoveryPreempted
)
536 TrivialEvent(RemoteBackfillPreempted
)
537 TrivialEvent(BackfillTooFull
)
538 TrivialEvent(RecoveryTooFull
)
540 TrivialEvent(MakePrimary
)
541 TrivialEvent(MakeStray
)
542 TrivialEvent(NeedActingChange
)
543 TrivialEvent(IsIncomplete
)
546 TrivialEvent(AllReplicasRecovered
)
547 TrivialEvent(DoRecovery
)
548 TrivialEvent(LocalRecoveryReserved
)
549 TrivialEvent(AllRemotesReserved
)
550 TrivialEvent(AllBackfillsReserved
)
551 TrivialEvent(GoClean
)
553 TrivialEvent(AllReplicasActivated
)
555 TrivialEvent(IntervalFlush
)
557 TrivialEvent(DeleteStart
)
558 TrivialEvent(DeleteSome
)
560 TrivialEvent(SetForceRecovery
)
561 TrivialEvent(UnsetForceRecovery
)
562 TrivialEvent(SetForceBackfill
)
563 TrivialEvent(UnsetForceBackfill
)
565 TrivialEvent(DeleteReserved
)
566 TrivialEvent(DeleteInterrupted
)
568 TrivialEvent(CheckReadable
)
570 void start_handle(PeeringCtx
*new_ctx
);
572 void begin_block_outgoing();
573 void end_block_outgoing();
574 void clear_blocked_outgoing();
579 class PeeringMachine
: public boost::statechart::state_machine
< PeeringMachine
, Initial
> {
582 PGStateHistory
*state_history
;
585 DoutPrefixProvider
*dpp
;
589 uint64_t event_count
;
591 void clear_event_counters() {
592 event_time
= utime_t();
596 void log_enter(const char *state_name
);
597 void log_exit(const char *state_name
, utime_t duration
);
600 PeeringState
*state
, CephContext
*cct
,
602 DoutPrefixProvider
*dpp
,
604 PGStateHistory
*state_history
) :
606 state_history(state_history
),
607 cct(cct
), spgid(spgid
),
611 /* Accessor functions for state methods */
612 ObjectStore::Transaction
& get_cur_transaction() {
613 ceph_assert(state
->rctx
);
614 return state
->rctx
->transaction
;
617 PeeringCtxWrapper
&get_recovery_ctx() {
619 return *(state
->rctx
);
622 void send_notify(int to
, const pg_notify_t
&n
) {
623 ceph_assert(state
->rctx
);
624 state
->rctx
->send_notify(to
, n
);
626 void send_query(int to
, const pg_query_t
&query
) {
627 state
->rctx
->send_query(
629 spg_t(spgid
.pgid
, query
.to
),
633 friend class PeeringMachine
;
653 // WaitRemoteBackfillReserved
654 // WaitLocalBackfillReserved
658 // WaitRemoteRecoveryReserved
659 // WaitLocalRecoveryReserved
663 // RepWaitBackfillReserved
664 // RepWaitRecoveryReserved
667 // WaitDeleteReserved
671 struct Crashed
: boost::statechart::state
< Crashed
, PeeringMachine
>, NamedState
{
672 explicit Crashed(my_context ctx
);
677 struct Initial
: boost::statechart::state
< Initial
, PeeringMachine
>, NamedState
{
678 explicit Initial(my_context ctx
);
681 typedef boost::mpl::list
<
682 boost::statechart::transition
< Initialize
, Reset
>,
683 boost::statechart::custom_reaction
< NullEvt
>,
684 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
687 boost::statechart::result
react(const MNotifyRec
&);
688 boost::statechart::result
react(const MInfoRec
&);
689 boost::statechart::result
react(const MLogRec
&);
690 boost::statechart::result
react(const boost::statechart::event_base
&) {
691 return discard_event();
695 struct Reset
: boost::statechart::state
< Reset
, PeeringMachine
>, NamedState
{
696 explicit Reset(my_context ctx
);
699 typedef boost::mpl::list
<
700 boost::statechart::custom_reaction
< QueryState
>,
701 boost::statechart::custom_reaction
< QueryUnfound
>,
702 boost::statechart::custom_reaction
< AdvMap
>,
703 boost::statechart::custom_reaction
< ActMap
>,
704 boost::statechart::custom_reaction
< NullEvt
>,
705 boost::statechart::custom_reaction
< IntervalFlush
>,
706 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
708 boost::statechart::result
react(const QueryState
& q
);
709 boost::statechart::result
react(const QueryUnfound
& q
);
710 boost::statechart::result
react(const AdvMap
&);
711 boost::statechart::result
react(const ActMap
&);
712 boost::statechart::result
react(const IntervalFlush
&);
713 boost::statechart::result
react(const boost::statechart::event_base
&) {
714 return discard_event();
720 struct Started
: boost::statechart::state
< Started
, PeeringMachine
, Start
>, NamedState
{
721 explicit Started(my_context ctx
);
724 typedef boost::mpl::list
<
725 boost::statechart::custom_reaction
< QueryState
>,
726 boost::statechart::custom_reaction
< QueryUnfound
>,
727 boost::statechart::custom_reaction
< AdvMap
>,
728 boost::statechart::custom_reaction
< IntervalFlush
>,
730 boost::statechart::custom_reaction
< NullEvt
>,
731 boost::statechart::custom_reaction
<SetForceRecovery
>,
732 boost::statechart::custom_reaction
<UnsetForceRecovery
>,
733 boost::statechart::custom_reaction
<SetForceBackfill
>,
734 boost::statechart::custom_reaction
<UnsetForceBackfill
>,
735 boost::statechart::custom_reaction
<RequestScrub
>,
736 boost::statechart::custom_reaction
<CheckReadable
>,
738 boost::statechart::transition
< boost::statechart::event_base
, Crashed
>
740 boost::statechart::result
react(const QueryState
& q
);
741 boost::statechart::result
react(const QueryUnfound
& q
);
742 boost::statechart::result
react(const AdvMap
&);
743 boost::statechart::result
react(const IntervalFlush
&);
744 boost::statechart::result
react(const boost::statechart::event_base
&) {
745 return discard_event();
752 struct Start
: boost::statechart::state
< Start
, Started
>, NamedState
{
753 explicit Start(my_context ctx
);
756 typedef boost::mpl::list
<
757 boost::statechart::transition
< MakePrimary
, Primary
>,
758 boost::statechart::transition
< MakeStray
, Stray
>
763 struct WaitActingChange
;
767 struct Primary
: boost::statechart::state
< Primary
, Started
, Peering
>, NamedState
{
768 explicit Primary(my_context ctx
);
771 typedef boost::mpl::list
<
772 boost::statechart::custom_reaction
< ActMap
>,
773 boost::statechart::custom_reaction
< MNotifyRec
>,
774 boost::statechart::custom_reaction
<SetForceRecovery
>,
775 boost::statechart::custom_reaction
<UnsetForceRecovery
>,
776 boost::statechart::custom_reaction
<SetForceBackfill
>,
777 boost::statechart::custom_reaction
<UnsetForceBackfill
>,
778 boost::statechart::custom_reaction
<RequestScrub
>
780 boost::statechart::result
react(const ActMap
&);
781 boost::statechart::result
react(const MNotifyRec
&);
782 boost::statechart::result
react(const SetForceRecovery
&);
783 boost::statechart::result
react(const UnsetForceRecovery
&);
784 boost::statechart::result
react(const SetForceBackfill
&);
785 boost::statechart::result
react(const UnsetForceBackfill
&);
786 boost::statechart::result
react(const RequestScrub
&);
789 struct WaitActingChange
: boost::statechart::state
< WaitActingChange
, Primary
>,
791 typedef boost::mpl::list
<
792 boost::statechart::custom_reaction
< QueryState
>,
793 boost::statechart::custom_reaction
< QueryUnfound
>,
794 boost::statechart::custom_reaction
< AdvMap
>,
795 boost::statechart::custom_reaction
< MLogRec
>,
796 boost::statechart::custom_reaction
< MInfoRec
>,
797 boost::statechart::custom_reaction
< MNotifyRec
>
799 explicit WaitActingChange(my_context ctx
);
800 boost::statechart::result
react(const QueryState
& q
);
801 boost::statechart::result
react(const QueryUnfound
& q
);
802 boost::statechart::result
react(const AdvMap
&);
803 boost::statechart::result
react(const MLogRec
&);
804 boost::statechart::result
react(const MInfoRec
&);
805 boost::statechart::result
react(const MNotifyRec
&);
812 struct Peering
: boost::statechart::state
< Peering
, Primary
, GetInfo
>, NamedState
{
813 PastIntervals::PriorSet prior_set
;
814 bool history_les_bound
; //< need osd_find_best_info_ignore_history_les
816 explicit Peering(my_context ctx
);
819 typedef boost::mpl::list
<
820 boost::statechart::custom_reaction
< QueryState
>,
821 boost::statechart::custom_reaction
< QueryUnfound
>,
822 boost::statechart::transition
< Activate
, Active
>,
823 boost::statechart::custom_reaction
< AdvMap
>
825 boost::statechart::result
react(const QueryState
& q
);
826 boost::statechart::result
react(const QueryUnfound
& q
);
827 boost::statechart::result
react(const AdvMap
&advmap
);
830 struct WaitLocalRecoveryReserved
;
832 struct Active
: boost::statechart::state
< Active
, Primary
, Activating
>, NamedState
{
833 explicit Active(my_context ctx
);
836 const std::set
<pg_shard_t
> remote_shards_to_reserve_recovery
;
837 const std::set
<pg_shard_t
> remote_shards_to_reserve_backfill
;
838 bool all_replicas_activated
;
840 typedef boost::mpl::list
<
841 boost::statechart::custom_reaction
< QueryState
>,
842 boost::statechart::custom_reaction
< QueryUnfound
>,
843 boost::statechart::custom_reaction
< ActMap
>,
844 boost::statechart::custom_reaction
< AdvMap
>,
845 boost::statechart::custom_reaction
< MInfoRec
>,
846 boost::statechart::custom_reaction
< MNotifyRec
>,
847 boost::statechart::custom_reaction
< MLogRec
>,
848 boost::statechart::custom_reaction
< MTrim
>,
849 boost::statechart::custom_reaction
< Backfilled
>,
850 boost::statechart::custom_reaction
< ActivateCommitted
>,
851 boost::statechart::custom_reaction
< AllReplicasActivated
>,
852 boost::statechart::custom_reaction
< DeferRecovery
>,
853 boost::statechart::custom_reaction
< DeferBackfill
>,
854 boost::statechart::custom_reaction
< UnfoundRecovery
>,
855 boost::statechart::custom_reaction
< UnfoundBackfill
>,
856 boost::statechart::custom_reaction
< RemoteReservationRevokedTooFull
>,
857 boost::statechart::custom_reaction
< RemoteReservationRevoked
>,
858 boost::statechart::custom_reaction
< DoRecovery
>,
859 boost::statechart::custom_reaction
< RenewLease
>,
860 boost::statechart::custom_reaction
< MLeaseAck
>,
861 boost::statechart::custom_reaction
< CheckReadable
>
863 boost::statechart::result
react(const QueryState
& q
);
864 boost::statechart::result
react(const QueryUnfound
& q
);
865 boost::statechart::result
react(const ActMap
&);
866 boost::statechart::result
react(const AdvMap
&);
867 boost::statechart::result
react(const MInfoRec
& infoevt
);
868 boost::statechart::result
react(const MNotifyRec
& notevt
);
869 boost::statechart::result
react(const MLogRec
& logevt
);
870 boost::statechart::result
react(const MTrim
& trimevt
);
871 boost::statechart::result
react(const Backfilled
&) {
872 return discard_event();
874 boost::statechart::result
react(const ActivateCommitted
&);
875 boost::statechart::result
react(const AllReplicasActivated
&);
876 boost::statechart::result
react(const RenewLease
&);
877 boost::statechart::result
react(const MLeaseAck
&);
878 boost::statechart::result
react(const DeferRecovery
& evt
) {
879 return discard_event();
881 boost::statechart::result
react(const DeferBackfill
& evt
) {
882 return discard_event();
884 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
885 return discard_event();
887 boost::statechart::result
react(const UnfoundBackfill
& evt
) {
888 return discard_event();
890 boost::statechart::result
react(const RemoteReservationRevokedTooFull
&) {
891 return discard_event();
893 boost::statechart::result
react(const RemoteReservationRevoked
&) {
894 return discard_event();
896 boost::statechart::result
react(const DoRecovery
&) {
897 return discard_event();
899 boost::statechart::result
react(const CheckReadable
&);
900 void all_activated_and_committed();
903 struct Clean
: boost::statechart::state
< Clean
, Active
>, NamedState
{
904 typedef boost::mpl::list
<
905 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
906 boost::statechart::custom_reaction
<SetForceRecovery
>,
907 boost::statechart::custom_reaction
<SetForceBackfill
>
909 explicit Clean(my_context ctx
);
911 boost::statechart::result
react(const boost::statechart::event_base
&) {
912 return discard_event();
916 struct Recovered
: boost::statechart::state
< Recovered
, Active
>, NamedState
{
917 typedef boost::mpl::list
<
918 boost::statechart::transition
< GoClean
, Clean
>,
919 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
920 boost::statechart::custom_reaction
< AllReplicasActivated
>
922 explicit Recovered(my_context ctx
);
924 boost::statechart::result
react(const AllReplicasActivated
&) {
925 post_event(GoClean());
926 return forward_event();
930 struct Backfilling
: boost::statechart::state
< Backfilling
, Active
>, NamedState
{
931 typedef boost::mpl::list
<
932 boost::statechart::custom_reaction
< Backfilled
>,
933 boost::statechart::custom_reaction
< DeferBackfill
>,
934 boost::statechart::custom_reaction
< UnfoundBackfill
>,
935 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
936 boost::statechart::custom_reaction
< RemoteReservationRevokedTooFull
>,
937 boost::statechart::custom_reaction
< RemoteReservationRevoked
>
939 explicit Backfilling(my_context ctx
);
940 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
) {
941 // for compat with old peers
942 post_event(RemoteReservationRevokedTooFull());
943 return discard_event();
945 void backfill_release_reservations();
946 boost::statechart::result
react(const Backfilled
& evt
);
947 boost::statechart::result
react(const RemoteReservationRevokedTooFull
& evt
);
948 boost::statechart::result
react(const RemoteReservationRevoked
& evt
);
949 boost::statechart::result
react(const DeferBackfill
& evt
);
950 boost::statechart::result
react(const UnfoundBackfill
& evt
);
951 void cancel_backfill();
955 struct WaitRemoteBackfillReserved
: boost::statechart::state
< WaitRemoteBackfillReserved
, Active
>, NamedState
{
956 typedef boost::mpl::list
<
957 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
958 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
959 boost::statechart::custom_reaction
< RemoteReservationRevoked
>,
960 boost::statechart::transition
< AllBackfillsReserved
, Backfilling
>
962 std::set
<pg_shard_t
>::const_iterator backfill_osd_it
;
963 explicit WaitRemoteBackfillReserved(my_context ctx
);
966 boost::statechart::result
react(const RemoteBackfillReserved
& evt
);
967 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
);
968 boost::statechart::result
react(const RemoteReservationRevoked
& evt
);
971 struct WaitLocalBackfillReserved
: boost::statechart::state
< WaitLocalBackfillReserved
, Active
>, NamedState
{
972 typedef boost::mpl::list
<
973 boost::statechart::transition
< LocalBackfillReserved
, WaitRemoteBackfillReserved
>,
974 boost::statechart::custom_reaction
< RemoteBackfillReserved
>
976 explicit WaitLocalBackfillReserved(my_context ctx
);
977 boost::statechart::result
react(const RemoteBackfillReserved
& evt
) {
979 return discard_event();
984 struct NotBackfilling
: boost::statechart::state
< NotBackfilling
, Active
>, NamedState
{
985 typedef boost::mpl::list
<
986 boost::statechart::custom_reaction
< QueryUnfound
>,
987 boost::statechart::transition
< RequestBackfill
, WaitLocalBackfillReserved
>,
988 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
989 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>
991 explicit NotBackfilling(my_context ctx
);
993 boost::statechart::result
react(const QueryUnfound
& q
);
994 boost::statechart::result
react(const RemoteBackfillReserved
& evt
);
995 boost::statechart::result
react(const RemoteReservationRejectedTooFull
& evt
);
998 struct NotRecovering
: boost::statechart::state
< NotRecovering
, Active
>, NamedState
{
999 typedef boost::mpl::list
<
1000 boost::statechart::custom_reaction
< QueryUnfound
>,
1001 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
1002 boost::statechart::custom_reaction
< DeferRecovery
>,
1003 boost::statechart::custom_reaction
< UnfoundRecovery
>
1005 explicit NotRecovering(my_context ctx
);
1006 boost::statechart::result
react(const QueryUnfound
& q
);
1007 boost::statechart::result
react(const DeferRecovery
& evt
) {
1009 return discard_event();
1011 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
1013 return discard_event();
1019 struct RepNotRecovering
;
1020 struct ReplicaActive
: boost::statechart::state
< ReplicaActive
, Started
, RepNotRecovering
>, NamedState
{
1021 explicit ReplicaActive(my_context ctx
);
1024 typedef boost::mpl::list
<
1025 boost::statechart::custom_reaction
< QueryState
>,
1026 boost::statechart::custom_reaction
< QueryUnfound
>,
1027 boost::statechart::custom_reaction
< ActMap
>,
1028 boost::statechart::custom_reaction
< MQuery
>,
1029 boost::statechart::custom_reaction
< MInfoRec
>,
1030 boost::statechart::custom_reaction
< MLogRec
>,
1031 boost::statechart::custom_reaction
< MTrim
>,
1032 boost::statechart::custom_reaction
< Activate
>,
1033 boost::statechart::custom_reaction
< ActivateCommitted
>,
1034 boost::statechart::custom_reaction
< DeferRecovery
>,
1035 boost::statechart::custom_reaction
< DeferBackfill
>,
1036 boost::statechart::custom_reaction
< UnfoundRecovery
>,
1037 boost::statechart::custom_reaction
< UnfoundBackfill
>,
1038 boost::statechart::custom_reaction
< RemoteBackfillPreempted
>,
1039 boost::statechart::custom_reaction
< RemoteRecoveryPreempted
>,
1040 boost::statechart::custom_reaction
< RecoveryDone
>,
1041 boost::statechart::transition
<DeleteStart
, ToDelete
>,
1042 boost::statechart::custom_reaction
< MLease
>
1044 boost::statechart::result
react(const QueryState
& q
);
1045 boost::statechart::result
react(const QueryUnfound
& q
);
1046 boost::statechart::result
react(const MInfoRec
& infoevt
);
1047 boost::statechart::result
react(const MLogRec
& logevt
);
1048 boost::statechart::result
react(const MTrim
& trimevt
);
1049 boost::statechart::result
react(const ActMap
&);
1050 boost::statechart::result
react(const MQuery
&);
1051 boost::statechart::result
react(const Activate
&);
1052 boost::statechart::result
react(const ActivateCommitted
&);
1053 boost::statechart::result
react(const MLease
&);
1054 boost::statechart::result
react(const RecoveryDone
&) {
1055 return discard_event();
1057 boost::statechart::result
react(const DeferRecovery
& evt
) {
1058 return discard_event();
1060 boost::statechart::result
react(const DeferBackfill
& evt
) {
1061 return discard_event();
1063 boost::statechart::result
react(const UnfoundRecovery
& evt
) {
1064 return discard_event();
1066 boost::statechart::result
react(const UnfoundBackfill
& evt
) {
1067 return discard_event();
1069 boost::statechart::result
react(const RemoteBackfillPreempted
& evt
) {
1070 return discard_event();
1072 boost::statechart::result
react(const RemoteRecoveryPreempted
& evt
) {
1073 return discard_event();
1077 struct RepRecovering
: boost::statechart::state
< RepRecovering
, ReplicaActive
>, NamedState
{
1078 typedef boost::mpl::list
<
1079 boost::statechart::transition
< RecoveryDone
, RepNotRecovering
>,
1080 // for compat with old peers
1081 boost::statechart::transition
< RemoteReservationRejectedTooFull
, RepNotRecovering
>,
1082 boost::statechart::transition
< RemoteReservationCanceled
, RepNotRecovering
>,
1083 boost::statechart::custom_reaction
< BackfillTooFull
>,
1084 boost::statechart::custom_reaction
< RemoteRecoveryPreempted
>,
1085 boost::statechart::custom_reaction
< RemoteBackfillPreempted
>
1087 explicit RepRecovering(my_context ctx
);
1088 boost::statechart::result
react(const RemoteRecoveryPreempted
&evt
);
1089 boost::statechart::result
react(const BackfillTooFull
&evt
);
1090 boost::statechart::result
react(const RemoteBackfillPreempted
&evt
);
1094 struct RepWaitBackfillReserved
: boost::statechart::state
< RepWaitBackfillReserved
, ReplicaActive
>, NamedState
{
1095 typedef boost::mpl::list
<
1096 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
1097 boost::statechart::custom_reaction
< RejectTooFullRemoteReservation
>,
1098 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
1099 boost::statechart::custom_reaction
< RemoteReservationCanceled
>
1101 explicit RepWaitBackfillReserved(my_context ctx
);
1103 boost::statechart::result
react(const RemoteBackfillReserved
&evt
);
1104 boost::statechart::result
react(const RejectTooFullRemoteReservation
&evt
);
1105 boost::statechart::result
react(const RemoteReservationRejectedTooFull
&evt
);
1106 boost::statechart::result
react(const RemoteReservationCanceled
&evt
);
1109 struct RepWaitRecoveryReserved
: boost::statechart::state
< RepWaitRecoveryReserved
, ReplicaActive
>, NamedState
{
1110 typedef boost::mpl::list
<
1111 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1112 // for compat with old peers
1113 boost::statechart::custom_reaction
< RemoteReservationRejectedTooFull
>,
1114 boost::statechart::custom_reaction
< RemoteReservationCanceled
>
1116 explicit RepWaitRecoveryReserved(my_context ctx
);
1118 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
);
1119 boost::statechart::result
react(const RemoteReservationRejectedTooFull
&evt
) {
1120 // for compat with old peers
1121 post_event(RemoteReservationCanceled());
1122 return discard_event();
1124 boost::statechart::result
react(const RemoteReservationCanceled
&evt
);
1127 struct RepNotRecovering
: boost::statechart::state
< RepNotRecovering
, ReplicaActive
>, NamedState
{
1128 typedef boost::mpl::list
<
1129 boost::statechart::custom_reaction
< RequestRecoveryPrio
>,
1130 boost::statechart::custom_reaction
< RequestBackfillPrio
>,
1131 boost::statechart::custom_reaction
< RejectTooFullRemoteReservation
>,
1132 boost::statechart::transition
< RemoteReservationRejectedTooFull
, RepNotRecovering
>,
1133 boost::statechart::transition
< RemoteReservationCanceled
, RepNotRecovering
>,
1134 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1135 boost::statechart::custom_reaction
< RemoteBackfillReserved
>,
1136 boost::statechart::transition
< RecoveryDone
, RepNotRecovering
> // for compat with pre-reservation peers
1138 explicit RepNotRecovering(my_context ctx
);
1139 boost::statechart::result
react(const RequestRecoveryPrio
&evt
);
1140 boost::statechart::result
react(const RequestBackfillPrio
&evt
);
1141 boost::statechart::result
react(const RemoteBackfillReserved
&evt
) {
1142 // my reservation completion raced with a RELEASE from primary
1143 return discard_event();
1145 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
) {
1146 // my reservation completion raced with a RELEASE from primary
1147 return discard_event();
1149 boost::statechart::result
react(const RejectTooFullRemoteReservation
&evt
);
1153 struct Recovering
: boost::statechart::state
< Recovering
, Active
>, NamedState
{
1154 typedef boost::mpl::list
<
1155 boost::statechart::custom_reaction
< AllReplicasRecovered
>,
1156 boost::statechart::custom_reaction
< DeferRecovery
>,
1157 boost::statechart::custom_reaction
< UnfoundRecovery
>,
1158 boost::statechart::custom_reaction
< RequestBackfill
>
1160 explicit Recovering(my_context ctx
);
1162 void release_reservations(bool cancel
= false);
1163 boost::statechart::result
react(const AllReplicasRecovered
&evt
);
1164 boost::statechart::result
react(const DeferRecovery
& evt
);
1165 boost::statechart::result
react(const UnfoundRecovery
& evt
);
1166 boost::statechart::result
react(const RequestBackfill
&evt
);
1169 struct WaitRemoteRecoveryReserved
: boost::statechart::state
< WaitRemoteRecoveryReserved
, Active
>, NamedState
{
1170 typedef boost::mpl::list
<
1171 boost::statechart::custom_reaction
< RemoteRecoveryReserved
>,
1172 boost::statechart::transition
< AllRemotesReserved
, Recovering
>
1174 std::set
<pg_shard_t
>::const_iterator remote_recovery_reservation_it
;
1175 explicit WaitRemoteRecoveryReserved(my_context ctx
);
1176 boost::statechart::result
react(const RemoteRecoveryReserved
&evt
);
1180 struct WaitLocalRecoveryReserved
: boost::statechart::state
< WaitLocalRecoveryReserved
, Active
>, NamedState
{
1181 typedef boost::mpl::list
<
1182 boost::statechart::transition
< LocalRecoveryReserved
, WaitRemoteRecoveryReserved
>,
1183 boost::statechart::custom_reaction
< RecoveryTooFull
>
1185 explicit WaitLocalRecoveryReserved(my_context ctx
);
1187 boost::statechart::result
react(const RecoveryTooFull
&evt
);
1190 struct Activating
: boost::statechart::state
< Activating
, Active
>, NamedState
{
1191 typedef boost::mpl::list
<
1192 boost::statechart::transition
< AllReplicasRecovered
, Recovered
>,
1193 boost::statechart::transition
< DoRecovery
, WaitLocalRecoveryReserved
>,
1194 boost::statechart::transition
< RequestBackfill
, WaitLocalBackfillReserved
>
1196 explicit Activating(my_context ctx
);
1200 struct Stray
: boost::statechart::state
< Stray
, Started
>,
1202 explicit Stray(my_context ctx
);
1205 typedef boost::mpl::list
<
1206 boost::statechart::custom_reaction
< MQuery
>,
1207 boost::statechart::custom_reaction
< MLogRec
>,
1208 boost::statechart::custom_reaction
< MInfoRec
>,
1209 boost::statechart::custom_reaction
< ActMap
>,
1210 boost::statechart::custom_reaction
< RecoveryDone
>,
1211 boost::statechart::transition
<DeleteStart
, ToDelete
>
1213 boost::statechart::result
react(const MQuery
& query
);
1214 boost::statechart::result
react(const MLogRec
& logevt
);
1215 boost::statechart::result
react(const MInfoRec
& infoevt
);
1216 boost::statechart::result
react(const ActMap
&);
1217 boost::statechart::result
react(const RecoveryDone
&) {
1218 return discard_event();
1222 struct WaitDeleteReserved
;
1223 struct ToDelete
: boost::statechart::state
<ToDelete
, Started
, WaitDeleteReserved
>, NamedState
{
1224 unsigned priority
= 0;
1225 typedef boost::mpl::list
<
1226 boost::statechart::custom_reaction
< ActMap
>,
1227 boost::statechart::custom_reaction
< ActivateCommitted
>,
1228 boost::statechart::custom_reaction
< DeleteSome
>
1230 explicit ToDelete(my_context ctx
);
1231 boost::statechart::result
react(const ActMap
&evt
);
1232 boost::statechart::result
react(const DeleteSome
&evt
) {
1233 // happens if we drop out of Deleting due to reprioritization etc.
1234 return discard_event();
1236 boost::statechart::result
react(const ActivateCommitted
&) {
1237 // Can happens if we were activated as a stray but not actually pulled
1238 // from prior to the pg going clean and sending a delete.
1239 return discard_event();
1245 struct WaitDeleteReserved
: boost::statechart::state
<WaitDeleteReserved
,
1246 ToDelete
>, NamedState
{
1247 typedef boost::mpl::list
<
1248 boost::statechart::transition
<DeleteReserved
, Deleting
>
1250 explicit WaitDeleteReserved(my_context ctx
);
1254 struct Deleting
: boost::statechart::state
<Deleting
,
1255 ToDelete
>, NamedState
{
1256 typedef boost::mpl::list
<
1257 boost::statechart::custom_reaction
< DeleteSome
>,
1258 boost::statechart::transition
<DeleteInterrupted
, WaitDeleteReserved
>
1261 explicit Deleting(my_context ctx
);
1262 boost::statechart::result
react(const DeleteSome
&evt
);
1268 struct GetInfo
: boost::statechart::state
< GetInfo
, Peering
>, NamedState
{
1269 std::set
<pg_shard_t
> peer_info_requested
;
1271 explicit GetInfo(my_context ctx
);
1275 typedef boost::mpl::list
<
1276 boost::statechart::custom_reaction
< QueryState
>,
1277 boost::statechart::custom_reaction
< QueryUnfound
>,
1278 boost::statechart::transition
< GotInfo
, GetLog
>,
1279 boost::statechart::custom_reaction
< MNotifyRec
>,
1280 boost::statechart::transition
< IsDown
, Down
>
1282 boost::statechart::result
react(const QueryState
& q
);
1283 boost::statechart::result
react(const QueryUnfound
& q
);
1284 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1287 struct GotLog
: boost::statechart::event
< GotLog
> {
1288 GotLog() : boost::statechart::event
< GotLog
>() {}
1291 struct GetLog
: boost::statechart::state
< GetLog
, Peering
>, NamedState
{
1292 pg_shard_t auth_log_shard
;
1293 boost::intrusive_ptr
<MOSDPGLog
> msg
;
1295 explicit GetLog(my_context ctx
);
1298 typedef boost::mpl::list
<
1299 boost::statechart::custom_reaction
< QueryState
>,
1300 boost::statechart::custom_reaction
< QueryUnfound
>,
1301 boost::statechart::custom_reaction
< MLogRec
>,
1302 boost::statechart::custom_reaction
< GotLog
>,
1303 boost::statechart::custom_reaction
< AdvMap
>,
1304 boost::statechart::transition
< NeedActingChange
, WaitActingChange
>,
1305 boost::statechart::transition
< IsIncomplete
, Incomplete
>
1307 boost::statechart::result
react(const AdvMap
&);
1308 boost::statechart::result
react(const QueryState
& q
);
1309 boost::statechart::result
react(const QueryUnfound
& q
);
1310 boost::statechart::result
react(const MLogRec
& logevt
);
1311 boost::statechart::result
react(const GotLog
&);
1316 struct GetMissing
: boost::statechart::state
< GetMissing
, Peering
>, NamedState
{
1317 std::set
<pg_shard_t
> peer_missing_requested
;
1319 explicit GetMissing(my_context ctx
);
1322 typedef boost::mpl::list
<
1323 boost::statechart::custom_reaction
< QueryState
>,
1324 boost::statechart::custom_reaction
< QueryUnfound
>,
1325 boost::statechart::custom_reaction
< MLogRec
>,
1326 boost::statechart::transition
< NeedUpThru
, WaitUpThru
>
1328 boost::statechart::result
react(const QueryState
& q
);
1329 boost::statechart::result
react(const QueryUnfound
& q
);
1330 boost::statechart::result
react(const MLogRec
& logevt
);
1333 struct WaitUpThru
: boost::statechart::state
< WaitUpThru
, Peering
>, NamedState
{
1334 explicit WaitUpThru(my_context ctx
);
1337 typedef boost::mpl::list
<
1338 boost::statechart::custom_reaction
< QueryState
>,
1339 boost::statechart::custom_reaction
< QueryUnfound
>,
1340 boost::statechart::custom_reaction
< ActMap
>,
1341 boost::statechart::custom_reaction
< MLogRec
>
1343 boost::statechart::result
react(const QueryState
& q
);
1344 boost::statechart::result
react(const QueryUnfound
& q
);
1345 boost::statechart::result
react(const ActMap
& am
);
1346 boost::statechart::result
react(const MLogRec
& logrec
);
1349 struct Down
: boost::statechart::state
< Down
, Peering
>, NamedState
{
1350 explicit Down(my_context ctx
);
1351 typedef boost::mpl::list
<
1352 boost::statechart::custom_reaction
< QueryState
>,
1353 boost::statechart::custom_reaction
< QueryUnfound
>,
1354 boost::statechart::custom_reaction
< MNotifyRec
>
1356 boost::statechart::result
react(const QueryState
& q
);
1357 boost::statechart::result
react(const QueryUnfound
& q
);
1358 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1362 struct Incomplete
: boost::statechart::state
< Incomplete
, Peering
>, NamedState
{
1363 typedef boost::mpl::list
<
1364 boost::statechart::custom_reaction
< AdvMap
>,
1365 boost::statechart::custom_reaction
< MNotifyRec
>,
1366 boost::statechart::custom_reaction
< QueryUnfound
>,
1367 boost::statechart::custom_reaction
< QueryState
>
1369 explicit Incomplete(my_context ctx
);
1370 boost::statechart::result
react(const AdvMap
&advmap
);
1371 boost::statechart::result
react(const MNotifyRec
& infoevt
);
1372 boost::statechart::result
react(const QueryUnfound
& q
);
1373 boost::statechart::result
react(const QueryState
& q
);
1377 PGStateHistory state_history
;
1380 DoutPrefixProvider
*dpp
;
1381 PeeringListener
*pl
;
1383 /// context passed in by state machine caller
1384 PeeringCtx
*orig_ctx
;
1386 /// populated if we are buffering messages pending a flush
1387 std::optional
<BufferedRecoveryMessages
> messages_pending_flush
;
1390 * populated between start_handle() and end_handle(), points into
1391 * the message lists for messages_pending_flush while blocking messages
1392 * or into orig_ctx otherwise
1394 std::optional
<PeeringCtxWrapper
> rctx
;
1399 OSDMapRef osdmap_ref
; ///< Reference to current OSDMap
1400 PGPool pool
; ///< Current pool state
1401 epoch_t last_persisted_osdmap
= 0; ///< Last osdmap epoch persisted
1405 * Peering state information
1407 int role
= -1; ///< 0 = primary, 1 = replica, -1=none.
1408 uint64_t state
= 0; ///< PG_STATE_*
1410 pg_shard_t primary
; ///< id/shard of primary
1411 pg_shard_t pg_whoami
; ///< my id/shard
1412 pg_shard_t up_primary
; ///< id/shard of primary of up set
1413 std::vector
<int> up
; ///< crush mapping without temp pgs
1414 std::set
<pg_shard_t
> upset
; ///< up in set form
1415 std::vector
<int> acting
; ///< actual acting set for the current interval
1416 std::set
<pg_shard_t
> actingset
; ///< acting in set form
1418 /// union of acting, recovery, and backfill targets
1419 std::set
<pg_shard_t
> acting_recovery_backfill
;
1421 std::vector
<HeartbeatStampsRef
> hb_stamps
;
1423 ceph::signedspan readable_interval
= ceph::signedspan::zero();
1425 /// how long we can service reads in this interval
1426 ceph::signedspan readable_until
= ceph::signedspan::zero();
1428 /// upper bound on any acting OSDs' readable_until in this interval
1429 ceph::signedspan readable_until_ub
= ceph::signedspan::zero();
1431 /// upper bound from prior interval(s)
1432 ceph::signedspan prior_readable_until_ub
= ceph::signedspan::zero();
1434 /// pg instances from prior interval(s) that may still be readable
1435 std::set
<int> prior_readable_down_osds
;
1437 /// [replica] upper bound we got from the primary (primary's clock)
1438 ceph::signedspan readable_until_ub_from_primary
= ceph::signedspan::zero();
1440 /// [primary] last upper bound shared by primary to replicas
1441 ceph::signedspan readable_until_ub_sent
= ceph::signedspan::zero();
1443 /// [primary] readable ub acked by acting set members
1444 std::vector
<ceph::signedspan
> acting_readable_until_ub
;
1446 bool send_notify
= false; ///< True if a notify needs to be sent to the primary
1448 bool dirty_info
= false; ///< small info structu on disk out of date
1449 bool dirty_big_info
= false; ///< big info structure on disk out of date
1451 pg_info_t info
; ///< current pg info
1452 pg_info_t last_written_info
; ///< last written info
1453 PastIntervals past_intervals
; ///< information about prior pg mappings
1454 PGLog pg_log
; ///< pg log
1456 epoch_t last_peering_reset
= 0; ///< epoch of last peering reset
1458 /// last_update that has committed; ONLY DEFINED WHEN is_active()
1459 eversion_t last_update_ondisk
;
1460 eversion_t last_complete_ondisk
; ///< last_complete that has committed.
1461 eversion_t last_update_applied
; ///< last_update readable
1462 /// last version to which rollback_info trimming has been applied
1463 eversion_t last_rollback_info_trimmed_to_applied
;
1465 /// Counter to determine when pending flushes have completed
1466 unsigned flushes_in_progress
= 0;
1471 std::set
<pg_shard_t
> stray_set
; ///< non-acting osds that have PG data.
1472 std::map
<pg_shard_t
, pg_info_t
> peer_info
; ///< info from peers (stray or prior)
1473 std::map
<pg_shard_t
, int64_t> peer_bytes
; ///< Peer's num_bytes from peer_info
1474 std::set
<pg_shard_t
> peer_purged
; ///< peers purged
1475 std::map
<pg_shard_t
, pg_missing_t
> peer_missing
; ///< peer missing sets
1476 std::set
<pg_shard_t
> peer_log_requested
; ///< logs i've requested (and start stamps)
1477 std::set
<pg_shard_t
> peer_missing_requested
; ///< missing sets requested
1479 /// features supported by all peers
1480 uint64_t peer_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1481 /// features supported by acting set
1482 uint64_t acting_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1483 /// features supported by up and acting
1484 uint64_t upacting_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
1486 /// most recently consumed osdmap's require_osd_version
1487 ceph_release_t last_require_osd_release
;
1489 std::vector
<int> want_acting
; ///< non-empty while peering needs a new acting set
1491 // acting_recovery_backfill contains shards that are acting,
1492 // async recovery targets, or backfill targets.
1493 std::map
<pg_shard_t
,eversion_t
> peer_last_complete_ondisk
;
1495 /// up: min over last_complete_ondisk, peer_last_complete_ondisk
1496 eversion_t min_last_complete_ondisk
;
1497 /// point to which the log should be trimmed
1498 eversion_t pg_trim_to
;
1500 std::set
<int> blocked_by
; ///< osds we are blocked by (for pg stats)
1502 bool need_up_thru
= false; ///< true if osdmap with updated up_thru needed
1504 /// I deleted these strays; ignore racing PGInfo from them
1505 std::set
<pg_shard_t
> peer_activated
;
1507 std::set
<pg_shard_t
> backfill_targets
; ///< osds to be backfilled
1508 std::set
<pg_shard_t
> async_recovery_targets
; ///< osds to be async recovered
1510 /// osds which might have objects on them which are unfound on the primary
1511 std::set
<pg_shard_t
> might_have_unfound
;
1513 bool deleting
= false; /// true while in removing or OSD is shutting down
1514 std::atomic
<bool> deleted
= {false}; /// true once deletion complete
1516 MissingLoc missing_loc
; ///< information about missing objects
1518 bool backfill_reserved
= false;
1519 bool backfill_reserving
= false;
1521 PeeringMachine machine
;
1523 void update_osdmap_ref(OSDMapRef newmap
) {
1524 osdmap_ref
= std::move(newmap
);
1527 void update_heartbeat_peers();
1528 void query_unfound(Formatter
*f
, std::string state
);
1529 bool proc_replica_info(
1530 pg_shard_t from
, const pg_info_t
&oinfo
, epoch_t send_epoch
);
1531 void remove_down_peer_info(const OSDMapRef
&osdmap
);
1532 void check_recovery_sources(const OSDMapRef
& map
);
1533 void set_last_peering_reset();
1534 void check_full_transition(OSDMapRef lastmap
, OSDMapRef osdmap
);
1535 bool should_restart_peering(
1537 int newactingprimary
,
1538 const std::vector
<int>& newup
,
1539 const std::vector
<int>& newacting
,
1542 void start_peering_interval(
1543 const OSDMapRef lastmap
,
1544 const std::vector
<int>& newup
, int up_primary
,
1545 const std::vector
<int>& newacting
, int acting_primary
,
1546 ObjectStore::Transaction
&t
);
1547 void on_new_interval();
1548 void clear_recovery_state();
1549 void clear_primary_state();
1550 void check_past_interval_bounds() const;
1551 bool set_force_recovery(bool b
);
1552 bool set_force_backfill(bool b
);
1554 /// clip calculated priority to reasonable range
1555 int clamp_recovery_priority(int prio
, int pool_recovery_prio
, int max
);
1556 /// get log recovery reservation priority
1557 unsigned get_recovery_priority();
1558 /// get backfill reservation priority
1559 unsigned get_backfill_priority();
1560 /// get priority for pg deletion
1561 unsigned get_delete_priority();
1563 bool check_prior_readable_down_osds(const OSDMapRef
& map
);
1565 bool adjust_need_up_thru(const OSDMapRef osdmap
);
1566 PastIntervals::PriorSet
build_prior();
1568 void reject_reservation();
1571 std::map
<pg_shard_t
, pg_info_t
>::const_iterator
find_best_info(
1572 const std::map
<pg_shard_t
, pg_info_t
> &infos
,
1573 bool restrict_to_up_acting
,
1574 bool *history_les_bound
) const;
1576 static void calc_ec_acting(
1577 std::map
<pg_shard_t
, pg_info_t
>::const_iterator auth_log_shard
,
1579 const std::vector
<int> &acting
,
1580 const std::vector
<int> &up
,
1581 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1582 bool restrict_to_up_acting
,
1583 std::vector
<int> *want
,
1584 std::set
<pg_shard_t
> *backfill
,
1585 std::set
<pg_shard_t
> *acting_backfill
,
1588 static std::pair
<std::map
<pg_shard_t
, pg_info_t
>::const_iterator
, eversion_t
>
1589 select_replicated_primary(
1590 std::map
<pg_shard_t
, pg_info_t
>::const_iterator auth_log_shard
,
1591 uint64_t force_auth_primary_missing_objects
,
1592 const std::vector
<int> &up
,
1593 pg_shard_t up_primary
,
1594 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1595 const OSDMapRef osdmap
,
1598 static void calc_replicated_acting(
1599 std::map
<pg_shard_t
, pg_info_t
>::const_iterator primary_shard
,
1600 eversion_t oldest_auth_log_entry
,
1602 const std::vector
<int> &acting
,
1603 const std::vector
<int> &up
,
1604 pg_shard_t up_primary
,
1605 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1606 bool restrict_to_up_acting
,
1607 std::vector
<int> *want
,
1608 std::set
<pg_shard_t
> *backfill
,
1609 std::set
<pg_shard_t
> *acting_backfill
,
1610 const OSDMapRef osdmap
,
1613 static void calc_replicated_acting_stretch(
1614 std::map
<pg_shard_t
, pg_info_t
>::const_iterator primary_shard
,
1615 eversion_t oldest_auth_log_entry
,
1617 const std::vector
<int> &acting
,
1618 const std::vector
<int> &up
,
1619 pg_shard_t up_primary
,
1620 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1621 bool restrict_to_up_acting
,
1622 std::vector
<int> *want
,
1623 std::set
<pg_shard_t
> *backfill
,
1624 std::set
<pg_shard_t
> *acting_backfill
,
1625 const OSDMapRef osdmap
,
1629 void choose_async_recovery_ec(
1630 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1631 const pg_info_t
&auth_info
,
1632 std::vector
<int> *want
,
1633 std::set
<pg_shard_t
> *async_recovery
,
1634 const OSDMapRef osdmap
) const;
1635 void choose_async_recovery_replicated(
1636 const std::map
<pg_shard_t
, pg_info_t
> &all_info
,
1637 const pg_info_t
&auth_info
,
1638 std::vector
<int> *want
,
1639 std::set
<pg_shard_t
> *async_recovery
,
1640 const OSDMapRef osdmap
) const;
1642 bool recoverable(const std::vector
<int> &want
) const;
1643 bool choose_acting(pg_shard_t
&auth_log_shard
,
1644 bool restrict_to_up_acting
,
1645 bool *history_les_bound
,
1646 bool request_pg_temp_change_only
= false);
1648 bool search_for_missing(
1649 const pg_info_t
&oinfo
, const pg_missing_t
&omissing
,
1651 PeeringCtxWrapper
&rctx
);
1652 void build_might_have_unfound();
1653 void log_weirdness();
1655 ObjectStore::Transaction
& t
,
1656 epoch_t activation_epoch
,
1657 PeeringCtxWrapper
&ctx
);
1659 void rewind_divergent_log(ObjectStore::Transaction
& t
, eversion_t newhead
);
1661 ObjectStore::Transaction
& t
, pg_info_t
&oinfo
,
1662 pg_log_t
&& olog
, pg_shard_t from
);
1664 void proc_primary_info(ObjectStore::Transaction
&t
, const pg_info_t
&info
);
1665 void proc_master_log(ObjectStore::Transaction
& t
, pg_info_t
&oinfo
,
1666 pg_log_t
&& olog
, pg_missing_t
&& omissing
,
1668 void proc_replica_log(pg_info_t
&oinfo
, const pg_log_t
&olog
,
1669 pg_missing_t
&& omissing
, pg_shard_t from
);
1671 void calc_min_last_complete_ondisk() {
1672 eversion_t min
= last_complete_ondisk
;
1673 ceph_assert(!acting_recovery_backfill
.empty());
1674 for (std::set
<pg_shard_t
>::iterator i
= acting_recovery_backfill
.begin();
1675 i
!= acting_recovery_backfill
.end();
1677 if (*i
== get_primary()) continue;
1678 if (peer_last_complete_ondisk
.count(*i
) == 0)
1679 return; // we don't have complete info
1680 eversion_t a
= peer_last_complete_ondisk
[*i
];
1684 if (min
== min_last_complete_ondisk
)
1686 min_last_complete_ondisk
= min
;
1691 pg_shard_t from
, const pg_query_t
&query
,
1692 std::pair
<pg_shard_t
, pg_info_t
> ¬ify_info
);
1694 pg_shard_t from
, const pg_query_t
&query
, epoch_t query_epoch
);
1695 void fulfill_query(const MQuery
& q
, PeeringCtxWrapper
&rctx
);
1697 void try_mark_clean();
1699 void update_blocked_by();
1700 void update_calc_stats();
1702 void add_log_entry(const pg_log_entry_t
& e
, bool applied
);
1704 void calc_trim_to();
1705 void calc_trim_to_aggressive();
1710 pg_shard_t pg_whoami
,
1714 DoutPrefixProvider
*dpp
,
1715 PeeringListener
*pl
);
1718 void handle_event(const boost::statechart::event_base
&evt
,
1721 machine
.process_event(evt
);
1726 void handle_event(PGPeeringEventRef evt
,
1729 machine
.process_event(evt
->get_event());
1733 /// Init fresh instance of PG
1736 const std::vector
<int>& newup
, int new_up_primary
,
1737 const std::vector
<int>& newacting
, int new_acting_primary
,
1738 const pg_history_t
& history
,
1739 const PastIntervals
& pi
,
1740 ObjectStore::Transaction
&t
);
1742 /// Init pg instance from disk state
1743 template <typename F
>
1744 auto init_from_disk_state(
1745 pg_info_t
&&info_from_disk
,
1746 PastIntervals
&&past_intervals_from_disk
,
1748 info
= std::move(info_from_disk
);
1749 last_written_info
= info
;
1750 past_intervals
= std::move(past_intervals_from_disk
);
1751 auto ret
= pg_log_init(pg_log
);
1756 /// Std::set initial primary/acting
1757 void init_primary_up_acting(
1758 const std::vector
<int> &newup
,
1759 const std::vector
<int> &newacting
,
1761 int new_acting_primary
);
1762 void init_hb_stamps();
1764 /// Std::set initial role
1765 void set_role(int r
) {
1769 /// Std::set predicates used for determining readable and recoverable
1770 void set_backend_predicates(
1771 IsPGReadablePredicate
*is_readable
,
1772 IsPGRecoverablePredicate
*is_recoverable
) {
1773 missing_loc
.set_backend_predicates(is_readable
, is_recoverable
);
1776 /// Send current pg_info to peers
1777 void share_pg_info();
1779 /// Get stats for child pgs
1780 void start_split_stats(
1781 const std::set
<spg_t
>& childpgs
, std::vector
<object_stat_sum_t
> *out
);
1783 /// Update new child with stats
1784 void finish_split_stats(
1785 const object_stat_sum_t
& stats
, ObjectStore::Transaction
&t
);
1787 /// Split state for child_pgid into *child
1789 pg_t child_pgid
, PeeringState
*child
, unsigned split_bits
);
1791 /// Merge state from sources
1793 std::map
<spg_t
,PeeringState
*>& sources
,
1795 unsigned split_bits
,
1796 const pg_merge_meta_t
& last_pg_merge_meta
);
1798 /// Permit stray replicas to purge now unnecessary state
1799 void purge_strays();
1804 * Mechanism for updating stats and/or history. Pass t to mark
1805 * dirty and write out. Return true if stats should be published
1809 std::function
<bool(pg_history_t
&, pg_stat_t
&)> f
,
1810 ObjectStore::Transaction
*t
= nullptr);
1812 void update_stats_wo_resched(
1813 std::function
<void(pg_history_t
&, pg_stat_t
&)> f
);
1816 * adjust_purged_snaps
1818 * Mechanism for updating purged_snaps. Marks dirty_info, big_dirty_info.
1820 void adjust_purged_snaps(
1821 std::function
<void(interval_set
<snapid_t
> &snaps
)> f
);
1823 /// Updates info.hit_set to hset_history, does not dirty
1824 void update_hset(const pg_hit_set_history_t
&hset_history
);
1826 /// Get all pg_shards that needs recovery
1827 std::vector
<pg_shard_t
> get_replica_recovery_order() const;
1832 * Merges new_history into info.history clearing past_intervals and
1833 * dirtying as needed.
1835 * Calls PeeringListener::on_info_history_change()
1837 void update_history(const pg_history_t
& new_history
);
1840 * prepare_stats_for_publish
1842 * Returns updated pg_stat_t if stats have changed since
1843 * pg_stats_publish adding in unstable_stats.
1845 * @param pg_stats_publish the latest pg_stat possessed by caller
1846 * @param unstable_stats additional stats which should be included in the
1848 * @return the up to date stats if it is different from the specfied
1849 * @c pg_stats_publish
1851 std::optional
<pg_stat_t
> prepare_stats_for_publish(
1852 const std::optional
<pg_stat_t
> &pg_stats_publish
,
1853 const object_stat_collection_t
&unstable_stats
);
1856 * Merge entries updating missing as necessary on all
1857 * acting_recovery_backfill logs and missings (also missing_loc)
1859 bool append_log_entries_update_missing(
1860 const mempool::osd_pglog::list
<pg_log_entry_t
> &entries
,
1861 ObjectStore::Transaction
&t
,
1862 std::optional
<eversion_t
> trim_to
,
1863 std::optional
<eversion_t
> roll_forward_to
);
1865 void append_log_with_trim_to_updated(
1866 std::vector
<pg_log_entry_t
>&& log_entries
,
1867 eversion_t roll_forward_to
,
1868 ObjectStore::Transaction
&t
,
1869 bool transaction_applied
,
1872 append_log(std::move(log_entries
), pg_trim_to
, roll_forward_to
,
1873 min_last_complete_ondisk
, t
, transaction_applied
, async
);
1877 * Updates local log to reflect new write from primary.
1880 std::vector
<pg_log_entry_t
>&& logv
,
1882 eversion_t roll_forward_to
,
1883 eversion_t min_last_complete_ondisk
,
1884 ObjectStore::Transaction
&t
,
1885 bool transaction_applied
,
1889 * retrieve the min last_backfill among backfill targets
1891 hobject_t
earliest_backfill() const;
1895 * Updates local log/missing to reflect new oob log update from primary
1897 void merge_new_log_entries(
1898 const mempool::osd_pglog::list
<pg_log_entry_t
> &entries
,
1899 ObjectStore::Transaction
&t
,
1900 std::optional
<eversion_t
> trim_to
,
1901 std::optional
<eversion_t
> roll_forward_to
);
1903 /// Update missing set to reflect e (TODOSAM: not sure why this is needed)
1904 void add_local_next_event(const pg_log_entry_t
& e
) {
1905 pg_log
.missing_add_next_entry(e
);
1908 /// Update log trim boundary
1909 void update_trim_to() {
1910 bool hard_limit
= (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT
));
1912 calc_trim_to_aggressive();
1917 /// Pre-process pending update on hoid represented by logv
1919 const hobject_t
&hoid
,
1920 const std::vector
<pg_log_entry_t
>& logv
,
1921 eversion_t at_version
);
1923 /// Signal that oid has been locally recovered to version v
1925 const hobject_t
&oid
, eversion_t v
,
1927 ObjectStore::Transaction
&t
);
1929 /// Signal that oid has been recovered on peer to version
1930 void on_peer_recover(
1932 const hobject_t
&soid
,
1933 const eversion_t
&version
);
1935 /// Notify that soid is being recovered on peer
1936 void begin_peer_recover(
1938 const hobject_t soid
);
1940 /// Pull missing sets from all candidate peers
1941 bool discover_all_missing(
1942 BufferedRecoveryMessages
&rctx
);
1944 /// Notify that hoid has been fully recocovered
1945 void object_recovered(
1946 const hobject_t
&hoid
,
1947 const object_stat_sum_t
&stat_diff
) {
1948 info
.stats
.stats
.sum
.add(stat_diff
);
1949 missing_loc
.recovered(hoid
);
1952 /// Update info/stats to reflect backfill progress
1953 void update_backfill_progress(
1954 const hobject_t
&updated_backfill
,
1955 const pg_stat_t
&updated_stats
,
1956 bool preserve_local_num_bytes
,
1957 ObjectStore::Transaction
&t
);
1959 /// Update info/stats to reflect completed backfill on hoid
1960 void update_complete_backfill_object_stats(
1961 const hobject_t
&hoid
,
1962 const pg_stat_t
&stats
);
1964 /// Update last_backfill for peer to new_last_backfill
1965 void update_peer_last_backfill(
1967 const hobject_t
&new_last_backfill
);
1969 /// Update info.stats with delta_stats for operation on soid
1970 void apply_op_stats(
1971 const hobject_t
&soid
,
1972 const object_stat_sum_t
&delta_stats
);
1975 * force_object_missing
1977 * Force oid on peer to be missing at version. If the object does not
1978 * currently need recovery, either candidates if provided or the remainder
1979 * of the acting std::set will be deemed to have the object.
1981 void force_object_missing(
1982 const pg_shard_t
&peer
,
1983 const hobject_t
&oid
,
1984 eversion_t version
) {
1985 force_object_missing(std::set
<pg_shard_t
>{peer
}, oid
, version
);
1987 void force_object_missing(
1988 const std::set
<pg_shard_t
> &peer
,
1989 const hobject_t
&oid
,
1990 eversion_t version
);
1992 /// Update state prior to backfilling soid on targets
1993 void prepare_backfill_for_missing(
1994 const hobject_t
&soid
,
1995 const eversion_t
&version
,
1996 const std::vector
<pg_shard_t
> &targets
);
1998 /// Std::set targets with the right version for revert (see recover_primary)
1999 void set_revert_with_targets(
2000 const hobject_t
&soid
,
2001 const std::set
<pg_shard_t
> &good_peers
);
2003 /// Update lcod for fromosd
2004 void update_peer_last_complete_ondisk(
2007 peer_last_complete_ondisk
[fromosd
] = lcod
;
2011 void update_last_complete_ondisk(
2013 last_complete_ondisk
= lcod
;
2016 /// Update state to reflect recovery up to version
2017 void recovery_committed_to(eversion_t version
);
2019 /// Mark recovery complete
2020 void local_recovery_complete() {
2021 info
.last_complete
= info
.last_update
;
2024 /// Update last_requested pointer to v
2025 void set_last_requested(version_t v
) {
2026 pg_log
.set_last_requested(v
);
2029 /// Write dirty state to t
2030 void write_if_dirty(ObjectStore::Transaction
& t
);
2032 /// Mark write completed to v with persisted lc
2033 void complete_write(eversion_t v
, eversion_t lc
);
2035 /// Update local write applied pointer
2036 void local_write_applied(eversion_t v
) {
2037 last_update_applied
= v
;
2040 /// Updates peering state with new map
2042 OSDMapRef osdmap
, ///< [in] new osdmap
2043 OSDMapRef lastmap
, ///< [in] prev osdmap
2044 std::vector
<int>& newup
, ///< [in] new up set
2045 int up_primary
, ///< [in] new up primary
2046 std::vector
<int>& newacting
, ///< [in] new acting
2047 int acting_primary
, ///< [in] new acting primary
2048 PeeringCtx
&rctx
///< [out] recovery context
2051 /// Activates most recently updated map
2053 PeeringCtx
&rctx
///< [out] recovery context
2056 /// resets last_persisted_osdmap
2057 void reset_last_persisted() {
2058 last_persisted_osdmap
= 0;
2060 dirty_big_info
= true;
2063 /// Signal shutdown beginning
2068 /// Signal shutdown complete
2069 void set_delete_complete() {
2073 /// Dirty info and write out
2074 void force_write_state(ObjectStore::Transaction
&t
) {
2076 dirty_big_info
= true;
2080 /// Get current interval's readable_until
2081 ceph::signedspan
get_readable_until() const {
2082 return readable_until
;
2085 /// Get prior intervals' readable_until upper bound
2086 ceph::signedspan
get_prior_readable_until_ub() const {
2087 return prior_readable_until_ub
;
2090 /// Get prior intervals' readable_until down OSDs of note
2091 const std::set
<int>& get_prior_readable_down_osds() const {
2092 return prior_readable_down_osds
;
2095 /// Reset prior intervals' readable_until upper bound (e.g., bc it passed)
2096 void clear_prior_readable_until_ub() {
2097 prior_readable_until_ub
= ceph::signedspan::zero();
2098 prior_readable_down_osds
.clear();
2099 info
.history
.prior_readable_until_ub
= ceph::signedspan::zero();
2102 void renew_lease(ceph::signedspan now
) {
2103 bool was_min
= (readable_until_ub
== readable_until
);
2104 readable_until_ub_sent
= now
+ readable_interval
;
2106 recalc_readable_until();
2111 void schedule_renew_lease();
2113 pg_lease_t
get_lease() {
2114 return pg_lease_t(readable_until
, readable_until_ub_sent
, readable_interval
);
2117 void proc_lease(const pg_lease_t
& l
);
2118 void proc_lease_ack(int from
, const pg_lease_ack_t
& la
);
2119 void proc_renew_lease();
2121 pg_lease_ack_t
get_lease_ack() {
2122 return pg_lease_ack_t(readable_until_ub_from_primary
);
2125 /// [primary] recalc readable_until[_ub] for the current interval
2126 void recalc_readable_until();
2128 //============================ const helpers ================================
2129 const char *get_current_state() const {
2130 return state_history
.get_current_state();
2132 epoch_t
get_last_peering_reset() const {
2133 return last_peering_reset
;
2135 eversion_t
get_last_rollback_info_trimmed_to_applied() const {
2136 return last_rollback_info_trimmed_to_applied
;
2138 /// Returns stable reference to internal pool structure
2139 const PGPool
&get_pool() const {
2142 /// Returns reference to current osdmap
2143 const OSDMapRef
&get_osdmap() const {
2144 ceph_assert(osdmap_ref
);
2147 /// Returns epoch of current osdmap
2148 epoch_t
get_osdmap_epoch() const {
2149 return get_osdmap()->get_epoch();
2152 bool is_ec_pg() const override
{
2153 return pool
.info
.is_erasure();
2155 int get_pg_size() const override
{
2156 return pool
.info
.size
;
2158 bool is_deleting() const {
2161 bool is_deleted() const {
2164 const std::set
<pg_shard_t
> &get_upset() const override
{
2167 bool is_acting_recovery_backfill(pg_shard_t osd
) const {
2168 return acting_recovery_backfill
.count(osd
);
2170 bool is_acting(pg_shard_t osd
) const {
2171 return has_shard(pool
.info
.is_erasure(), acting
, osd
);
2173 bool is_up(pg_shard_t osd
) const {
2174 return has_shard(pool
.info
.is_erasure(), up
, osd
);
2176 static bool has_shard(bool ec
, const std::vector
<int>& v
, pg_shard_t osd
) {
2178 return v
.size() > (unsigned)osd
.shard
&& v
[osd
.shard
] == osd
.osd
;
2180 return std::find(v
.begin(), v
.end(), osd
.osd
) != v
.end();
2183 const PastIntervals
& get_past_intervals() const {
2184 return past_intervals
;
2186 /// acting osd that is not the primary
2187 bool is_nonprimary() const {
2188 return role
>= 0 && pg_whoami
!= primary
;
2191 bool is_primary() const {
2192 return pg_whoami
== primary
;
2194 bool pg_has_reset_since(epoch_t e
) const {
2195 return deleted
|| e
< get_last_peering_reset();
2198 int get_role() const {
2201 const std::vector
<int> &get_acting() const {
2204 const std::set
<pg_shard_t
> &get_actingset() const {
2207 int get_acting_primary() const {
2210 pg_shard_t
get_primary() const {
2213 const std::vector
<int> &get_up() const {
2216 int get_up_primary() const {
2217 return up_primary
.osd
;
2220 bool is_backfill_target(pg_shard_t osd
) const {
2221 return backfill_targets
.count(osd
);
2223 const std::set
<pg_shard_t
> &get_backfill_targets() const {
2224 return backfill_targets
;
2226 bool is_async_recovery_target(pg_shard_t peer
) const {
2227 return async_recovery_targets
.count(peer
);
2229 const std::set
<pg_shard_t
> &get_async_recovery_targets() const {
2230 return async_recovery_targets
;
2232 const std::set
<pg_shard_t
> &get_acting_recovery_backfill() const {
2233 return acting_recovery_backfill
;
2236 const PGLog
&get_pg_log() const {
2240 bool state_test(uint64_t m
) const { return (state
& m
) != 0; }
2241 void state_set(uint64_t m
) { state
|= m
; }
2242 void state_clear(uint64_t m
) { state
&= ~m
; }
2244 bool is_complete() const { return info
.last_complete
== info
.last_update
; }
2245 bool should_send_notify() const { return send_notify
; }
2247 uint64_t get_state() const { return state
; }
2248 bool is_active() const { return state_test(PG_STATE_ACTIVE
); }
2249 bool is_activating() const { return state_test(PG_STATE_ACTIVATING
); }
2250 bool is_peering() const { return state_test(PG_STATE_PEERING
); }
2251 bool is_down() const { return state_test(PG_STATE_DOWN
); }
2252 bool is_recovery_unfound() const {
2253 return state_test(PG_STATE_RECOVERY_UNFOUND
);
2255 bool is_backfilling() const {
2256 return state_test(PG_STATE_BACKFILLING
);
2258 bool is_backfill_unfound() const {
2259 return state_test(PG_STATE_BACKFILL_UNFOUND
);
2261 bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE
); }
2262 bool is_clean() const { return state_test(PG_STATE_CLEAN
); }
2263 bool is_degraded() const { return state_test(PG_STATE_DEGRADED
); }
2264 bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED
); }
2265 bool is_remapped() const { return state_test(PG_STATE_REMAPPED
); }
2266 bool is_peered() const {
2267 return state_test(PG_STATE_ACTIVE
) || state_test(PG_STATE_PEERED
);
2269 bool is_recovering() const { return state_test(PG_STATE_RECOVERING
); }
2270 bool is_premerge() const { return state_test(PG_STATE_PREMERGE
); }
2271 bool is_repair() const { return state_test(PG_STATE_REPAIR
); }
2272 bool is_empty() const { return info
.last_update
== eversion_t(0,0); }
2274 bool get_need_up_thru() const {
2275 return need_up_thru
;
2278 bool is_forced_recovery_or_backfill() const {
2279 return get_state() & (PG_STATE_FORCED_RECOVERY
| PG_STATE_FORCED_BACKFILL
);
2282 bool is_backfill_reserved() const {
2283 return backfill_reserved
;
2286 bool is_backfill_reserving() const {
2287 return backfill_reserving
;
2290 ceph_release_t
get_last_require_osd_release() const {
2291 return last_require_osd_release
;
2294 const pg_info_t
&get_info() const {
2298 const decltype(peer_info
) &get_peer_info() const {
2301 const decltype(peer_missing
) &get_peer_missing() const {
2302 return peer_missing
;
2304 const pg_missing_const_i
&get_peer_missing(const pg_shard_t
&peer
) const {
2305 if (peer
== pg_whoami
) {
2306 return pg_log
.get_missing();
2308 assert(peer_missing
.count(peer
));
2309 return peer_missing
.find(peer
)->second
;
2312 const pg_info_t
&get_peer_info(pg_shard_t peer
) const {
2313 assert(peer_info
.count(peer
));
2314 return peer_info
.find(peer
)->second
;
2316 bool has_peer_info(pg_shard_t peer
) const {
2317 return peer_info
.count(peer
);
2320 bool needs_recovery() const;
2321 bool needs_backfill() const;
2324 * Returns whether a particular object can be safely read on this replica
2326 bool can_serve_replica_read(const hobject_t
&hoid
) {
2327 ceph_assert(!is_primary());
2328 return !pg_log
.get_log().has_write_since(
2329 hoid
, get_min_last_complete_ondisk());
2333 * Returns whether the current acting set is able to go active
2334 * and serve writes. It needs to satisfy min_size and any
2335 * applicable stretch cluster constraints.
2337 bool acting_set_writeable() {
2338 return (actingset
.size() >= pool
.info
.min_size
) &&
2339 (pool
.info
.stretch_set_can_peer(acting
, *get_osdmap(), NULL
));
2343 * Returns whether all peers which might have unfound objects have been
2344 * queried or marked lost.
2346 bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap
) const;
2347 bool all_missing_unfound() const {
2348 const auto& missing
= pg_log
.get_missing();
2349 if (!missing
.have_missing())
2351 for (auto& m
: missing
.get_items()) {
2352 if (!missing_loc
.is_unfound(m
.first
))
2358 bool perform_deletes_during_peering() const {
2359 return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES
));
2363 bool have_unfound() const {
2364 return missing_loc
.have_unfound();
2366 uint64_t get_num_unfound() const {
2367 return missing_loc
.num_unfound();
2370 bool have_missing() const {
2371 return pg_log
.get_missing().num_missing() > 0;
2373 unsigned int get_num_missing() const {
2374 return pg_log
.get_missing().num_missing();
2377 const MissingLoc
&get_missing_loc() const {
2381 const MissingLoc::missing_by_count_t
&get_missing_by_count() const {
2382 return missing_loc
.get_missing_by_count();
2385 eversion_t
get_min_last_complete_ondisk() const {
2386 return min_last_complete_ondisk
;
2389 eversion_t
get_pg_trim_to() const {
2393 eversion_t
get_last_update_applied() const {
2394 return last_update_applied
;
2397 eversion_t
get_last_update_ondisk() const {
2398 return last_update_ondisk
;
2401 bool debug_has_dirty_state() const {
2402 return dirty_info
|| dirty_big_info
;
2405 std::string
get_pg_state_string() const {
2406 return pg_state_string(state
);
2409 /// Dump representation of past_intervals to out
2410 void print_past_intervals(std::ostream
&out
) const {
2411 out
<< "[" << past_intervals
.get_bounds()
2412 << ")/" << past_intervals
.size();
2415 void dump_history(ceph::Formatter
*f
) const {
2416 state_history
.dump(f
);
2419 /// Dump formatted peering status
2420 void dump_peering_state(ceph::Formatter
*f
);
2423 /// Mask feature vector with feature set from new peer
2424 void apply_peer_features(uint64_t f
) { peer_features
&= f
; }
2426 /// Reset feature vector to default
2427 void reset_min_peer_features() {
2428 peer_features
= CEPH_FEATURES_SUPPORTED_DEFAULT
;
2431 /// Get feature vector common to all known peers with this pg
2432 uint64_t get_min_peer_features() const { return peer_features
; }
2434 /// Get feature vector common to acting set
2435 uint64_t get_min_acting_features() const { return acting_features
; }
2437 /// Get feature vector common to up/acting set
2438 uint64_t get_min_upacting_features() const { return upacting_features
; }
2441 // Flush control interface
2444 * Start additional flush (blocks needs_flush/activation until
2445 * complete_flush is called once for each start_flush call as
2446 * required by start_flush_on_transaction).
2448 void start_flush(ObjectStore::Transaction
&t
) {
2449 flushes_in_progress
++;
2450 pl
->start_flush_on_transaction(t
);
2453 /// True if there are outstanding flushes
2454 bool needs_flush() const {
2455 return flushes_in_progress
> 0;
2457 /// Must be called once per start_flush
2458 void complete_flush();
2460 friend std::ostream
&operator<<(std::ostream
&out
, const PeeringState
&ps
);
2463 std::ostream
&operator<<(std::ostream
&out
, const PeeringState
&ps
);