]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
#include <boost/statechart/custom_reaction.hpp>
#include <boost/statechart/event.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>
#include <boost/statechart/event_base.hpp>

#include <atomic>
#include <iterator>
#include <map>
#include <optional>
#include <string>
#include <vector>

#include "include/ceph_assert.h"
#include "include/common_fwd.h"

#include "PGLog.h"
#include "PGStateUtils.h"
#include "PGPeeringEvent.h"
#include "osd_types.h"
#include "os/ObjectStore.h"
#include "OSDMap.h"
#include "MissingLoc.h"
#include "osd/osd_perf_counters.h"
#include "common/ostream_temp.h"
28 | ||
29 | struct PGPool { | |
9f95a23c TL |
30 | epoch_t cached_epoch; |
31 | int64_t id; | |
f67539c2 | 32 | std::string name; |
9f95a23c TL |
33 | |
34 | pg_pool_t info; | |
35 | SnapContext snapc; // the default pool snapc, ready to go. | |
36 | ||
f67539c2 TL |
37 | PGPool(OSDMapRef map, int64_t i, const pg_pool_t& info, |
38 | const std::string& name) | |
39 | : cached_epoch(map->get_epoch()), | |
9f95a23c TL |
40 | id(i), |
41 | name(name), | |
42 | info(info) { | |
43 | snapc = info.get_snap_context(); | |
44 | } | |
45 | ||
f67539c2 | 46 | void update(OSDMapRef map); |
9f95a23c | 47 | |
f67539c2 | 48 | ceph::timespan get_readable_interval(ConfigProxy &conf) const { |
9f95a23c TL |
49 | double v = 0; |
50 | if (info.opts.get(pool_opts_t::READ_LEASE_INTERVAL, &v)) { | |
51 | return ceph::make_timespan(v); | |
52 | } else { | |
f67539c2 TL |
53 | auto hbi = conf->osd_heartbeat_grace; |
54 | auto fac = conf->osd_pool_default_read_lease_ratio; | |
9f95a23c TL |
55 | return ceph::make_timespan(hbi * fac); |
56 | } | |
57 | } | |
58 | }; | |
59 | ||
f67539c2 | 60 | struct PeeringCtx; |
9f95a23c TL |
61 | |
62 | // [primary only] content recovery state | |
63 | struct BufferedRecoveryMessages { | |
64 | ceph_release_t require_osd_release; | |
f67539c2 | 65 | std::map<int, std::vector<MessageRef>> message_map; |
9f95a23c TL |
66 | |
67 | BufferedRecoveryMessages(ceph_release_t r) | |
68 | : require_osd_release(r) { | |
69 | } | |
70 | BufferedRecoveryMessages(ceph_release_t r, PeeringCtx &ctx); | |
71 | ||
72 | void accept_buffered_messages(BufferedRecoveryMessages &m) { | |
73 | for (auto &[target, ls] : m.message_map) { | |
74 | auto &ovec = message_map[target]; | |
75 | // put buffered messages in front | |
76 | ls.reserve(ls.size() + ovec.size()); | |
77 | ls.insert(ls.end(), ovec.begin(), ovec.end()); | |
78 | ovec.clear(); | |
79 | ovec.swap(ls); | |
80 | } | |
81 | } | |
82 | ||
83 | void send_osd_message(int target, MessageRef m) { | |
84 | message_map[target].push_back(std::move(m)); | |
85 | } | |
86 | void send_notify(int to, const pg_notify_t &n); | |
87 | void send_query(int to, spg_t spgid, const pg_query_t &q); | |
88 | void send_info(int to, spg_t to_spgid, | |
89 | epoch_t min_epoch, epoch_t cur_epoch, | |
90 | const pg_info_t &info, | |
91 | std::optional<pg_lease_t> lease = {}, | |
92 | std::optional<pg_lease_ack_t> lease_ack = {}); | |
93 | }; | |
94 | ||
95 | struct HeartbeatStamps : public RefCountedObject { | |
96 | mutable ceph::mutex lock = ceph::make_mutex("HeartbeatStamps::lock"); | |
97 | ||
98 | const int osd; | |
99 | ||
100 | // we maintain an upper and lower bound on the delta between our local | |
101 | // mono_clock time (minus the startup_time) to the peer OSD's mono_clock | |
102 | // time (minus its startup_time). | |
103 | // | |
104 | // delta is (remote_clock_time - local_clock_time), so that | |
105 | // local_time + delta -> peer_time, and peer_time - delta -> local_time. | |
106 | // | |
107 | // we have an upper and lower bound value on this delta, meaning the | |
108 | // value of the remote clock is somewhere between [my_time + lb, my_time + ub] | |
109 | // | |
110 | // conversely, if we have a remote timestamp T, then that is | |
111 | // [T - ub, T - lb] in terms of the local clock. i.e., if you are | |
112 | // substracting the delta, then take care that you swap the role of the | |
113 | // lb and ub values. | |
114 | ||
115 | /// lower bound on peer clock - local clock | |
116 | std::optional<ceph::signedspan> peer_clock_delta_lb; | |
117 | ||
118 | /// upper bound on peer clock - local clock | |
119 | std::optional<ceph::signedspan> peer_clock_delta_ub; | |
120 | ||
121 | /// highest up_from we've seen from this rank | |
122 | epoch_t up_from = 0; | |
123 | ||
f67539c2 | 124 | void print(std::ostream& out) const { |
9f95a23c TL |
125 | std::lock_guard l(lock); |
126 | out << "hbstamp(osd." << osd << " up_from " << up_from | |
127 | << " peer_clock_delta ["; | |
128 | if (peer_clock_delta_lb) { | |
129 | out << *peer_clock_delta_lb; | |
130 | } | |
131 | out << ","; | |
132 | if (peer_clock_delta_ub) { | |
133 | out << *peer_clock_delta_ub; | |
134 | } | |
135 | out << "])"; | |
136 | } | |
137 | ||
138 | void sent_ping(std::optional<ceph::signedspan> *delta_ub) { | |
139 | std::lock_guard l(lock); | |
140 | // the non-primaries need a lower bound on remote clock - local clock. if | |
141 | // we assume the transit for the last ping_reply was | |
142 | // instantaneous, that would be (the negative of) our last | |
143 | // peer_clock_delta_lb value. | |
144 | if (peer_clock_delta_lb) { | |
145 | *delta_ub = - *peer_clock_delta_lb; | |
146 | } | |
147 | } | |
148 | ||
149 | void got_ping(epoch_t this_up_from, | |
150 | ceph::signedspan now, | |
151 | ceph::signedspan peer_send_stamp, | |
152 | std::optional<ceph::signedspan> delta_ub, | |
153 | ceph::signedspan *out_delta_ub) { | |
154 | std::lock_guard l(lock); | |
155 | if (this_up_from < up_from) { | |
156 | return; | |
157 | } | |
158 | if (this_up_from > up_from) { | |
159 | up_from = this_up_from; | |
160 | } | |
161 | peer_clock_delta_lb = peer_send_stamp - now; | |
162 | peer_clock_delta_ub = delta_ub; | |
163 | *out_delta_ub = - *peer_clock_delta_lb; | |
164 | } | |
165 | ||
166 | void got_ping_reply(ceph::signedspan now, | |
167 | ceph::signedspan peer_send_stamp, | |
168 | std::optional<ceph::signedspan> delta_ub) { | |
169 | std::lock_guard l(lock); | |
170 | peer_clock_delta_lb = peer_send_stamp - now; | |
171 | peer_clock_delta_ub = delta_ub; | |
172 | } | |
173 | ||
174 | private: | |
175 | FRIEND_MAKE_REF(HeartbeatStamps); | |
176 | HeartbeatStamps(int o) | |
177 | : RefCountedObject(NULL), | |
178 | osd(o) {} | |
179 | }; | |
180 | using HeartbeatStampsRef = ceph::ref_t<HeartbeatStamps>; | |
181 | ||
/// Stream a HeartbeatStamps via its (lock-taking) print() method.
inline std::ostream& operator<<(std::ostream& out, const HeartbeatStamps& hb)
{
  hb.print(out);
  return out;
}
187 | ||
188 | ||
/**
 * PeeringCtx
 *
 * Buffered outgoing peering messages plus the ObjectStore transaction
 * accumulating the corresponding local state changes.  Move-only.
 */
struct PeeringCtx : BufferedRecoveryMessages {
  ObjectStore::Transaction transaction;
  HBHandle* handle = nullptr;

  PeeringCtx(ceph_release_t r)
    : BufferedRecoveryMessages(r) {}

  // non-copyable but movable
  PeeringCtx(const PeeringCtx &) = delete;
  PeeringCtx &operator=(const PeeringCtx &) = delete;

  PeeringCtx(PeeringCtx &&) = default;
  PeeringCtx &operator=(PeeringCtx &&) = default;

  /// Discard any accumulated transaction state and start fresh.
  void reset_transaction() {
    transaction = ObjectStore::Transaction();
  }
};
206 | ||
207 | /** | |
208 | * Wraps PeeringCtx to hide the difference between buffering messages to | |
209 | * be sent after flush or immediately. | |
210 | */ | |
struct PeeringCtxWrapper {
  utime_t start_time;
  BufferedRecoveryMessages &msgs;
  ObjectStore::Transaction &transaction;
  HBHandle * const handle = nullptr;

  /// Wrap ctx directly: messages go straight into wrapped's buffer.
  PeeringCtxWrapper(PeeringCtx &wrapped) :
    msgs(wrapped),
    transaction(wrapped.transaction),
    handle(wrapped.handle) {}

  /// Wrap ctx but divert messages into the separate buffer buf
  /// (e.g. to hold them until a flush completes).
  PeeringCtxWrapper(BufferedRecoveryMessages &buf, PeeringCtx &wrapped)
    : msgs(buf),
      transaction(wrapped.transaction),
      handle(wrapped.handle) {}

  PeeringCtxWrapper(PeeringCtxWrapper &&ctx) = default;

  // All senders simply forward to the selected message buffer.
  void send_osd_message(int target, MessageRef m) {
    msgs.send_osd_message(target, std::move(m));
  }
  void send_notify(int to, const pg_notify_t &n) {
    msgs.send_notify(to, n);
  }
  void send_query(int to, spg_t spgid, const pg_query_t &q) {
    msgs.send_query(to, spgid, q);
  }
  void send_info(int to, spg_t to_spgid,
		 epoch_t min_epoch, epoch_t cur_epoch,
		 const pg_info_t &info,
		 std::optional<pg_lease_t> lease = {},
		 std::optional<pg_lease_ack_t> lease_ack = {}) {
    msgs.send_info(to, to_spgid, min_epoch, cur_epoch, info,
		   lease, lease_ack);
  }
};
247 | ||
f67539c2 | 248 | /* Encapsulates PG recovery process */ |
9f95a23c TL |
249 | class PeeringState : public MissingLoc::MappingInfo { |
250 | public: | |
  /**
   * PeeringListener
   *
   * Callback interface through which PeeringState drives its owner
   * (the PG / OSD glue): persistence, messaging, reservations,
   * lifecycle notifications, and logging.
   */
  struct PeeringListener : public EpochSource {
    /// Prepare t with written information
    virtual void prepare_write(
      pg_info_t &info,
      pg_info_t &last_written_info,
      PastIntervals &past_intervals,
      PGLog &pglog,
      bool dirty_info,
      bool dirty_big_info,
      bool need_write_epoch,
      ObjectStore::Transaction &t) = 0;

    /// Notify that info/history changed (generally to update scrub registration)
    virtual void on_info_history_change() = 0;
    /// Notify that a scrub has been requested
    virtual void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) = 0;

    /// Return current snap_trimq size
    virtual uint64_t get_snap_trimq_size() const = 0;

    /// Send cluster message to osd
    virtual void send_cluster_message(
      int osd, MessageRef m, epoch_t epoch, bool share_map_update=false) = 0;
    /// Send pg_created to mon
    virtual void send_pg_created(pg_t pgid) = 0;

    /// Current monotonic clock reading (see HeartbeatStamps delta docs)
    virtual ceph::signedspan get_mnow() = 0;
    /// Heartbeat stamp tracker for peer rank
    virtual HeartbeatStampsRef get_hb_stamps(int peer) = 0;
    virtual void schedule_renew_lease(epoch_t plr, ceph::timespan delay) = 0;
    virtual void queue_check_readable(epoch_t lpr, ceph::timespan delay) = 0;
    virtual void recheck_readable() = 0;

    virtual unsigned get_target_pg_log_entries() const = 0;

    // ============ Flush state ==================
    /**
     * try_flush_or_schedule_async()
     *
     * If true, caller may assume all past operations on this pg
     * have been flushed.  Else, caller will receive an on_flushed()
     * call once the flush has completed.
     */
    virtual bool try_flush_or_schedule_async() = 0;
    /// Arranges for a commit on t to call on_flushed() once flushed.
    virtual void start_flush_on_transaction(
      ObjectStore::Transaction &t) = 0;
    /// Notification that all outstanding flushes for interval have completed
    virtual void on_flushed() = 0;

    //============= Recovery ====================
    /// Arrange for event to be queued after delay
    virtual void schedule_event_after(
      PGPeeringEventRef event,
      float delay) = 0;
    /**
     * request_local_background_io_reservation
     *
     * Request reservation at priority with on_grant queued on grant
     * and on_preempt on preempt
     */
    virtual void request_local_background_io_reservation(
      unsigned priority,
      PGPeeringEventURef on_grant,
      PGPeeringEventURef on_preempt) = 0;
    /// Modify pending local background reservation request priority
    virtual void update_local_background_io_priority(
      unsigned priority) = 0;
    /// Cancel pending local background reservation request
    virtual void cancel_local_background_io_reservation() = 0;

    /**
     * request_remote_recovery_reservation
     *
     * Request reservation at priority with on_grant queued on grant
     * and on_preempt on preempt
     */
    virtual void request_remote_recovery_reservation(
      unsigned priority,
      PGPeeringEventURef on_grant,
      PGPeeringEventURef on_preempt) = 0;
    /// Cancel pending remote background reservation request
    virtual void cancel_remote_recovery_reservation() = 0;

    /// Arrange for on_commit to be queued upon commit of t
    virtual void schedule_event_on_commit(
      ObjectStore::Transaction &t,
      PGPeeringEventRef on_commit) = 0;

    //============================ HB =============================
    /// Update hb set to peers
    virtual void update_heartbeat_peers(std::set<int> peers) = 0;

    /// Set targets being probed in this interval
    virtual void set_probe_targets(const std::set<pg_shard_t> &probe_set) = 0;
    /// Clear targets being probed in this interval
    virtual void clear_probe_targets() = 0;

    /// Queue for a pg_temp of wanted
    virtual void queue_want_pg_temp(const std::vector<int> &wanted) = 0;
    /// Clear queue for a pg_temp of wanted
    virtual void clear_want_pg_temp() = 0;

    /// Arrange for stats to be shipped to mon to be updated for this pg
    virtual void publish_stats_to_osd() = 0;
    /// Clear stats to be shipped to mon for this pg
    virtual void clear_publish_stats() = 0;

    /// Notification to check outstanding operation targets
    virtual void check_recovery_sources(const OSDMapRef& newmap) = 0;
    /// Notification to check outstanding blocklist
    virtual void check_blocklisted_watchers() = 0;
    /// Notification to clear state associated with primary
    virtual void clear_primary_state() = 0;

    // =================== Event notification ====================
    virtual void on_pool_change() = 0;
    virtual void on_role_change() = 0;
    virtual void on_change(ObjectStore::Transaction &t) = 0;
    virtual void on_activate(interval_set<snapid_t> to_trim) = 0;
    virtual void on_activate_complete() = 0;
    virtual void on_new_interval() = 0;
    virtual Context *on_clean() = 0;
    virtual void on_activate_committed() = 0;
    virtual void on_active_exit() = 0;

    // ====================== PG deletion =======================
    /// Notification of removal complete, t must be populated to complete removal
    virtual void on_removal(ObjectStore::Transaction &t) = 0;
    /// Perform incremental removal work
    virtual std::pair<ghobject_t, bool> do_delete_work(
      ObjectStore::Transaction &t, ghobject_t _next) = 0;

    // ======================= PG Merge =========================
    virtual void clear_ready_to_merge() = 0;
    virtual void set_not_ready_to_merge_target(pg_t pgid, pg_t src) = 0;
    virtual void set_not_ready_to_merge_source(pg_t pgid) = 0;
    virtual void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) = 0;
    virtual void set_ready_to_merge_source(eversion_t lu) = 0;

    // ==================== Map notifications ===================
    virtual void on_active_actmap() = 0;
    virtual void on_active_advmap(const OSDMapRef &osdmap) = 0;
    virtual epoch_t oldest_stored_osdmap() = 0;

    // ============ recovery reservation notifications ==========
    virtual void on_backfill_reserved() = 0;
    virtual void on_backfill_canceled() = 0;
    virtual void on_recovery_reserved() = 0;

    // ================recovery space accounting ================
    virtual bool try_reserve_recovery_space(
      int64_t primary_num_bytes, int64_t local_num_bytes) = 0;
    virtual void unreserve_recovery_space() = 0;

    // ================== Peering log events ====================
    /// Get handler for rolling forward/back log entries
    virtual PGLog::LogEntryHandlerRef get_log_handler(
      ObjectStore::Transaction &t) = 0;

    // ============ On disk representation changes ==============
    virtual void rebuild_missing_set_with_deletes(PGLog &pglog) = 0;

    // ======================= Logging ==========================
    virtual PerfCounters &get_peering_perf() = 0;
    virtual PerfCounters &get_perf_logger() = 0;
    virtual void log_state_enter(const char *state) = 0;
    virtual void log_state_exit(
      const char *state_name, utime_t enter_time,
      uint64_t events, utime_t event_dur) = 0;
    virtual void dump_recovery_info(ceph::Formatter *f) const = 0;

    virtual OstreamTemp get_clog_info() = 0;
    virtual OstreamTemp get_clog_error() = 0;
    virtual OstreamTemp get_clog_debug() = 0;

    virtual ~PeeringListener() {}
  };
428 | ||
  /// Event: dump the current peering state into formatter f.
  struct QueryState : boost::statechart::event< QueryState > {
    ceph::Formatter *f;
    explicit QueryState(ceph::Formatter *f) : f(f) {}
    void print(std::ostream *out) const {
      *out << "Query";
    }
  };
436 | ||
f67539c2 TL |
  /// Event: dump unfound-object information into formatter f.
  struct QueryUnfound : boost::statechart::event< QueryUnfound > {
    ceph::Formatter *f;
    explicit QueryUnfound(ceph::Formatter *f) : f(f) {}
    void print(std::ostream *out) const {
      *out << "QueryUnfound";
    }
  };
444 | ||
9f95a23c TL |
  /// Event: osdmap advanced; carries the previous and new maps plus the
  /// new up/acting sets and primaries.
  struct AdvMap : boost::statechart::event< AdvMap > {
    OSDMapRef osdmap;      ///< the new map
    OSDMapRef lastmap;     ///< the map we are advancing from
    std::vector<int> newup, newacting;
    int up_primary, acting_primary;
    AdvMap(
      OSDMapRef osdmap, OSDMapRef lastmap,
      std::vector<int>& newup, int up_primary,
      std::vector<int>& newacting, int acting_primary):
      osdmap(osdmap), lastmap(lastmap),
      newup(newup),
      newacting(newacting),
      up_primary(up_primary),
      acting_primary(acting_primary) {}
    void print(std::ostream *out) const {
      *out << "AdvMap";
    }
  };
463 | ||
  /// Event: act on the current map (no payload).
  struct ActMap : boost::statechart::event< ActMap > {
    ActMap() : boost::statechart::event< ActMap >() {}
    void print(std::ostream *out) const {
      *out << "ActMap";
    }
  };
  /// Event: activate the PG; carries the epoch of the activation.
  struct Activate : boost::statechart::event< Activate > {
    epoch_t activation_epoch;
    explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
				   activation_epoch(q) {}
    void print(std::ostream *out) const {
      *out << "Activate from " << activation_epoch;
    }
  };
  /// Event: an activation has committed; carries the activation epoch
  /// and the epoch at which the commit was processed.
  struct ActivateCommitted : boost::statechart::event< ActivateCommitted > {
    epoch_t epoch;
    epoch_t activation_epoch;
    explicit ActivateCommitted(epoch_t e, epoch_t ae)
      : boost::statechart::event< ActivateCommitted >(),
	epoch(e),
	activation_epoch(ae) {}
    void print(std::ostream *out) const {
      *out << "ActivateCommitted from " << activation_epoch
	   << " processed at " << epoch;
    }
  };
490 | public: | |
  /// Event: backfill hit unfound objects (no payload).
  struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
    explicit UnfoundBackfill() {}
    void print(std::ostream *out) const {
      *out << "UnfoundBackfill";
    }
  };
  /// Event: recovery hit unfound objects (no payload).
  struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> {
    explicit UnfoundRecovery() {}
    void print(std::ostream *out) const {
      *out << "UnfoundRecovery";
    }
  };
503 | ||
  /// Event: a scrub was requested; the bool ctor args map onto
  /// scrub_level_t (deep?) and scrub_type_t (repair?).
  struct RequestScrub : boost::statechart::event<RequestScrub> {
    scrub_level_t deep;
    scrub_type_t repair;
    explicit RequestScrub(bool d, bool r) : deep(scrub_level_t(d)), repair(scrub_type_t(r)) {}
    void print(std::ostream *out) const {
      *out << "RequestScrub(" << ((deep==scrub_level_t::deep) ? "deep" : "shallow")
	   << ((repair==scrub_type_t::do_repair) ? " repair)" : ")");
    }
  };
513 | ||
514 | TrivialEvent(Initialize) | |
515 | TrivialEvent(GotInfo) | |
516 | TrivialEvent(NeedUpThru) | |
517 | TrivialEvent(Backfilled) | |
518 | TrivialEvent(LocalBackfillReserved) | |
519 | TrivialEvent(RejectTooFullRemoteReservation) | |
520 | TrivialEvent(RequestBackfill) | |
521 | TrivialEvent(RemoteRecoveryPreempted) | |
522 | TrivialEvent(RemoteBackfillPreempted) | |
523 | TrivialEvent(BackfillTooFull) | |
524 | TrivialEvent(RecoveryTooFull) | |
525 | ||
526 | TrivialEvent(MakePrimary) | |
527 | TrivialEvent(MakeStray) | |
528 | TrivialEvent(NeedActingChange) | |
529 | TrivialEvent(IsIncomplete) | |
530 | TrivialEvent(IsDown) | |
531 | ||
532 | TrivialEvent(AllReplicasRecovered) | |
533 | TrivialEvent(DoRecovery) | |
534 | TrivialEvent(LocalRecoveryReserved) | |
535 | TrivialEvent(AllRemotesReserved) | |
536 | TrivialEvent(AllBackfillsReserved) | |
537 | TrivialEvent(GoClean) | |
538 | ||
539 | TrivialEvent(AllReplicasActivated) | |
540 | ||
541 | TrivialEvent(IntervalFlush) | |
542 | ||
543 | TrivialEvent(DeleteStart) | |
544 | TrivialEvent(DeleteSome) | |
545 | ||
546 | TrivialEvent(SetForceRecovery) | |
547 | TrivialEvent(UnsetForceRecovery) | |
548 | TrivialEvent(SetForceBackfill) | |
549 | TrivialEvent(UnsetForceBackfill) | |
550 | ||
551 | TrivialEvent(DeleteReserved) | |
552 | TrivialEvent(DeleteInterrupted) | |
553 | ||
554 | TrivialEvent(CheckReadable) | |
555 | ||
556 | void start_handle(PeeringCtx *new_ctx); | |
557 | void end_handle(); | |
558 | void begin_block_outgoing(); | |
559 | void end_block_outgoing(); | |
560 | void clear_blocked_outgoing(); | |
561 | private: | |
562 | ||
563 | /* States */ | |
564 | struct Initial; | |
565 | class PeeringMachine : public boost::statechart::state_machine< PeeringMachine, Initial > { | |
566 | public: | |
567 | PeeringState *state; | |
568 | PGStateHistory *state_history; | |
569 | CephContext *cct; | |
570 | spg_t spgid; | |
571 | DoutPrefixProvider *dpp; | |
572 | PeeringListener *pl; | |
573 | ||
574 | utime_t event_time; | |
575 | uint64_t event_count; | |
576 | ||
577 | void clear_event_counters() { | |
578 | event_time = utime_t(); | |
579 | event_count = 0; | |
580 | } | |
581 | ||
582 | void log_enter(const char *state_name); | |
583 | void log_exit(const char *state_name, utime_t duration); | |
584 | ||
585 | PeeringMachine( | |
586 | PeeringState *state, CephContext *cct, | |
587 | spg_t spgid, | |
588 | DoutPrefixProvider *dpp, | |
589 | PeeringListener *pl, | |
590 | PGStateHistory *state_history) : | |
591 | state(state), | |
592 | state_history(state_history), | |
593 | cct(cct), spgid(spgid), | |
594 | dpp(dpp), pl(pl), | |
595 | event_count(0) {} | |
596 | ||
597 | /* Accessor functions for state methods */ | |
598 | ObjectStore::Transaction& get_cur_transaction() { | |
599 | ceph_assert(state->rctx); | |
600 | return state->rctx->transaction; | |
601 | } | |
602 | ||
603 | PeeringCtxWrapper &get_recovery_ctx() { | |
604 | assert(state->rctx); | |
605 | return *(state->rctx); | |
606 | } | |
607 | ||
608 | void send_notify(int to, const pg_notify_t &n) { | |
609 | ceph_assert(state->rctx); | |
610 | state->rctx->send_notify(to, n); | |
611 | } | |
612 | void send_query(int to, const pg_query_t &query) { | |
613 | state->rctx->send_query( | |
614 | to, | |
615 | spg_t(spgid.pgid, query.to), | |
616 | query); | |
617 | } | |
618 | }; | |
619 | friend class PeeringMachine; | |
620 | ||
621 | /* States */ | |
622 | // Initial | |
623 | // Reset | |
624 | // Start | |
625 | // Started | |
626 | // Primary | |
627 | // WaitActingChange | |
628 | // Peering | |
629 | // GetInfo | |
630 | // GetLog | |
631 | // GetMissing | |
632 | // WaitUpThru | |
633 | // Incomplete | |
634 | // Active | |
635 | // Activating | |
636 | // Clean | |
637 | // Recovered | |
638 | // Backfilling | |
639 | // WaitRemoteBackfillReserved | |
640 | // WaitLocalBackfillReserved | |
641 | // NotBackfilling | |
642 | // NotRecovering | |
643 | // Recovering | |
644 | // WaitRemoteRecoveryReserved | |
645 | // WaitLocalRecoveryReserved | |
646 | // ReplicaActive | |
647 | // RepNotRecovering | |
648 | // RepRecovering | |
649 | // RepWaitBackfillReserved | |
650 | // RepWaitRecoveryReserved | |
651 | // Stray | |
652 | // ToDelete | |
653 | // WaitDeleteReserved | |
654 | // Deleting | |
655 | // Crashed | |
656 | ||
  /// Catch-all state; other states transition here via their
  /// transition< boost::statechart::event_base, Crashed > reactions.
  struct Crashed : boost::statechart::state< Crashed, PeeringMachine >, NamedState {
    explicit Crashed(my_context ctx);
  };
660 | ||
661 | struct Reset; | |
662 | ||
  /// Initial machine state; Initialize moves it to Reset, any other
  /// unhandled event lands in Crashed.
  struct Initial : boost::statechart::state< Initial, PeeringMachine >, NamedState {
    explicit Initial(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::transition< Initialize, Reset >,
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;

    boost::statechart::result react(const MNotifyRec&);
    boost::statechart::result react(const MInfoRec&);
    boost::statechart::result react(const MLogRec&);
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };
680 | ||
  /// Quiescent state between intervals; handles map advances and
  /// flushes, crashes on anything unexpected.
  struct Reset : boost::statechart::state< Reset, PeeringMachine >, NamedState {
    explicit Reset(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< ActMap >,
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::custom_reaction< IntervalFlush >,
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const ActMap&);
    boost::statechart::result react(const IntervalFlush&);
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };
703 | ||
704 | struct Start; | |
705 | ||
  /// Outer state for a live interval; initial inner state is Start.
  struct Started : boost::statechart::state< Started, PeeringMachine, Start >, NamedState {
    explicit Started(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< IntervalFlush >,
      // ignored
      boost::statechart::custom_reaction< NullEvt >,
      boost::statechart::custom_reaction<SetForceRecovery>,
      boost::statechart::custom_reaction<UnsetForceRecovery>,
      boost::statechart::custom_reaction<SetForceBackfill>,
      boost::statechart::custom_reaction<UnsetForceBackfill>,
      boost::statechart::custom_reaction<RequestScrub>,
      boost::statechart::custom_reaction<CheckReadable>,
      // crash
      boost::statechart::transition< boost::statechart::event_base, Crashed >
      > reactions;
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const IntervalFlush&);
    boost::statechart::result react(const boost::statechart::event_base&) {
      return discard_event();
    }
  };
734 | ||
735 | struct Primary; | |
736 | struct Stray; | |
737 | ||
  /// Fork point within Started: become Primary or Stray depending on
  /// the MakePrimary/MakeStray event received.
  struct Start : boost::statechart::state< Start, Started >, NamedState {
    explicit Start(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::transition< MakePrimary, Primary >,
      boost::statechart::transition< MakeStray, Stray >
      > reactions;
  };
747 | ||
748 | struct Peering; | |
749 | struct WaitActingChange; | |
750 | struct Incomplete; | |
751 | struct Down; | |
752 | ||
  /// Primary-role outer state; initial inner state is Peering.
  struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
    explicit Primary(my_context ctx);
    void exit();

    typedef boost::mpl::list <
      boost::statechart::custom_reaction< ActMap >,
      boost::statechart::custom_reaction< MNotifyRec >,
      boost::statechart::custom_reaction<SetForceRecovery>,
      boost::statechart::custom_reaction<UnsetForceRecovery>,
      boost::statechart::custom_reaction<SetForceBackfill>,
      boost::statechart::custom_reaction<UnsetForceBackfill>,
      boost::statechart::custom_reaction<RequestScrub>
      > reactions;
    boost::statechart::result react(const ActMap&);
    boost::statechart::result react(const MNotifyRec&);
    boost::statechart::result react(const SetForceRecovery&);
    boost::statechart::result react(const UnsetForceRecovery&);
    boost::statechart::result react(const SetForceBackfill&);
    boost::statechart::result react(const UnsetForceBackfill&);
    boost::statechart::result react(const RequestScrub&);
  };
774 | ||
  /// Primary sub-state: waiting for a requested acting-set change to
  /// take effect; buffers/handles peer messages meanwhile.
  struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
			    NamedState {
    typedef boost::mpl::list <
      boost::statechart::custom_reaction< QueryState >,
      boost::statechart::custom_reaction< QueryUnfound >,
      boost::statechart::custom_reaction< AdvMap >,
      boost::statechart::custom_reaction< MLogRec >,
      boost::statechart::custom_reaction< MInfoRec >,
      boost::statechart::custom_reaction< MNotifyRec >
      > reactions;
    explicit WaitActingChange(my_context ctx);
    boost::statechart::result react(const QueryState& q);
    boost::statechart::result react(const QueryUnfound& q);
    boost::statechart::result react(const AdvMap&);
    boost::statechart::result react(const MLogRec&);
    boost::statechart::result react(const MInfoRec&);
    boost::statechart::result react(const MNotifyRec&);
    void exit();
  };
794 | ||
795 | struct GetInfo; | |
796 | struct Active; | |
797 | ||
/// Primary substate: gather infos/logs/missing from peers (initial inner
/// state GetInfo); an Activate event transitions straight to Active.
struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
  PastIntervals::PriorSet prior_set;  ///< OSDs consulted while peering
  bool history_les_bound;  ///< need osd_find_best_info_ignore_history_les

  explicit Peering(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< Activate, Active >,
    boost::statechart::custom_reaction< AdvMap >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  /// a new map may invalidate prior_set and force a peering restart
  boost::statechart::result react(const AdvMap &advmap);
};
815 | ||
816 | struct WaitLocalRecoveryReserved; | |
817 | struct Activating; | |
/// Primary substate: the PG is active (initial inner state Activating).
/// Handles activation commits, lease renewal, and recovery/backfill
/// reservation traffic; most reservation-revocation events are
/// discarded here and only acted on by the relevant inner states.
struct Active : boost::statechart::state< Active, Primary, Activating >, NamedState {
  explicit Active(my_context ctx);
  void exit();

  /// shards we must reserve remotely before recovery may proceed
  const std::set<pg_shard_t> remote_shards_to_reserve_recovery;
  /// shards we must reserve remotely before backfill may proceed
  const std::set<pg_shard_t> remote_shards_to_reserve_backfill;
  bool all_replicas_activated;  ///< set once every replica has activated

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< AdvMap >,
    boost::statechart::custom_reaction< MInfoRec >,
    boost::statechart::custom_reaction< MNotifyRec >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< MTrim >,
    boost::statechart::custom_reaction< Backfilled >,
    boost::statechart::custom_reaction< ActivateCommitted >,
    boost::statechart::custom_reaction< AllReplicasActivated >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< DeferBackfill >,
    boost::statechart::custom_reaction< UnfoundRecovery >,
    boost::statechart::custom_reaction< UnfoundBackfill >,
    boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
    boost::statechart::custom_reaction< RemoteReservationRevoked>,
    boost::statechart::custom_reaction< DoRecovery>,
    boost::statechart::custom_reaction< RenewLease>,
    boost::statechart::custom_reaction< MLeaseAck>,
    boost::statechart::custom_reaction< CheckReadable>
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const ActMap&);
  boost::statechart::result react(const AdvMap&);
  boost::statechart::result react(const MInfoRec& infoevt);
  boost::statechart::result react(const MNotifyRec& notevt);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const MTrim& trimevt);
  boost::statechart::result react(const Backfilled&) {
    return discard_event();  // only meaningful inside Backfilling
  }
  boost::statechart::result react(const ActivateCommitted&);
  boost::statechart::result react(const AllReplicasActivated&);
  boost::statechart::result react(const RenewLease&);
  boost::statechart::result react(const MLeaseAck&);
  boost::statechart::result react(const DeferRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const DeferBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteReservationRevokedTooFull&) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteReservationRevoked&) {
    return discard_event();
  }
  boost::statechart::result react(const DoRecovery&) {
    return discard_event();
  }
  boost::statechart::result react(const CheckReadable&);
  /// called when every replica has both activated and committed
  void all_activated_and_committed();
};
888 | ||
/// Active substate: fully recovered and clean; DoRecovery restarts the
/// recovery reservation path, while force-recovery/backfill requests fall
/// through to the catch-all below and are discarded.
struct Clean : boost::statechart::state< Clean, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::custom_reaction<SetForceRecovery>,
    boost::statechart::custom_reaction<SetForceBackfill>
  > reactions;
  explicit Clean(my_context ctx);
  void exit();
  /// catch-all for the custom reactions above: already clean, nothing to force
  boost::statechart::result react(const boost::statechart::event_base&) {
    return discard_event();
  }
};
901 | ||
/// Active substate: recovery finished; once all replicas have activated we
/// self-post GoClean to move to Clean.
struct Recovered : boost::statechart::state< Recovered, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< GoClean, Clean >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::custom_reaction< AllReplicasActivated >
  > reactions;
  explicit Recovered(my_context ctx);
  void exit();
  boost::statechart::result react(const AllReplicasActivated&) {
    post_event(GoClean());
    return forward_event();  // let outer Active also see the event
  }
};
915 | ||
/// Active substate: backfill is in progress; handles completion, deferral,
/// unfound objects, and reservation revocations from backfill targets.
struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< Backfilled >,
    boost::statechart::custom_reaction< DeferBackfill >,
    boost::statechart::custom_reaction< UnfoundBackfill >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
    boost::statechart::custom_reaction< RemoteReservationRevoked>
    > reactions;
  explicit Backfilling(my_context ctx);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt) {
    // for compat with old peers: translate the legacy rejection into the
    // modern revocation event and let that reaction do the work
    post_event(RemoteReservationRevokedTooFull());
    return discard_event();
  }
  /// release the remote backfill reservations we hold
  void backfill_release_reservations();
  boost::statechart::result react(const Backfilled& evt);
  boost::statechart::result react(const RemoteReservationRevokedTooFull& evt);
  boost::statechart::result react(const RemoteReservationRevoked& evt);
  boost::statechart::result react(const DeferBackfill& evt);
  boost::statechart::result react(const UnfoundBackfill& evt);
  void cancel_backfill();
  void exit();
};
940 | ||
/// Active substate: requesting backfill reservations from remote shards,
/// one at a time; when all are granted, AllBackfillsReserved starts
/// Backfilling.
struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationRevoked >,
    boost::statechart::transition< AllBackfillsReserved, Backfilling >
    > reactions;
  /// next shard (in remote_shards_to_reserve_backfill) to ask for a reservation
  std::set<pg_shard_t>::const_iterator backfill_osd_it;
  explicit WaitRemoteBackfillReserved(my_context ctx);
  /// back off and retry the reservation sequence later
  void retry();
  void exit();
  boost::statechart::result react(const RemoteBackfillReserved& evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
  boost::statechart::result react(const RemoteReservationRevoked& evt);
};
956 | ||
/// Active substate: waiting for the local backfill reservation; once
/// granted, move on to reserving the remote shards.
struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< LocalBackfillReserved, WaitRemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteBackfillReserved >
    > reactions;
  explicit WaitLocalBackfillReserved(my_context ctx);
  boost::statechart::result react(const RemoteBackfillReserved& evt) {
    /* no-op: a stale remote grant arriving before the local one */
    return discard_event();
  }
  void exit();
};
969 | ||
/// Active substate: not currently backfilling; a RequestBackfill kicks off
/// the local-then-remote reservation sequence.
struct NotBackfilling : boost::statechart::state< NotBackfilling, Active>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved>,
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >
    > reactions;
  explicit NotBackfilling(my_context ctx);
  void exit();
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const RemoteBackfillReserved& evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
};
983 | ||
/// Active substate: not currently recovering; DoRecovery starts the
/// recovery reservation sequence, deferrals/unfound events are no-ops.
struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< UnfoundRecovery >
    > reactions;
  explicit NotRecovering(my_context ctx);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const DeferRecovery& evt) {
    /* no-op: nothing to defer, we are not recovering */
    return discard_event();
  }
  boost::statechart::result react(const UnfoundRecovery& evt) {
    /* no-op */
    return discard_event();
  }
  void exit();
};
1003 | ||
1004 | struct ToDelete; | |
1005 | struct RepNotRecovering; | |
/// Started substate for a non-primary shard that is active (initial inner
/// state RepNotRecovering).  Processes messages from the primary
/// (info/log/trim/lease), activation, and reservation preemption; a
/// DeleteStart event moves the shard to ToDelete.
struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
  explicit ReplicaActive(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< MQuery >,
    boost::statechart::custom_reaction< MInfoRec >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< MTrim >,
    boost::statechart::custom_reaction< Activate >,
    boost::statechart::custom_reaction< ActivateCommitted >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< DeferBackfill >,
    boost::statechart::custom_reaction< UnfoundRecovery >,
    boost::statechart::custom_reaction< UnfoundBackfill >,
    boost::statechart::custom_reaction< RemoteBackfillPreempted >,
    boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
    boost::statechart::custom_reaction< RecoveryDone >,
    boost::statechart::transition<DeleteStart, ToDelete>,
    boost::statechart::custom_reaction< MLease >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MInfoRec& infoevt);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const MTrim& trimevt);
  boost::statechart::result react(const ActMap&);
  boost::statechart::result react(const MQuery&);
  boost::statechart::result react(const Activate&);
  boost::statechart::result react(const ActivateCommitted&);
  boost::statechart::result react(const MLease&);
  boost::statechart::result react(const RecoveryDone&) {
    return discard_event();
  }
  // the events below are only meaningful in the Rep* inner states;
  // at this level they are simply discarded
  boost::statechart::result react(const DeferRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const DeferBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundRecovery& evt) {
    return discard_event();
  }
  boost::statechart::result react(const UnfoundBackfill& evt) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteBackfillPreempted& evt) {
    return discard_event();
  }
  boost::statechart::result react(const RemoteRecoveryPreempted& evt) {
    return discard_event();
  }
};
1062 | ||
/// ReplicaActive substate: this shard is serving a recovery/backfill
/// reservation for the primary; completion or cancellation returns it to
/// RepNotRecovering.
struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::transition< RecoveryDone, RepNotRecovering >,
    // for compat with old peers
    boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
    boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
    boost::statechart::custom_reaction< BackfillTooFull >,
    boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
    boost::statechart::custom_reaction< RemoteBackfillPreempted >
    > reactions;
  explicit RepRecovering(my_context ctx);
  boost::statechart::result react(const RemoteRecoveryPreempted &evt);
  boost::statechart::result react(const BackfillTooFull &evt);
  boost::statechart::result react(const RemoteBackfillPreempted &evt);
  void exit();
};
1079 | ||
/// ReplicaActive substate: a backfill reservation request from the primary
/// is pending the local reserver's decision.
struct RepWaitBackfillReserved : boost::statechart::state< RepWaitBackfillReserved, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationCanceled >
    > reactions;
  explicit RepWaitBackfillReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RemoteBackfillReserved &evt);
  boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull &evt);
  boost::statechart::result react(const RemoteReservationCanceled &evt);
};
1094 | ||
/// ReplicaActive substate: a recovery reservation request from the primary
/// is pending the local reserver's decision.
struct RepWaitRecoveryReserved : boost::statechart::state< RepWaitRecoveryReserved, ReplicaActive >, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    // for compat with old peers
    boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
    boost::statechart::custom_reaction< RemoteReservationCanceled >
    > reactions;
  explicit RepWaitRecoveryReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RemoteRecoveryReserved &evt);
  boost::statechart::result react(const RemoteReservationRejectedTooFull &evt) {
    // for compat with old peers: treat the legacy rejection as a cancel
    post_event(RemoteReservationCanceled());
    return discard_event();
  }
  boost::statechart::result react(const RemoteReservationCanceled &evt);
};
1112 | ||
/// ReplicaActive substate (initial): idle replica; reservation-priority
/// requests from the primary move it into the RepWait* states.
struct RepNotRecovering : boost::statechart::state< RepNotRecovering, ReplicaActive>, NamedState {
  typedef boost::mpl::list<
    boost::statechart::custom_reaction< RequestRecoveryPrio >,
    boost::statechart::custom_reaction< RequestBackfillPrio >,
    boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
    boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
    boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    boost::statechart::custom_reaction< RemoteBackfillReserved >,
    boost::statechart::transition< RecoveryDone, RepNotRecovering >  // for compat with pre-reservation peers
    > reactions;
  explicit RepNotRecovering(my_context ctx);
  boost::statechart::result react(const RequestRecoveryPrio &evt);
  boost::statechart::result react(const RequestBackfillPrio &evt);
  boost::statechart::result react(const RemoteBackfillReserved &evt) {
    // my reservation completion raced with a RELEASE from primary
    return discard_event();
  }
  boost::statechart::result react(const RemoteRecoveryReserved &evt) {
    // my reservation completion raced with a RELEASE from primary
    return discard_event();
  }
  boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
  void exit();
};
1138 | ||
/// Active substate: object recovery is in progress; may complete, defer,
/// hit unfound objects, or escalate to backfill.
struct Recovering : boost::statechart::state< Recovering, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< AllReplicasRecovered >,
    boost::statechart::custom_reaction< DeferRecovery >,
    boost::statechart::custom_reaction< UnfoundRecovery >,
    boost::statechart::custom_reaction< RequestBackfill >
    > reactions;
  explicit Recovering(my_context ctx);
  void exit();
  /// drop held recovery reservations; @p cancel indicates an abort rather
  /// than normal completion
  void release_reservations(bool cancel = false);
  boost::statechart::result react(const AllReplicasRecovered &evt);
  boost::statechart::result react(const DeferRecovery& evt);
  boost::statechart::result react(const UnfoundRecovery& evt);
  boost::statechart::result react(const RequestBackfill &evt);
};
1154 | ||
/// Active substate: requesting recovery reservations from remote shards;
/// once every remote has granted, AllRemotesReserved starts Recovering.
struct WaitRemoteRecoveryReserved : boost::statechart::state< WaitRemoteRecoveryReserved, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< RemoteRecoveryReserved >,
    boost::statechart::transition< AllRemotesReserved, Recovering >
    > reactions;
  /// next shard (in remote_shards_to_reserve_recovery) to ask for a reservation
  std::set<pg_shard_t>::const_iterator remote_recovery_reservation_it;
  explicit WaitRemoteRecoveryReserved(my_context ctx);
  boost::statechart::result react(const RemoteRecoveryReserved &evt);
  void exit();
};
1165 | ||
/// Active substate: waiting for the local recovery reservation; a full
/// local reserver signals RecoveryTooFull instead.
struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
    boost::statechart::custom_reaction< RecoveryTooFull >
    > reactions;
  explicit WaitLocalRecoveryReserved(my_context ctx);
  void exit();
  boost::statechart::result react(const RecoveryTooFull &evt);
};
1175 | ||
/// Active substate (initial): activation in flight; dispatches purely via
/// transitions to Recovered, recovery, or backfill reservation paths.
struct Activating : boost::statechart::state< Activating, Active >, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition< AllReplicasRecovered, Recovered >,
    boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
    boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved >
    > reactions;
  explicit Activating(my_context ctx);
  void exit();
};
1185 | ||
/// Started substate: this OSD holds PG data but is not in the acting set;
/// it answers queries from the primary and may be told to delete itself
/// (DeleteStart -> ToDelete).
struct Stray : boost::statechart::state< Stray, Started >,
	       NamedState {
  explicit Stray(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< MQuery >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< MInfoRec >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< RecoveryDone >,
    boost::statechart::transition<DeleteStart, ToDelete>
    > reactions;
  boost::statechart::result react(const MQuery& query);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const MInfoRec& infoevt);
  boost::statechart::result react(const ActMap&);
  boost::statechart::result react(const RecoveryDone&) {
    return discard_event();
  }
};
1207 | ||
1208 | struct WaitDeleteReserved; | |
/// Started substate: this PG instance is slated for deletion (initial
/// inner state WaitDeleteReserved).
struct ToDelete : boost::statechart::state<ToDelete, Started, WaitDeleteReserved>, NamedState {
  unsigned priority = 0;  ///< priority used for the delete reservation
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< ActivateCommitted >,
    boost::statechart::custom_reaction< DeleteSome >
    > reactions;
  explicit ToDelete(my_context ctx);
  boost::statechart::result react(const ActMap &evt);
  boost::statechart::result react(const DeleteSome &evt) {
    // happens if we drop out of Deleting due to reprioritization etc.
    return discard_event();
  }
  boost::statechart::result react(const ActivateCommitted&) {
    // Can happen if we were activated as a stray but not actually pulled
    // from prior to the pg going clean and sending a delete.
    return discard_event();
  }
  void exit();
};
1229 | ||
1230 | struct Deleting; | |
/// ToDelete substate (initial): waiting for a local delete reservation;
/// once granted, DeleteReserved moves to Deleting.
struct WaitDeleteReserved : boost::statechart::state<WaitDeleteReserved,
						     ToDelete>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::transition<DeleteReserved, Deleting>
    > reactions;
  explicit WaitDeleteReserved(my_context ctx);
  void exit();
};
1239 | ||
/// ToDelete substate: deleting PG objects in batches driven by DeleteSome;
/// DeleteInterrupted drops back to WaitDeleteReserved.
struct Deleting : boost::statechart::state<Deleting,
					   ToDelete>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< DeleteSome >,
    boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved>
    > reactions;
  ghobject_t next;  ///< cursor: next object to delete
  explicit Deleting(my_context ctx);
  boost::statechart::result react(const DeleteSome &evt);
  void exit();
};
1251 | ||
1252 | struct GetLog; | |
1253 | ||
/// Peering substate (initial): collect pg_info_t from prior-set peers;
/// GotInfo advances to GetLog, IsDown sends us to Down.
struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
  std::set<pg_shard_t> peer_info_requested;  ///< peers with an info request in flight

  explicit GetInfo(my_context ctx);
  void exit();
  /// (re)send info requests to peers we have not yet heard from
  void get_infos();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::transition< GotInfo, GetLog >,
    boost::statechart::custom_reaction< MNotifyRec >,
    boost::statechart::transition< IsDown, Down >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MNotifyRec& infoevt);
};
1272 | ||
/// Event: the authoritative log has been received (consumed by GetLog).
struct GotLog : boost::statechart::event< GotLog > {
  GotLog() : boost::statechart::event< GotLog >() {}
};
1276 | ||
/// Peering substate: fetch the authoritative log from auth_log_shard;
/// may instead discover we need an acting-set change or are incomplete.
struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState {
  pg_shard_t auth_log_shard;          ///< shard holding the authoritative log
  boost::intrusive_ptr<MOSDPGLog> msg;  ///< received log message, pending GotLog

  explicit GetLog(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::custom_reaction< GotLog >,
    boost::statechart::custom_reaction< AdvMap >,
    boost::statechart::transition< NeedActingChange, WaitActingChange >,
    boost::statechart::transition< IsIncomplete, Incomplete >
    > reactions;
  boost::statechart::result react(const AdvMap&);
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MLogRec& logevt);
  boost::statechart::result react(const GotLog&);
};
1299 | ||
1300 | struct WaitUpThru; | |
1301 | ||
/// Peering substate: collect missing sets from peers whose logs we have;
/// NeedUpThru moves us to WaitUpThru.
struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
  std::set<pg_shard_t> peer_missing_requested;  ///< peers with a missing-set request in flight

  explicit GetMissing(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MLogRec >,
    boost::statechart::transition< NeedUpThru, WaitUpThru >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MLogRec& logevt);
};
1318 | ||
/// Peering substate: waiting for our up_thru to be reflected in the OSDMap
/// before activation can proceed.
struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
  explicit WaitUpThru(my_context ctx);
  void exit();

  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< ActMap >,
    boost::statechart::custom_reaction< MLogRec >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const ActMap& am);
  boost::statechart::result react(const MLogRec& logrec);
};
1334 | ||
/// Peering substate: peering cannot proceed because needed OSDs are down;
/// notifies from returning peers may unblock us.
struct Down : boost::statechart::state< Down, Peering>, NamedState {
  explicit Down(my_context ctx);
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< QueryState >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< MNotifyRec >
    > reactions;
  boost::statechart::result react(const QueryState& q);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const MNotifyRec& infoevt);
  void exit();
};
1347 | ||
/// Peering substate: we could not find a complete, non-divergent log; new
/// maps or peer notifies may let peering be retried.
struct Incomplete : boost::statechart::state< Incomplete, Peering>, NamedState {
  typedef boost::mpl::list <
    boost::statechart::custom_reaction< AdvMap >,
    boost::statechart::custom_reaction< MNotifyRec >,
    boost::statechart::custom_reaction< QueryUnfound >,
    boost::statechart::custom_reaction< QueryState >
    > reactions;
  explicit Incomplete(my_context ctx);
  boost::statechart::result react(const AdvMap &advmap);
  boost::statechart::result react(const MNotifyRec& infoevt);
  boost::statechart::result react(const QueryUnfound& q);
  boost::statechart::result react(const QueryState& q);
  void exit();
};
1362 | ||
1363 | PGStateHistory state_history; | |
1364 | CephContext* cct; | |
1365 | spg_t spgid; | |
1366 | DoutPrefixProvider *dpp; | |
1367 | PeeringListener *pl; | |
1368 | ||
1369 | /// context passed in by state machine caller | |
1370 | PeeringCtx *orig_ctx; | |
1371 | ||
1372 | /// populated if we are buffering messages pending a flush | |
1373 | std::optional<BufferedRecoveryMessages> messages_pending_flush; | |
1374 | ||
1375 | /** | |
1376 | * populated between start_handle() and end_handle(), points into | |
1377 | * the message lists for messages_pending_flush while blocking messages | |
1378 | * or into orig_ctx otherwise | |
1379 | */ | |
1380 | std::optional<PeeringCtxWrapper> rctx; | |
1381 | ||
1382 | /** | |
1383 | * OSDMap state | |
1384 | */ | |
1385 | OSDMapRef osdmap_ref; ///< Reference to current OSDMap | |
1386 | PGPool pool; ///< Current pool state | |
1387 | epoch_t last_persisted_osdmap = 0; ///< Last osdmap epoch persisted | |
1388 | ||
1389 | ||
1390 | /** | |
1391 | * Peering state information | |
1392 | */ | |
1393 | int role = -1; ///< 0 = primary, 1 = replica, -1=none. | |
1394 | uint64_t state = 0; ///< PG_STATE_* | |
1395 | ||
1396 | pg_shard_t primary; ///< id/shard of primary | |
1397 | pg_shard_t pg_whoami; ///< my id/shard | |
1398 | pg_shard_t up_primary; ///< id/shard of primary of up set | |
f67539c2 TL |
1399 | std::vector<int> up; ///< crush mapping without temp pgs |
1400 | std::set<pg_shard_t> upset; ///< up in set form | |
1401 | std::vector<int> acting; ///< actual acting set for the current interval | |
1402 | std::set<pg_shard_t> actingset; ///< acting in set form | |
9f95a23c TL |
1403 | |
1404 | /// union of acting, recovery, and backfill targets | |
f67539c2 | 1405 | std::set<pg_shard_t> acting_recovery_backfill; |
9f95a23c | 1406 | |
f67539c2 | 1407 | std::vector<HeartbeatStampsRef> hb_stamps; |
9f95a23c TL |
1408 | |
1409 | ceph::signedspan readable_interval = ceph::signedspan::zero(); | |
1410 | ||
1411 | /// how long we can service reads in this interval | |
1412 | ceph::signedspan readable_until = ceph::signedspan::zero(); | |
1413 | ||
1414 | /// upper bound on any acting OSDs' readable_until in this interval | |
1415 | ceph::signedspan readable_until_ub = ceph::signedspan::zero(); | |
1416 | ||
1417 | /// upper bound from prior interval(s) | |
1418 | ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero(); | |
1419 | ||
1420 | /// pg instances from prior interval(s) that may still be readable | |
f67539c2 | 1421 | std::set<int> prior_readable_down_osds; |
9f95a23c TL |
1422 | |
1423 | /// [replica] upper bound we got from the primary (primary's clock) | |
1424 | ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero(); | |
1425 | ||
1426 | /// [primary] last upper bound shared by primary to replicas | |
1427 | ceph::signedspan readable_until_ub_sent = ceph::signedspan::zero(); | |
1428 | ||
1429 | /// [primary] readable ub acked by acting set members | |
f67539c2 | 1430 | std::vector<ceph::signedspan> acting_readable_until_ub; |
9f95a23c TL |
1431 | |
1432 | bool send_notify = false; ///< True if a notify needs to be sent to the primary | |
1433 | ||
1434 | bool dirty_info = false; ///< small info structure on disk out of date | |
1435 | bool dirty_big_info = false; ///< big info structure on disk out of date | |
1436 | ||
1437 | pg_info_t info; ///< current pg info | |
1438 | pg_info_t last_written_info; ///< last written info | |
1439 | PastIntervals past_intervals; ///< information about prior pg mappings | |
1440 | PGLog pg_log; ///< pg log | |
1441 | ||
1442 | epoch_t last_peering_reset = 0; ///< epoch of last peering reset | |
1443 | ||
1444 | /// last_update that has committed; ONLY DEFINED WHEN is_active() | |
1445 | eversion_t last_update_ondisk; | |
1446 | eversion_t last_complete_ondisk; ///< last_complete that has committed. | |
1447 | eversion_t last_update_applied; ///< last_update readable | |
1448 | /// last version to which rollback_info trimming has been applied | |
1449 | eversion_t last_rollback_info_trimmed_to_applied; | |
1450 | ||
1451 | /// Counter to determine when pending flushes have completed | |
1452 | unsigned flushes_in_progress = 0; | |
1453 | ||
1454 | /** | |
1455 | * Primary state | |
1456 | */ | |
f67539c2 TL |
1457 | std::set<pg_shard_t> stray_set; ///< non-acting osds that have PG data. |
1458 | std::map<pg_shard_t, pg_info_t> peer_info; ///< info from peers (stray or prior) | |
1459 | std::map<pg_shard_t, int64_t> peer_bytes; ///< Peer's num_bytes from peer_info | |
1460 | std::set<pg_shard_t> peer_purged; ///< peers purged | |
1461 | std::map<pg_shard_t, pg_missing_t> peer_missing; ///< peer missing sets | |
1462 | std::set<pg_shard_t> peer_log_requested; ///< logs i've requested (and start stamps) | |
1463 | std::set<pg_shard_t> peer_missing_requested; ///< missing sets requested | |
9f95a23c TL |
1464 | |
1465 | /// features supported by all peers | |
1466 | uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT; | |
1467 | /// features supported by acting set | |
1468 | uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; | |
1469 | /// features supported by up and acting | |
1470 | uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT; | |
1471 | ||
1472 | /// most recently consumed osdmap's require_osd_version | |
1473 | ceph_release_t last_require_osd_release = ceph_release_t::unknown; | |
1474 | ||
f67539c2 | 1475 | std::vector<int> want_acting; ///< non-empty while peering needs a new acting set |
9f95a23c TL |
1476 | |
1477 | // acting_recovery_backfill contains shards that are acting, | |
1478 | // async recovery targets, or backfill targets. | |
f67539c2 | 1479 | std::map<pg_shard_t,eversion_t> peer_last_complete_ondisk; |
9f95a23c TL |
1480 | |
1481 | /// up: min over last_complete_ondisk, peer_last_complete_ondisk | |
1482 | eversion_t min_last_complete_ondisk; | |
1483 | /// point to which the log should be trimmed | |
1484 | eversion_t pg_trim_to; | |
1485 | ||
f67539c2 | 1486 | std::set<int> blocked_by; ///< osds we are blocked by (for pg stats) |
9f95a23c TL |
1487 | |
1488 | bool need_up_thru = false; ///< true if osdmap with updated up_thru needed | |
1489 | ||
1490 | /// I deleted these strays; ignore racing PGInfo from them | |
f67539c2 | 1491 | std::set<pg_shard_t> peer_activated; |
9f95a23c | 1492 | |
f67539c2 TL |
1493 | std::set<pg_shard_t> backfill_targets; ///< osds to be backfilled |
1494 | std::set<pg_shard_t> async_recovery_targets; ///< osds to be async recovered | |
9f95a23c TL |
1495 | |
1496 | /// osds which might have objects on them which are unfound on the primary | |
f67539c2 | 1497 | std::set<pg_shard_t> might_have_unfound; |
9f95a23c TL |
1498 | |
1499 | bool deleting = false; /// true while in removing or OSD is shutting down | |
1500 | std::atomic<bool> deleted = {false}; /// true once deletion complete | |
1501 | ||
1502 | MissingLoc missing_loc; ///< information about missing objects | |
1503 | ||
1504 | bool backfill_reserved = false; | |
1505 | bool backfill_reserving = false; | |
1506 | ||
1507 | PeeringMachine machine; | |
1508 | ||
/// Replace the cached osdmap reference with newmap (takes ownership via move).
void update_osdmap_ref(OSDMapRef newmap) {
  osdmap_ref = std::move(newmap);
}
1512 | ||
1513 | void update_heartbeat_peers(); | |
f67539c2 | 1514 | void query_unfound(Formatter *f, string state); |
9f95a23c TL |
1515 | bool proc_replica_info( |
1516 | pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch); | |
1517 | void remove_down_peer_info(const OSDMapRef &osdmap); | |
1518 | void check_recovery_sources(const OSDMapRef& map); | |
1519 | void set_last_peering_reset(); | |
1520 | void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap); | |
1521 | bool should_restart_peering( | |
1522 | int newupprimary, | |
1523 | int newactingprimary, | |
f67539c2 TL |
1524 | const std::vector<int>& newup, |
1525 | const std::vector<int>& newacting, | |
9f95a23c TL |
1526 | OSDMapRef lastmap, |
1527 | OSDMapRef osdmap); | |
1528 | void start_peering_interval( | |
1529 | const OSDMapRef lastmap, | |
f67539c2 TL |
1530 | const std::vector<int>& newup, int up_primary, |
1531 | const std::vector<int>& newacting, int acting_primary, | |
9f95a23c TL |
1532 | ObjectStore::Transaction &t); |
1533 | void on_new_interval(); | |
1534 | void clear_recovery_state(); | |
1535 | void clear_primary_state(); | |
1536 | void check_past_interval_bounds() const; | |
1537 | bool set_force_recovery(bool b); | |
1538 | bool set_force_backfill(bool b); | |
1539 | ||
1540 | /// clip calculated priority to reasonable range | |
1541 | int clamp_recovery_priority(int prio, int pool_recovery_prio, int max); | |
1542 | /// get log recovery reservation priority | |
1543 | unsigned get_recovery_priority(); | |
1544 | /// get backfill reservation priority | |
1545 | unsigned get_backfill_priority(); | |
1546 | /// get priority for pg deletion | |
1547 | unsigned get_delete_priority(); | |
1548 | ||
1549 | bool check_prior_readable_down_osds(const OSDMapRef& map); | |
1550 | ||
1551 | bool adjust_need_up_thru(const OSDMapRef osdmap); | |
1552 | PastIntervals::PriorSet build_prior(); | |
1553 | ||
1554 | void reject_reservation(); | |
1555 | ||
f67539c2 TL |
1556 | // acting set | |
1557 | std::map<pg_shard_t, pg_info_t>::const_iterator find_best_info( | |
1558 | const std::map<pg_shard_t, pg_info_t> &infos, | |
9f95a23c TL |
1559 | bool restrict_to_up_acting, |
1560 | bool *history_les_bound) const; | |
f67539c2 | 1561 | |
9f95a23c | 1562 | static void calc_ec_acting( |
f67539c2 | 1563 | std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard, |
9f95a23c | 1564 | unsigned size, |
f67539c2 TL |
1565 | const std::vector<int> &acting, |
1566 | const std::vector<int> &up, | |
1567 | const std::map<pg_shard_t, pg_info_t> &all_info, | |
9f95a23c | 1568 | bool restrict_to_up_acting, |
f67539c2 TL |
1569 | std::vector<int> *want, |
1570 | std::set<pg_shard_t> *backfill, | |
1571 | std::set<pg_shard_t> *acting_backfill, | |
1572 | std::ostream &ss); | |
1573 | ||
1574 | static std::pair<map<pg_shard_t, pg_info_t>::const_iterator, eversion_t> | |
1575 | select_replicated_primary( | |
9f95a23c TL |
1576 | map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard, |
1577 | uint64_t force_auth_primary_missing_objects, | |
f67539c2 | 1578 | const std::vector<int> &up, |
9f95a23c TL |
1579 | pg_shard_t up_primary, |
1580 | const map<pg_shard_t, pg_info_t> &all_info, | |
9f95a23c TL |
1581 | const OSDMapRef osdmap, |
1582 | ostream &ss); | |
f67539c2 TL |
1583 | |
1584 | static void calc_replicated_acting( | |
1585 | map<pg_shard_t, pg_info_t>::const_iterator primary_shard, | |
1586 | eversion_t oldest_auth_log_entry, | |
1587 | unsigned size, | |
1588 | const std::vector<int> &acting, | |
1589 | const std::vector<int> &up, | |
1590 | pg_shard_t up_primary, | |
1591 | const std::map<pg_shard_t, pg_info_t> &all_info, | |
1592 | bool restrict_to_up_acting, | |
1593 | std::vector<int> *want, | |
1594 | std::set<pg_shard_t> *backfill, | |
1595 | std::set<pg_shard_t> *acting_backfill, | |
1596 | const OSDMapRef osdmap, | |
1597 | const PGPool& pool, | |
1598 | std::ostream &ss); | |
1599 | static void calc_replicated_acting_stretch( | |
1600 | map<pg_shard_t, pg_info_t>::const_iterator primary_shard, | |
1601 | eversion_t oldest_auth_log_entry, | |
1602 | unsigned size, | |
1603 | const std::vector<int> &acting, | |
1604 | const std::vector<int> &up, | |
1605 | pg_shard_t up_primary, | |
1606 | const std::map<pg_shard_t, pg_info_t> &all_info, | |
1607 | bool restrict_to_up_acting, | |
1608 | std::vector<int> *want, | |
1609 | std::set<pg_shard_t> *backfill, | |
1610 | std::set<pg_shard_t> *acting_backfill, | |
1611 | const OSDMapRef osdmap, | |
1612 | const PGPool& pool, | |
1613 | std::ostream &ss); | |
1614 | ||
9f95a23c | 1615 | void choose_async_recovery_ec( |
f67539c2 | 1616 | const std::map<pg_shard_t, pg_info_t> &all_info, |
9f95a23c | 1617 | const pg_info_t &auth_info, |
f67539c2 TL |
1618 | std::vector<int> *want, |
1619 | std::set<pg_shard_t> *async_recovery, | |
9f95a23c TL |
1620 | const OSDMapRef osdmap) const; |
1621 | void choose_async_recovery_replicated( | |
f67539c2 | 1622 | const std::map<pg_shard_t, pg_info_t> &all_info, |
9f95a23c | 1623 | const pg_info_t &auth_info, |
f67539c2 TL |
1624 | std::vector<int> *want, |
1625 | std::set<pg_shard_t> *async_recovery, | |
9f95a23c TL |
1626 | const OSDMapRef osdmap) const; |
1627 | ||
f67539c2 | 1628 | bool recoverable(const std::vector<int> &want) const; |
9f95a23c TL |
1629 | bool choose_acting(pg_shard_t &auth_log_shard, |
1630 | bool restrict_to_up_acting, | |
1631 | bool *history_les_bound, | |
1632 | bool request_pg_temp_change_only = false); | |
1633 | ||
1634 | bool search_for_missing( | |
1635 | const pg_info_t &oinfo, const pg_missing_t &omissing, | |
1636 | pg_shard_t fromosd, | |
1637 | PeeringCtxWrapper &rctx); | |
1638 | void build_might_have_unfound(); | |
1639 | void log_weirdness(); | |
1640 | void activate( | |
1641 | ObjectStore::Transaction& t, | |
1642 | epoch_t activation_epoch, | |
1643 | PeeringCtxWrapper &ctx); | |
1644 | ||
1645 | void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead); | |
1646 | void merge_log( | |
1647 | ObjectStore::Transaction& t, pg_info_t &oinfo, | |
f67539c2 | 1648 | pg_log_t&& olog, pg_shard_t from); |
9f95a23c TL |
1649 | |
1650 | void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info); | |
1651 | void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, | |
f67539c2 | 1652 | pg_log_t&& olog, pg_missing_t&& omissing, |
9f95a23c TL |
1653 | pg_shard_t from); |
1654 | void proc_replica_log(pg_info_t &oinfo, const pg_log_t &olog, | |
f67539c2 | 1655 | pg_missing_t&& omissing, pg_shard_t from); |
9f95a23c TL |
1656 | |
1657 | void calc_min_last_complete_ondisk() { | |
1658 | eversion_t min = last_complete_ondisk; | |
1659 | ceph_assert(!acting_recovery_backfill.empty()); | |
f67539c2 | 1660 | for (std::set<pg_shard_t>::iterator i = acting_recovery_backfill.begin(); |
9f95a23c TL |
1661 | i != acting_recovery_backfill.end(); |
1662 | ++i) { | |
1663 | if (*i == get_primary()) continue; | |
1664 | if (peer_last_complete_ondisk.count(*i) == 0) | |
1665 | return; // we don't have complete info | |
1666 | eversion_t a = peer_last_complete_ondisk[*i]; | |
1667 | if (a < min) | |
1668 | min = a; | |
1669 | } | |
1670 | if (min == min_last_complete_ondisk) | |
1671 | return; | |
1672 | min_last_complete_ondisk = min; | |
1673 | return; | |
1674 | } | |
1675 | ||
1676 | void fulfill_info( | |
1677 | pg_shard_t from, const pg_query_t &query, | |
f67539c2 | 1678 | std::pair<pg_shard_t, pg_info_t> ¬ify_info); |
9f95a23c TL |
1679 | void fulfill_log( |
1680 | pg_shard_t from, const pg_query_t &query, epoch_t query_epoch); | |
1681 | void fulfill_query(const MQuery& q, PeeringCtxWrapper &rctx); | |
1682 | ||
1683 | void try_mark_clean(); | |
1684 | ||
1685 | void update_blocked_by(); | |
1686 | void update_calc_stats(); | |
1687 | ||
1688 | void add_log_entry(const pg_log_entry_t& e, bool applied); | |
1689 | ||
1690 | void calc_trim_to(); | |
1691 | void calc_trim_to_aggressive(); | |
1692 | ||
1693 | public: | |
1694 | PeeringState( | |
1695 | CephContext *cct, | |
1696 | pg_shard_t pg_whoami, | |
1697 | spg_t spgid, | |
1698 | const PGPool &pool, | |
1699 | OSDMapRef curmap, | |
1700 | DoutPrefixProvider *dpp, | |
1701 | PeeringListener *pl); | |
1702 | ||
/// Process a raw boost::statechart event.
/// start_handle(rctx) is called before dispatch and end_handle() after,
/// bracketing machine.process_event() so rctx is in effect for the
/// duration of the transition.
void handle_event(const boost::statechart::event_base &evt,
		    PeeringCtx *rctx) {
  start_handle(rctx);
  machine.process_event(evt);
  end_handle();
}
1710 | ||
/// Process a wrapped PGPeeringEvent: unwraps the underlying statechart
/// event via evt->get_event() and dispatches it with rctx in effect
/// (start_handle/end_handle bracket the dispatch).
void handle_event(PGPeeringEventRef evt,
		    PeeringCtx *rctx) {
  start_handle(rctx);
  machine.process_event(evt->get_event());
  end_handle();
}
1718 | ||
1719 | /// Init fresh instance of PG | |
1720 | void init( | |
1721 | int role, | |
f67539c2 TL |
1722 | const std::vector<int>& newup, int new_up_primary, |
1723 | const std::vector<int>& newacting, int new_acting_primary, | |
9f95a23c TL |
1724 | const pg_history_t& history, |
1725 | const PastIntervals& pi, | |
1726 | bool backfill, | |
1727 | ObjectStore::Transaction &t); | |
1728 | ||
/// Init pg instance from disk state.
/// @param info_from_disk            on-disk pg_info_t (moved into place)
/// @param past_intervals_from_disk  on-disk past intervals (moved into place)
/// @param pg_log_init  callable invoked with our PGLog to (re)build the
///                     in-memory log; its return value is forwarded to
///                     the caller
template <typename F>
auto init_from_disk_state(
  pg_info_t &&info_from_disk,
  PastIntervals &&past_intervals_from_disk,
  F &&pg_log_init) {
  info = std::move(info_from_disk);
  // what we loaded is by definition what was last written
  last_written_info = info;
  past_intervals = std::move(past_intervals_from_disk);
  auto ret = pg_log_init(pg_log);
  log_weirdness();  // report any oddities in the freshly loaded log
  return ret;
}
1742 | ||
f67539c2 | 1743 | /// Set initial primary/acting |
9f95a23c | 1744 | void init_primary_up_acting( |
f67539c2 TL |
1745 | const std::vector<int> &newup, |
1746 | const std::vector<int> &newacting, | |
9f95a23c TL |
1747 | int new_up_primary, |
1748 | int new_acting_primary); | |
1749 | void init_hb_stamps(); | |
1750 | ||
/// Set initial role (index in the acting set; negative when this OSD
/// is not in the acting set -- see is_nonprimary()).
void set_role(int r) {
  role = r;
}
1755 | ||
/// Set predicates used for determining readable and recoverable;
/// forwarded to missing_loc.
void set_backend_predicates(
  IsPGReadablePredicate *is_readable,
  IsPGRecoverablePredicate *is_recoverable) {
  missing_loc.set_backend_predicates(is_readable, is_recoverable);
}
1762 | ||
1763 | /// Send current pg_info to peers | |
1764 | void share_pg_info(); | |
1765 | ||
1766 | /// Get stats for child pgs | |
1767 | void start_split_stats( | |
f67539c2 | 1768 | const std::set<spg_t>& childpgs, std::vector<object_stat_sum_t> *out); |
9f95a23c TL |
1769 | |
1770 | /// Update new child with stats | |
1771 | void finish_split_stats( | |
1772 | const object_stat_sum_t& stats, ObjectStore::Transaction &t); | |
1773 | ||
1774 | /// Split state for child_pgid into *child | |
1775 | void split_into( | |
1776 | pg_t child_pgid, PeeringState *child, unsigned split_bits); | |
1777 | ||
1778 | /// Merge state from sources | |
1779 | void merge_from( | |
f67539c2 | 1780 | std::map<spg_t,PeeringState *>& sources, |
9f95a23c TL |
1781 | PeeringCtx &rctx, |
1782 | unsigned split_bits, | |
1783 | const pg_merge_meta_t& last_pg_merge_meta); | |
1784 | ||
1785 | /// Permit stray replicas to purge now unnecessary state | |
1786 | void purge_strays(); | |
1787 | ||
1788 | /** | |
1789 | * update_stats | |
1790 | * | |
1791 | * Mechanism for updating stats and/or history. Pass t to mark | |
1792 | * dirty and write out. Return true if stats should be published | |
1793 | * to the osd. | |
1794 | */ | |
1795 | void update_stats( | |
1796 | std::function<bool(pg_history_t &, pg_stat_t &)> f, | |
1797 | ObjectStore::Transaction *t = nullptr); | |
1798 | ||
1799 | /** | |
1800 | * adjust_purged_snaps | |
1801 | * | |
1802 | * Mechanism for updating purged_snaps. Marks dirty_info, big_dirty_info. | |
1803 | */ | |
1804 | void adjust_purged_snaps( | |
1805 | std::function<void(interval_set<snapid_t> &snaps)> f); | |
1806 | ||
1807 | /// Updates info.hit_set to hset_history, does not dirty | |
1808 | void update_hset(const pg_hit_set_history_t &hset_history); | |
1809 | ||
f67539c2 TL |
1810 | /// Get all pg_shards that needs recovery |
1811 | std::vector<pg_shard_t> get_replica_recovery_order() const; | |
1812 | ||
9f95a23c TL |
1813 | /** |
1814 | * update_history | |
1815 | * | |
1816 | * Merges new_history into info.history clearing past_intervals and | |
1817 | * dirtying as needed. | |
1818 | * | |
1819 | * Calls PeeringListener::on_info_history_change() | |
1820 | */ | |
1821 | void update_history(const pg_history_t& new_history); | |
1822 | ||
1823 | /** | |
1824 | * prepare_stats_for_publish | |
1825 | * | |
1826 | * Returns updated pg_stat_t if stats have changed since | |
1827 | * pg_stats_publish adding in unstable_stats. | |
1828 | */ | |
1829 | std::optional<pg_stat_t> prepare_stats_for_publish( | |
1830 | bool pg_stats_publish_valid, | |
1831 | const pg_stat_t &pg_stats_publish, | |
1832 | const object_stat_collection_t &unstable_stats); | |
1833 | ||
1834 | /** | |
1835 | * Merge entries updating missing as necessary on all | |
1836 | * acting_recovery_backfill logs and missings (also missing_loc) | |
1837 | */ | |
1838 | bool append_log_entries_update_missing( | |
1839 | const mempool::osd_pglog::list<pg_log_entry_t> &entries, | |
1840 | ObjectStore::Transaction &t, | |
1841 | std::optional<eversion_t> trim_to, | |
1842 | std::optional<eversion_t> roll_forward_to); | |
1843 | ||
f67539c2 TL |
/// Append log entries after first refreshing the trim boundary.
/// update_trim_to() must run before append_log() because append_log()
/// consumes pg_trim_to, which update_trim_to() recomputes.
void append_log_with_trim_to_updated(
  std::vector<pg_log_entry_t>&& log_entries,
  eversion_t roll_forward_to,
  ObjectStore::Transaction &t,
  bool transaction_applied,
  bool async) {
  update_trim_to();
  append_log(std::move(log_entries), pg_trim_to, roll_forward_to,
	       min_last_complete_ondisk, t, transaction_applied, async);
}
1854 | ||
9f95a23c TL |
1855 | /** |
1856 | * Updates local log to reflect new write from primary. | |
1857 | */ | |
1858 | void append_log( | |
f67539c2 | 1859 | std::vector<pg_log_entry_t>&& logv, |
9f95a23c TL |
1860 | eversion_t trim_to, |
1861 | eversion_t roll_forward_to, | |
1862 | eversion_t min_last_complete_ondisk, | |
1863 | ObjectStore::Transaction &t, | |
1864 | bool transaction_applied, | |
1865 | bool async); | |
1866 | ||
f67539c2 TL |
1867 | /** |
1868 | * retrieve the min last_backfill among backfill targets | |
1869 | */ | |
1870 | hobject_t earliest_backfill() const; | |
1871 | ||
1872 | ||
9f95a23c TL |
1873 | /** |
1874 | * Updates local log/missing to reflect new oob log update from primary | |
1875 | */ | |
1876 | void merge_new_log_entries( | |
1877 | const mempool::osd_pglog::list<pg_log_entry_t> &entries, | |
1878 | ObjectStore::Transaction &t, | |
1879 | std::optional<eversion_t> trim_to, | |
1880 | std::optional<eversion_t> roll_forward_to); | |
1881 | ||
/// Update the local missing set to reflect log entry e
/// (TODOSAM: not sure why this is needed)
void add_local_next_event(const pg_log_entry_t& e) {
  pg_log.missing_add_next_entry(e);
}
1886 | ||
1887 | /// Update log trim boundary | |
1888 | void update_trim_to() { | |
1889 | bool hard_limit = (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT)); | |
1890 | if (hard_limit) | |
1891 | calc_trim_to_aggressive(); | |
1892 | else | |
1893 | calc_trim_to(); | |
1894 | } | |
1895 | ||
1896 | /// Pre-process pending update on hoid represented by logv | |
1897 | void pre_submit_op( | |
1898 | const hobject_t &hoid, | |
f67539c2 | 1899 | const std::vector<pg_log_entry_t>& logv, |
9f95a23c TL |
1900 | eversion_t at_version); |
1901 | ||
1902 | /// Signal that oid has been locally recovered to version v | |
1903 | void recover_got( | |
1904 | const hobject_t &oid, eversion_t v, | |
1905 | bool is_delete, | |
1906 | ObjectStore::Transaction &t); | |
1907 | ||
1908 | /// Signal that oid has been recovered on peer to version | |
1909 | void on_peer_recover( | |
1910 | pg_shard_t peer, | |
1911 | const hobject_t &soid, | |
1912 | const eversion_t &version); | |
1913 | ||
1914 | /// Notify that soid is being recovered on peer | |
1915 | void begin_peer_recover( | |
1916 | pg_shard_t peer, | |
1917 | const hobject_t soid); | |
1918 | ||
1919 | /// Pull missing sets from all candidate peers | |
1920 | bool discover_all_missing( | |
1921 | BufferedRecoveryMessages &rctx); | |
1922 | ||
/// Notify that hoid has been fully recovered: fold the recovery's stat
/// delta into our stats and drop hoid from missing_loc.
void object_recovered(
  const hobject_t &hoid,
  const object_stat_sum_t &stat_diff) {
  info.stats.stats.sum.add(stat_diff);
  missing_loc.recovered(hoid);
}
1930 | ||
1931 | /// Update info/stats to reflect backfill progress | |
1932 | void update_backfill_progress( | |
1933 | const hobject_t &updated_backfill, | |
1934 | const pg_stat_t &updated_stats, | |
1935 | bool preserve_local_num_bytes, | |
1936 | ObjectStore::Transaction &t); | |
1937 | ||
1938 | /// Update info/stats to reflect completed backfill on hoid | |
1939 | void update_complete_backfill_object_stats( | |
1940 | const hobject_t &hoid, | |
1941 | const pg_stat_t &stats); | |
1942 | ||
1943 | /// Update last_backfill for peer to new_last_backfill | |
1944 | void update_peer_last_backfill( | |
1945 | pg_shard_t peer, | |
1946 | const hobject_t &new_last_backfill); | |
1947 | ||
1948 | /// Update info.stats with delta_stats for operation on soid | |
1949 | void apply_op_stats( | |
1950 | const hobject_t &soid, | |
1951 | const object_stat_sum_t &delta_stats); | |
1952 | ||
/**
 * force_object_missing
 *
 * Force oid on peer to be missing at version. If the object does not
 * currently need recovery, either candidates if provided or the remainder
 * of the acting set will be deemed to have the object.
 */
void force_object_missing(
  const pg_shard_t &peer,
  const hobject_t &oid,
  eversion_t version) {
  // convenience overload: delegate to the std::set-taking variant
  force_object_missing(std::set<pg_shard_t>{peer}, oid, version);
}
1966 | void force_object_missing( | |
f67539c2 | 1967 | const std::set<pg_shard_t> &peer, |
9f95a23c TL |
1968 | const hobject_t &oid, |
1969 | eversion_t version); | |
1970 | ||
1971 | /// Update state prior to backfilling soid on targets | |
1972 | void prepare_backfill_for_missing( | |
1973 | const hobject_t &soid, | |
1974 | const eversion_t &version, | |
f67539c2 | 1975 | const std::vector<pg_shard_t> &targets); |
9f95a23c | 1976 | |
f67539c2 | 1977 | /// Set targets with the right version for revert (see recover_primary) |
9f95a23c TL |
1978 | void set_revert_with_targets( |
1979 | const hobject_t &soid, | |
f67539c2 | 1980 | const std::set<pg_shard_t> &good_peers); |
9f95a23c TL |
1981 | |
/// Record fromosd's last_complete_ondisk (lcod) in peer_last_complete_ondisk.
void update_peer_last_complete_ondisk(
  pg_shard_t fromosd,
  eversion_t lcod) {
  peer_last_complete_ondisk[fromosd] = lcod;
}
1988 | ||
/// Update the local last_complete_ondisk (lcod).
void update_last_complete_ondisk(
  eversion_t lcod) {
  last_complete_ondisk = lcod;
}
1994 | ||
1995 | /// Update state to reflect recovery up to version | |
1996 | void recovery_committed_to(eversion_t version); | |
1997 | ||
/// Mark local recovery complete: last_complete catches up to last_update.
void local_recovery_complete() {
  info.last_complete = info.last_update;
}
2002 | ||
/// Update the log's last_requested pointer to v (forwarded to pg_log).
void set_last_requested(version_t v) {
  pg_log.set_last_requested(v);
}
2007 | ||
2008 | /// Write dirty state to t | |
2009 | void write_if_dirty(ObjectStore::Transaction& t); | |
2010 | ||
2011 | /// Mark write completed to v with persisted lc | |
2012 | void complete_write(eversion_t v, eversion_t lc); | |
2013 | ||
/// Record that the local write at version v has been applied.
void local_write_applied(eversion_t v) {
  last_update_applied = v;
}
2018 | ||
2019 | /// Updates peering state with new map | |
2020 | void advance_map( | |
2021 | OSDMapRef osdmap, ///< [in] new osdmap | |
2022 | OSDMapRef lastmap, ///< [in] prev osdmap | |
f67539c2 | 2023 | std::vector<int>& newup, ///< [in] new up set |
9f95a23c | 2024 | int up_primary, ///< [in] new up primary |
f67539c2 | 2025 | std::vector<int>& newacting, ///< [in] new acting |
9f95a23c TL |
2026 | int acting_primary, ///< [in] new acting primary |
2027 | PeeringCtx &rctx ///< [out] recovery context | |
2028 | ); | |
2029 | ||
2030 | /// Activates most recently updated map | |
2031 | void activate_map( | |
2032 | PeeringCtx &rctx ///< [out] recovery context | |
2033 | ); | |
2034 | ||
/// Reset last_persisted_osdmap and mark both the small and big info
/// structures dirty so the next write_if_dirty() persists them anew.
void reset_last_persisted() {
  last_persisted_osdmap = 0;
  dirty_info = true;
  dirty_big_info = true;
}
2041 | ||
/// Signal shutdown beginning (deletion in progress, not yet complete).
void shutdown() {
  deleting = true;
}
2046 | ||
/// Signal shutdown complete (atomic flag; see is_deleted()).
void set_delete_complete() {
  deleted = true;
}
2051 | ||
/// Unconditionally mark both info structures dirty and write them to t.
void force_write_state(ObjectStore::Transaction &t) {
  dirty_info = true;
  dirty_big_info = true;
  write_if_dirty(t);
}
2058 | ||
/// Get current interval's readable_until.
ceph::signedspan get_readable_until() const {
  return readable_until;
}
2063 | ||
/// Get prior intervals' readable_until upper bound.
ceph::signedspan get_prior_readable_until_ub() const {
  return prior_readable_until_ub;
}
2068 | ||
/// Get the down OSDs from prior interval(s) whose pg instances may
/// still be readable.
const std::set<int>& get_prior_readable_down_osds() const {
  return prior_readable_down_osds;
}
2073 | ||
/// Reset prior intervals' readable_until upper bound (e.g., because it
/// has passed) and forget the down OSDs it was tracking.
void clear_prior_readable_until_ub() {
  prior_readable_until_ub = ceph::signedspan::zero();
  prior_readable_down_osds.clear();
}
2079 | ||
/// [primary] Extend the lease upper bound we will send to replicas.
void renew_lease(ceph::signedspan now) {
  // remember whether readable_until was pinned at its upper bound;
  // if so, extending the sent ub may allow readable_until to advance
  bool was_min = (readable_until_ub == readable_until);
  readable_until_ub_sent = now + readable_interval;
  if (was_min) {
    recalc_readable_until();
  }
}
2087 | ||
2088 | void send_lease(); | |
2089 | void schedule_renew_lease(); | |
2090 | ||
/// Build a lease message from readable_until, the last sent upper
/// bound, and the configured lease interval.
pg_lease_t get_lease() {
  return pg_lease_t(readable_until, readable_until_ub_sent, readable_interval);
}
2094 | ||
2095 | void proc_lease(const pg_lease_t& l); | |
2096 | void proc_lease_ack(int from, const pg_lease_ack_t& la); | |
2097 | void proc_renew_lease(); | |
2098 | ||
/// [replica] Build a lease ack carrying the upper bound we received
/// from the primary (primary's clock).
pg_lease_ack_t get_lease_ack() {
  return pg_lease_ack_t(readable_until_ub_from_primary);
}
2102 | ||
2103 | /// [primary] recalc readable_until[_ub] for the current interval | |
2104 | void recalc_readable_until(); | |
2105 | ||
2106 | //============================ const helpers ================================ | |
2107 | const char *get_current_state() const { | |
2108 | return state_history.get_current_state(); | |
2109 | } | |
2110 | epoch_t get_last_peering_reset() const { | |
2111 | return last_peering_reset; | |
2112 | } | |
2113 | eversion_t get_last_rollback_info_trimmed_to_applied() const { | |
2114 | return last_rollback_info_trimmed_to_applied; | |
2115 | } | |
2116 | /// Returns stable reference to internal pool structure | |
2117 | const PGPool &get_pool() const { | |
2118 | return pool; | |
2119 | } | |
/// Returns reference to current osdmap; asserts that one has been set.
const OSDMapRef &get_osdmap() const {
  ceph_assert(osdmap_ref);
  return osdmap_ref;
}
2125 | /// Returns epoch of current osdmap | |
2126 | epoch_t get_osdmap_epoch() const { | |
2127 | return get_osdmap()->get_epoch(); | |
2128 | } | |
2129 | ||
2130 | bool is_ec_pg() const override { | |
2131 | return pool.info.is_erasure(); | |
2132 | } | |
2133 | int get_pg_size() const override { | |
2134 | return pool.info.size; | |
2135 | } | |
2136 | bool is_deleting() const { | |
2137 | return deleting; | |
2138 | } | |
2139 | bool is_deleted() const { | |
2140 | return deleted; | |
2141 | } | |
f67539c2 | 2142 | const std::set<pg_shard_t> &get_upset() const override { |
9f95a23c TL |
2143 | return upset; |
2144 | } | |
2145 | bool is_acting_recovery_backfill(pg_shard_t osd) const { | |
2146 | return acting_recovery_backfill.count(osd); | |
2147 | } | |
2148 | bool is_acting(pg_shard_t osd) const { | |
2149 | return has_shard(pool.info.is_erasure(), acting, osd); | |
2150 | } | |
2151 | bool is_up(pg_shard_t osd) const { | |
2152 | return has_shard(pool.info.is_erasure(), up, osd); | |
2153 | } | |
f67539c2 | 2154 | static bool has_shard(bool ec, const std::vector<int>& v, pg_shard_t osd) { |
9f95a23c TL |
2155 | if (ec) { |
2156 | return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd; | |
2157 | } else { | |
2158 | return std::find(v.begin(), v.end(), osd.osd) != v.end(); | |
2159 | } | |
2160 | } | |
2161 | const PastIntervals& get_past_intervals() const { | |
2162 | return past_intervals; | |
2163 | } | |
/// acting osd that is not the primary (role >= 0 means in acting set)
bool is_nonprimary() const {
  return role >= 0 && pg_whoami != primary;
}
2168 | /// primary osd | |
2169 | bool is_primary() const { | |
2170 | return pg_whoami == primary; | |
2171 | } | |
/// true if the pg has been deleted or peering was reset after epoch e
bool pg_has_reset_since(epoch_t e) const {
  return deleted || e < get_last_peering_reset();
}
2175 | ||
2176 | int get_role() const { | |
2177 | return role; | |
2178 | } | |
f67539c2 | 2179 | const std::vector<int> &get_acting() const { |
9f95a23c TL |
2180 | return acting; |
2181 | } | |
f67539c2 | 2182 | const std::set<pg_shard_t> &get_actingset() const { |
9f95a23c TL |
2183 | return actingset; |
2184 | } | |
2185 | int get_acting_primary() const { | |
2186 | return primary.osd; | |
2187 | } | |
2188 | pg_shard_t get_primary() const { | |
2189 | return primary; | |
2190 | } | |
f67539c2 | 2191 | const std::vector<int> &get_up() const { |
9f95a23c TL |
2192 | return up; |
2193 | } | |
2194 | int get_up_primary() const { | |
2195 | return up_primary.osd; | |
2196 | } | |
2197 | ||
2198 | bool is_backfill_target(pg_shard_t osd) const { | |
2199 | return backfill_targets.count(osd); | |
2200 | } | |
f67539c2 | 2201 | const std::set<pg_shard_t> &get_backfill_targets() const { |
9f95a23c TL |
2202 | return backfill_targets; |
2203 | } | |
2204 | bool is_async_recovery_target(pg_shard_t peer) const { | |
2205 | return async_recovery_targets.count(peer); | |
2206 | } | |
f67539c2 | 2207 | const std::set<pg_shard_t> &get_async_recovery_targets() const { |
9f95a23c TL |
2208 | return async_recovery_targets; |
2209 | } | |
f67539c2 | 2210 | const std::set<pg_shard_t> &get_acting_recovery_backfill() const { |
9f95a23c TL |
2211 | return acting_recovery_backfill; |
2212 | } | |
2213 | ||
2214 | const PGLog &get_pg_log() const { | |
2215 | return pg_log; | |
2216 | } | |
2217 | ||
2218 | bool state_test(uint64_t m) const { return (state & m) != 0; } | |
2219 | void state_set(uint64_t m) { state |= m; } | |
2220 | void state_clear(uint64_t m) { state &= ~m; } | |
2221 | ||
2222 | bool is_complete() const { return info.last_complete == info.last_update; } | |
2223 | bool should_send_notify() const { return send_notify; } | |
2224 | ||
2225 | uint64_t get_state() const { return state; } | |
2226 | bool is_active() const { return state_test(PG_STATE_ACTIVE); } | |
2227 | bool is_activating() const { return state_test(PG_STATE_ACTIVATING); } | |
2228 | bool is_peering() const { return state_test(PG_STATE_PEERING); } | |
2229 | bool is_down() const { return state_test(PG_STATE_DOWN); } | |
2230 | bool is_recovery_unfound() const { | |
2231 | return state_test(PG_STATE_RECOVERY_UNFOUND); | |
2232 | } | |
f67539c2 TL |
2233 | bool is_backfilling() const { |
2234 | return state_test(PG_STATE_BACKFILLING); | |
2235 | } | |
9f95a23c TL |
2236 | bool is_backfill_unfound() const { |
2237 | return state_test(PG_STATE_BACKFILL_UNFOUND); | |
2238 | } | |
2239 | bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); } | |
2240 | bool is_clean() const { return state_test(PG_STATE_CLEAN); } | |
2241 | bool is_degraded() const { return state_test(PG_STATE_DEGRADED); } | |
2242 | bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); } | |
2243 | bool is_remapped() const { return state_test(PG_STATE_REMAPPED); } | |
2244 | bool is_peered() const { | |
2245 | return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED); | |
2246 | } | |
2247 | bool is_recovering() const { return state_test(PG_STATE_RECOVERING); } | |
2248 | bool is_premerge() const { return state_test(PG_STATE_PREMERGE); } | |
2249 | bool is_repair() const { return state_test(PG_STATE_REPAIR); } | |
2250 | bool is_empty() const { return info.last_update == eversion_t(0,0); } | |
2251 | ||
2252 | bool get_need_up_thru() const { | |
2253 | return need_up_thru; | |
2254 | } | |
2255 | ||
2256 | bool is_forced_recovery_or_backfill() const { | |
2257 | return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL); | |
2258 | } | |
2259 | ||
2260 | bool is_backfill_reserved() const { | |
2261 | return backfill_reserved; | |
2262 | } | |
2263 | ||
2264 | bool is_backfill_reserving() const { | |
2265 | return backfill_reserving; | |
2266 | } | |
2267 | ||
2268 | ceph_release_t get_last_require_osd_release() const { | |
2269 | return last_require_osd_release; | |
2270 | } | |
2271 | ||
2272 | const pg_info_t &get_info() const { | |
2273 | return info; | |
2274 | } | |
2275 | ||
2276 | const decltype(peer_info) &get_peer_info() const { | |
2277 | return peer_info; | |
2278 | } | |
2279 | const decltype(peer_missing) &get_peer_missing() const { | |
2280 | return peer_missing; | |
2281 | } | |
2282 | const pg_missing_const_i &get_peer_missing(const pg_shard_t &peer) const { | |
2283 | if (peer == pg_whoami) { | |
2284 | return pg_log.get_missing(); | |
2285 | } else { | |
2286 | assert(peer_missing.count(peer)); | |
2287 | return peer_missing.find(peer)->second; | |
2288 | } | |
2289 | } | |
2290 | const pg_info_t&get_peer_info(pg_shard_t peer) const { | |
2291 | assert(peer_info.count(peer)); | |
2292 | return peer_info.find(peer)->second; | |
2293 | } | |
2294 | bool has_peer_info(pg_shard_t peer) const { | |
2295 | return peer_info.count(peer); | |
2296 | } | |
2297 | ||
2298 | bool needs_recovery() const; | |
2299 | bool needs_backfill() const; | |
2300 | ||
2301 | /** | |
2302 | * Returns whether a particular object can be safely read on this replica | |
2303 | */ | |
2304 | bool can_serve_replica_read(const hobject_t &hoid) { | |
2305 | ceph_assert(!is_primary()); | |
2306 | return !pg_log.get_log().has_write_since( | |
2307 | hoid, get_min_last_complete_ondisk()); | |
2308 | } | |
2309 | ||
f67539c2 TL |
2310 | /** |
2311 | * Returns whether the current acting set is able to go active | |
2312 | * and serve writes. It needs to satisfy min_size and any | |
2313 | * applicable stretch cluster constraints. | |
2314 | */ | |
2315 | bool acting_set_writeable() { | |
2316 | return (actingset.size() >= pool.info.min_size) && | |
2317 | (pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL)); | |
2318 | } | |
2319 | ||
9f95a23c TL |
2320 | /** |
2321 | * Returns whether all peers which might have unfound objects have been | |
2322 | * queried or marked lost. | |
2323 | */ | |
2324 | bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const; | |
2325 | bool all_missing_unfound() const { | |
2326 | const auto& missing = pg_log.get_missing(); | |
2327 | if (!missing.have_missing()) | |
2328 | return false; | |
2329 | for (auto& m : missing.get_items()) { | |
2330 | if (!missing_loc.is_unfound(m.first)) | |
2331 | return false; | |
2332 | } | |
2333 | return true; | |
2334 | } | |
2335 | ||
2336 | bool perform_deletes_during_peering() const { | |
2337 | return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES)); | |
2338 | } | |
2339 | ||
2340 | ||
2341 | bool have_unfound() const { | |
2342 | return missing_loc.have_unfound(); | |
2343 | } | |
2344 | uint64_t get_num_unfound() const { | |
2345 | return missing_loc.num_unfound(); | |
2346 | } | |
2347 | ||
2348 | bool have_missing() const { | |
2349 | return pg_log.get_missing().num_missing() > 0; | |
2350 | } | |
2351 | unsigned int get_num_missing() const { | |
2352 | return pg_log.get_missing().num_missing(); | |
2353 | } | |
2354 | ||
2355 | const MissingLoc &get_missing_loc() const { | |
2356 | return missing_loc; | |
2357 | } | |
2358 | ||
2359 | const MissingLoc::missing_by_count_t &get_missing_by_count() const { | |
2360 | return missing_loc.get_missing_by_count(); | |
2361 | } | |
2362 | ||
2363 | eversion_t get_min_last_complete_ondisk() const { | |
2364 | return min_last_complete_ondisk; | |
2365 | } | |
2366 | ||
2367 | eversion_t get_pg_trim_to() const { | |
2368 | return pg_trim_to; | |
2369 | } | |
2370 | ||
2371 | eversion_t get_last_update_applied() const { | |
2372 | return last_update_applied; | |
2373 | } | |
2374 | ||
1911f103 TL |
2375 | eversion_t get_last_update_ondisk() const { |
2376 | return last_update_ondisk; | |
2377 | } | |
2378 | ||
9f95a23c TL |
2379 | bool debug_has_dirty_state() const { |
2380 | return dirty_info || dirty_big_info; | |
2381 | } | |
2382 | ||
2383 | std::string get_pg_state_string() const { | |
2384 | return pg_state_string(state); | |
2385 | } | |
2386 | ||
2387 | /// Dump representation of past_intervals to out | |
f67539c2 | 2388 | void print_past_intervals(std::ostream &out) const { |
9f95a23c TL |
2389 | out << "[" << past_intervals.get_bounds() |
2390 | << ")/" << past_intervals.size(); | |
2391 | } | |
2392 | ||
f67539c2 | 2393 | void dump_history(ceph::Formatter *f) const { |
9f95a23c TL |
2394 | state_history.dump(f); |
2395 | } | |
2396 | ||
2397 | /// Dump formatted peering status | |
f67539c2 | 2398 | void dump_peering_state(ceph::Formatter *f); |
9f95a23c TL |
2399 | |
2400 | private: | |
2401 | /// Mask feature vector with feature set from new peer | |
2402 | void apply_peer_features(uint64_t f) { peer_features &= f; } | |
2403 | ||
2404 | /// Reset feature vector to default | |
2405 | void reset_min_peer_features() { | |
2406 | peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT; | |
2407 | } | |
2408 | public: | |
2409 | /// Get feature vector common to all known peers with this pg | |
2410 | uint64_t get_min_peer_features() const { return peer_features; } | |
2411 | ||
2412 | /// Get feature vector common to acting set | |
2413 | uint64_t get_min_acting_features() const { return acting_features; } | |
2414 | ||
2415 | /// Get feature vector common to up/acting set | |
2416 | uint64_t get_min_upacting_features() const { return upacting_features; } | |
2417 | ||
2418 | ||
2419 | // Flush control interface | |
2420 | private: | |
2421 | /** | |
2422 | * Start additional flush (blocks needs_flush/activation until | |
2423 | * complete_flush is called once for each start_flush call as | |
2424 | * required by start_flush_on_transaction). | |
2425 | */ | |
2426 | void start_flush(ObjectStore::Transaction &t) { | |
2427 | flushes_in_progress++; | |
2428 | pl->start_flush_on_transaction(t); | |
2429 | } | |
2430 | public: | |
2431 | /// True if there are outstanding flushes | |
2432 | bool needs_flush() const { | |
2433 | return flushes_in_progress > 0; | |
2434 | } | |
2435 | /// Must be called once per start_flush | |
2436 | void complete_flush(); | |
2437 | ||
f67539c2 | 2438 | friend std::ostream &operator<<(std::ostream &out, const PeeringState &ps); |
9f95a23c TL |
2439 | }; |
2440 | ||
f67539c2 | 2441 | std::ostream &operator<<(std::ostream &out, const PeeringState &ps); |