]>
Commit | Line | Data |
---|---|---|
20effc67 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- |
2 | // vim: ts=8 sw=2 smarttab expandtab | |
11fdf7f2 TL |
3 | |
4 | #pragma once | |
5 | ||
9f95a23c TL |
6 | #include <memory> |
7 | #include <optional> | |
11fdf7f2 TL |
8 | #include <boost/smart_ptr/intrusive_ref_counter.hpp> |
9 | #include <seastar/core/future.hh> | |
9f95a23c | 10 | #include <seastar/core/shared_future.hh> |
11fdf7f2 | 11 | |
9f95a23c | 12 | #include "common/dout.h" |
1e59de90 | 13 | #include "include/interval_set.h" |
9f95a23c | 14 | #include "crimson/net/Fwd.h" |
f67539c2 TL |
15 | #include "messages/MOSDRepOpReply.h" |
16 | #include "messages/MOSDOpReply.h" | |
9f95a23c | 17 | #include "os/Transaction.h" |
11fdf7f2 | 18 | #include "osd/osd_types.h" |
1e59de90 | 19 | #include "osd/osd_types_fmt.h" |
9f95a23c TL |
20 | #include "crimson/osd/object_context.h" |
21 | #include "osd/PeeringState.h" | |
1e59de90 | 22 | #include "osd/SnapMapper.h" |
11fdf7f2 | 23 | |
20effc67 | 24 | #include "crimson/common/interruptible_future.h" |
9f95a23c TL |
25 | #include "crimson/common/type_helpers.h" |
26 | #include "crimson/os/futurized_collection.h" | |
f67539c2 | 27 | #include "crimson/osd/backfill_state.h" |
20effc67 TL |
28 | #include "crimson/osd/pg_interval_interrupt_condition.h" |
29 | #include "crimson/osd/ops_executer.h" | |
9f95a23c | 30 | #include "crimson/osd/osd_operations/client_request.h" |
1e59de90 TL |
31 | #include "crimson/osd/osd_operations/logmissing_request.h" |
32 | #include "crimson/osd/osd_operations/logmissing_request_reply.h" | |
9f95a23c TL |
33 | #include "crimson/osd/osd_operations/peering_event.h" |
34 | #include "crimson/osd/osd_operations/replicated_request.h" | |
35 | #include "crimson/osd/shard_services.h" | |
36 | #include "crimson/osd/osdmap_gate.h" | |
1e59de90 | 37 | #include "crimson/osd/pg_activation_blocker.h" |
f67539c2 TL |
38 | #include "crimson/osd/pg_recovery.h" |
39 | #include "crimson/osd/pg_recovery_listener.h" | |
40 | #include "crimson/osd/recovery_backend.h" | |
1e59de90 | 41 | #include "crimson/osd/object_context_loader.h" |
9f95a23c | 42 | |
9f95a23c | 43 | class MQuery; |
f67539c2 | 44 | class OSDMap; |
9f95a23c TL |
45 | class PGBackend; |
46 | class PGPeeringEvent; | |
f67539c2 TL |
47 | class osd_op_params_t; |
48 | ||
9f95a23c TL |
49 | namespace recovery { |
50 | class Context; | |
51 | } | |
52 | ||
53 | namespace crimson::net { | |
54 | class Messenger; | |
55 | } | |
56 | ||
57 | namespace crimson::os { | |
58 | class FuturizedStore; | |
59 | } | |
60 | ||
61 | namespace crimson::osd { | |
f67539c2 | 62 | class OpsExecuter; |
1e59de90 | 63 | class BackfillRecovery; |
aee94f69 | 64 | class SnapTrimEvent; |
11fdf7f2 TL |
65 | |
/// Crimson (Seastar-based) placement-group implementation.
///
/// Glues together:
///  - PeeringState, via the PeeringState::PeeringListener callbacks below
///    (many are still stubs marked "Not needed yet"),
///  - recovery/backfill, via PGRecoveryListener and the owned PGRecovery /
///    RecoveryBackend objects,
///  - client/replica op execution (do_osd_ops / handle_rep_op).
///
/// Reference counting is intentionally non-atomic
/// (boost::thread_unsafe_counter) -- presumably each PG lives on a single
/// reactor shard; TODO confirm against ShardServices' placement rules.
class PG : public boost::intrusive_ref_counter<
  PG,
  boost::thread_unsafe_counter>,
  public PGRecoveryListener,
  PeeringState::PeeringListener,
  DoutPrefixProvider
{
  using ec_profile_t = std::map<std::string,std::string>;
  using cached_map_t = OSDMapService::cached_map_t;

  // Ordering pipelines for client and peering requests.
  ClientRequest::PGPipeline request_pg_pipeline;
  PGPeeringPipeline peering_request_pg_pipeline;

  ClientRequest::Orderer client_request_orderer;

  spg_t pgid;
  pg_shard_t pg_whoami;
  crimson::os::CollectionRef coll_ref;
  ghobject_t pgmeta_oid;

  // Timers driving the read-lease state machine (see queue_check_readable /
  // schedule_renew_lease below).
  seastar::timer<seastar::lowres_clock> check_readable_timer;
  seastar::timer<seastar::lowres_clock> renew_lease_timer;

public:
  /// Future type that is interrupted when this PG's interval changes.
  template <typename T = void>
  using interruptible_future =
    ::crimson::interruptible::interruptible_future<
      ::crimson::osd::IOInterruptCondition, T>;

  PG(spg_t pgid,
     pg_shard_t pg_shard,
     crimson::os::CollectionRef coll_ref,
     pg_pool_t&& pool,
     std::string&& name,
     cached_map_t osdmap,
     ShardServices &shard_services,
     ec_profile_t profile);

  ~PG();

  const pg_shard_t& get_pg_whoami() const final {
    return pg_whoami;
  }

  const spg_t& get_pgid() const final {
    return pgid;
  }

  PGBackend& get_backend() {
    return *backend;
  }
  const PGBackend& get_backend() const {
    return *backend;
  }
  // EpochSource
  epoch_t get_osdmap_epoch() const final {
    return peering_state.get_osdmap_epoch();
  }

  eversion_t get_pg_trim_to() const {
    return peering_state.get_pg_trim_to();
  }

  eversion_t get_min_last_complete_ondisk() const {
    return peering_state.get_min_last_complete_ondisk();
  }

  const pg_info_t& get_info() const final {
    return peering_state.get_info();
  }

  // DoutPrefixProvider
  std::ostream& gen_prefix(std::ostream& out) const final {
    return out << *this;
  }
  crimson::common::CephContext *get_cct() const final {
    return shard_services.get_cct();
  }
  unsigned get_subsys() const final {
    return ceph_subsys_osd;
  }

  crimson::os::CollectionRef get_collection_ref() {
    return coll_ref;
  }

  // PeeringListener
  void prepare_write(
    pg_info_t &info,
    pg_info_t &last_written_info,
    PastIntervals &past_intervals,
    PGLog &pglog,
    bool dirty_info,
    bool dirty_big_info,
    bool need_write_epoch,
    ceph::os::Transaction &t) final;

  void on_info_history_change() final {
    // Not needed yet -- mainly for scrub scheduling
  }

  /// Notify PG that Primary/Replica status has changed (to update scrub registration)
  void on_primary_status_change(bool was_primary, bool now_primary) final {
  }

  /// Need to reschedule next scrub. Assuming no change in role
  void reschedule_scrub() final {
  }

  void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) final;

  uint64_t get_snap_trimq_size() const final {
    return std::size(snap_trimq);
  }

  void send_cluster_message(
    int osd, MessageURef m,
    epoch_t epoch, bool share_map_update=false) final {
    // fire-and-forget: the send future is deliberately discarded
    (void)shard_services.send_to_osd(osd, std::move(m), epoch);
  }

  void send_pg_created(pg_t pgid) final {
    (void)shard_services.send_pg_created(pgid);
  }

  bool try_flush_or_schedule_async() final;

  void start_flush_on_transaction(
    ceph::os::Transaction &t) final {
    // the transaction takes ownership of the LambdaContext and fires it
    // once the txn commits
    t.register_on_commit(
      new LambdaContext([this](int r){
        peering_state.complete_flush();
      }));
  }

  void on_flushed() final {
    // will be needed for unblocking IO operations/peering
  }

  /// Wrap a peering-state-machine event into a LocalPeeringEvent operation,
  /// optionally delayed by @p delay seconds.
  template <typename T>
  void start_peering_event_operation(T &&evt, float delay = 0) {
    (void) shard_services.start_operation<LocalPeeringEvent>(
      this,
      pg_whoami,
      pgid,
      delay,
      std::forward<T>(evt));
  }

  void schedule_event_after(
    PGPeeringEventRef event,
    float delay) final {
    start_peering_event_operation(std::move(*event), delay);
  }
  std::vector<pg_shard_t> get_replica_recovery_order() const final {
    return peering_state.get_replica_recovery_order();
  }
  void request_local_background_io_reservation(
    unsigned priority,
    PGPeeringEventURef on_grant,
    PGPeeringEventURef on_preempt) final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.local_request_reservation(
      pgid,
      on_grant ? make_lambda_context([this, on_grant=std::move(on_grant)] (int) {
        start_peering_event_operation(std::move(*on_grant));
      }) : nullptr,
      priority,
      on_preempt ? make_lambda_context(
        [this, on_preempt=std::move(on_preempt)] (int) {
        start_peering_event_operation(std::move(*on_preempt));
      }) : nullptr);
  }

  void update_local_background_io_priority(
    unsigned priority) final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.local_update_priority(
      pgid,
      priority);
  }

  void cancel_local_background_io_reservation() final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.local_cancel_reservation(
      pgid);
  }

  void request_remote_recovery_reservation(
    unsigned priority,
    PGPeeringEventURef on_grant,
    PGPeeringEventURef on_preempt) final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.remote_request_reservation(
      pgid,
      on_grant ? make_lambda_context([this, on_grant=std::move(on_grant)] (int) {
        start_peering_event_operation(std::move(*on_grant));
      }) : nullptr,
      priority,
      on_preempt ? make_lambda_context(
        [this, on_preempt=std::move(on_preempt)] (int) {
        start_peering_event_operation(std::move(*on_preempt));
      }) : nullptr);
  }

  void cancel_remote_recovery_reservation() final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.remote_cancel_reservation(
      pgid);
  }

  void schedule_event_on_commit(
    ceph::os::Transaction &t,
    PGPeeringEventRef on_commit) final {
    t.register_on_commit(
      make_lambda_context(
        [this, on_commit=std::move(on_commit)](int) {
          start_peering_event_operation(std::move(*on_commit));
        }));
  }

  void update_heartbeat_peers(std::set<int> peers) final {
    // Not needed yet
  }
  void set_probe_targets(const std::set<pg_shard_t> &probe_set) final {
    // Not needed yet
  }
  void clear_probe_targets() final {
    // Not needed yet
  }
  void queue_want_pg_temp(const std::vector<int> &wanted) final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.queue_want_pg_temp(pgid.pgid, wanted);
  }
  void clear_want_pg_temp() final {
    // TODO -- we probably want to add a mechanism for blocking on this
    // after handling the peering event
    std::ignore = shard_services.remove_want_pg_temp(pgid.pgid);
  }
  void check_recovery_sources(const OSDMapRef& newmap) final {
    // Not needed yet
  }
  void check_blocklisted_watchers() final;
  void clear_primary_state() final {
    // Not needed yet
  }

  void queue_check_readable(epoch_t last_peering_reset,
                            ceph::timespan delay) final;
  void recheck_readable() final;

  unsigned get_target_pg_log_entries() const final;

  void on_pool_change() final {
    // Not needed yet
  }
  void on_role_change() final {
    // Not needed yet
  }
  void on_change(ceph::os::Transaction &t) final;
  void on_activate(interval_set<snapid_t> to_trim) final;
  void on_activate_complete() final;
  void on_new_interval() final {
    // Not needed yet
  }
  Context *on_clean() final;
  void on_activate_committed() final {
    // Not needed yet (will be needed for IO unblocking)
  }
  void on_active_exit() final {
    // Not needed yet
  }

  void on_removal(ceph::os::Transaction &t) final;

  std::pair<ghobject_t, bool>
  do_delete_work(ceph::os::Transaction &t, ghobject_t _next) final;

  // merge/split not ready
  void clear_ready_to_merge() final {}
  void set_not_ready_to_merge_target(pg_t pgid, pg_t src) final {}
  void set_not_ready_to_merge_source(pg_t pgid) final {}
  void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) final {}
  void set_ready_to_merge_source(eversion_t lu) final {}

  void on_active_actmap() final;
  void on_active_advmap(const OSDMapRef &osdmap) final;

  epoch_t cluster_osdmap_trim_lower_bound() final {
    // TODO
    return 0;
  }

  void on_backfill_reserved() final {
    recovery_handler->on_backfill_reserved();
  }
  void on_backfill_canceled() final {
    ceph_assert(0 == "Not implemented");
  }

  void on_recovery_reserved() final {
    recovery_handler->start_pglogbased_recovery();
  }


  bool try_reserve_recovery_space(
    int64_t primary_num_bytes, int64_t local_num_bytes) final {
    // TODO
    return true;
  }
  void unreserve_recovery_space() final {}

  /// PGLog callback sink; all hooks are still TODO stubs.
  struct PGLogEntryHandler : public PGLog::LogEntryHandler {
    PG *pg;
    ceph::os::Transaction *t;
    PGLogEntryHandler(PG *pg, ceph::os::Transaction *t) : pg(pg), t(t) {}

    // LogEntryHandler
    void remove(const hobject_t &hoid) override {
      // TODO
    }
    void try_stash(const hobject_t &hoid, version_t v) override {
      // TODO
    }
    void rollback(const pg_log_entry_t &entry) override {
      // TODO
    }
    void rollforward(const pg_log_entry_t &entry) override {
      // TODO
    }
    void trim(const pg_log_entry_t &entry) override {
      // TODO
    }
  };
  PGLog::LogEntryHandlerRef get_log_handler(
    ceph::os::Transaction &t) final {
    return std::make_unique<PG::PGLogEntryHandler>(this, &t);
  }

  void rebuild_missing_set_with_deletes(PGLog &pglog) final {
    // NOTE(review): .get() blocks on the returned future -- looks intentional
    // for this recovery path, but confirm it cannot stall the reactor.
    pglog.rebuild_missing_set_with_deletes_crimson(
      shard_services.get_store(),
      coll_ref,
      peering_state.get_info()).get();
  }

  PerfCounters &get_peering_perf() final {
    return shard_services.get_recoverystate_perf_logger();
  }
  PerfCounters &get_perf_logger() final {
    return shard_services.get_perf_logger();
  }

  void log_state_enter(const char *state) final;
  void log_state_exit(
    const char *state_name, utime_t enter_time,
    uint64_t events, utime_t event_dur) final;

  void dump_recovery_info(Formatter *f) const final {
  }

  OstreamTemp get_clog_info() final {
    // not needed yet: replace with not a stub (needs to be wired up to monc)
    return OstreamTemp(CLOG_INFO, nullptr);
  }
  OstreamTemp get_clog_debug() final {
    // not needed yet: replace with not a stub (needs to be wired up to monc)
    return OstreamTemp(CLOG_DEBUG, nullptr);
  }
  OstreamTemp get_clog_error() final {
    // not needed yet: replace with not a stub (needs to be wired up to monc)
    return OstreamTemp(CLOG_ERROR, nullptr);
  }

  ceph::signedspan get_mnow() const final;
  HeartbeatStampsRef get_hb_stamps(int peer) final;
  void schedule_renew_lease(epoch_t plr, ceph::timespan delay) final;


  // Utility
  bool is_primary() const final {
    return peering_state.is_primary();
  }
  bool is_nonprimary() const {
    return peering_state.is_nonprimary();
  }
  bool is_peered() const final {
    return peering_state.is_peered();
  }
  bool is_recovering() const final {
    return peering_state.is_recovering();
  }
  bool is_backfilling() const final {
    return peering_state.is_backfilling();
  }
  uint64_t get_last_user_version() const {
    return get_info().last_user_version;
  }
  bool get_need_up_thru() const {
    return peering_state.get_need_up_thru();
  }
  epoch_t get_same_interval_since() const {
    return get_info().history.same_interval_since;
  }

  const auto& get_pgpool() const {
    return peering_state.get_pgpool();
  }
  pg_shard_t get_primary() const {
    return peering_state.get_primary();
  }

  /// initialize created PG
  void init(
    int role,
    const std::vector<int>& up,
    int up_primary,
    const std::vector<int>& acting,
    int acting_primary,
    const pg_history_t& history,
    const PastIntervals& pim,
    ceph::os::Transaction &t);

  seastar::future<> read_state(crimson::os::FuturizedStore::Shard* store);

  interruptible_future<> do_peering_event(
    PGPeeringEvent& evt, PeeringCtx &rctx);

  seastar::future<> handle_advance_map(cached_map_t next_map, PeeringCtx &rctx);
  seastar::future<> handle_activate_map(PeeringCtx &rctx);
  seastar::future<> handle_initialize(PeeringCtx &rctx);

  static hobject_t get_oid(const hobject_t& hobj);
  static RWState::State get_lock_type(const OpInfo &op_info);

  // Errorator used when loading object contexts from the store.
  using load_obc_ertr = crimson::errorator<
    crimson::ct_error::enoent,
    crimson::ct_error::object_corrupted>;
  using load_obc_iertr =
    ::crimson::interruptible::interruptible_errorator<
      ::crimson::osd::IOInterruptCondition,
      load_obc_ertr>;
  using interruptor = ::crimson::interruptible::interruptor<
    ::crimson::osd::IOInterruptCondition>;

public:
  using with_obc_func_t =
    std::function<load_obc_iertr::future<> (ObjectContextRef)>;

  load_obc_iertr::future<> with_locked_obc(
    const hobject_t &hobj,
    const OpInfo &op_info,
    with_obc_func_t&& f);

  interruptible_future<> handle_rep_op(Ref<MOSDRepOp> m);
  void log_operation(
    std::vector<pg_log_entry_t>&& logv,
    const eversion_t &trim_to,
    const eversion_t &roll_forward_to,
    const eversion_t &min_last_complete_ondisk,
    bool transaction_applied,
    ObjectStore::Transaction &txn,
    bool async = false);
  void replica_clear_repop_obc(
    const std::vector<pg_log_entry_t> &logv);
  void handle_rep_op_reply(const MOSDRepOpReply& m);
  interruptible_future<> do_update_log_missing(
    Ref<MOSDPGUpdateLogMissing> m,
    crimson::net::ConnectionRef conn);
  interruptible_future<> do_update_log_missing_reply(
    Ref<MOSDPGUpdateLogMissingReply> m);


  void print(std::ostream& os) const;
  void dump_primary(Formatter*);
  seastar::future<> submit_error_log(
    Ref<MOSDOp> m,
    const OpInfo &op_info,
    ObjectContextRef obc,
    const std::error_code e,
    ceph_tid_t rep_tid,
    eversion_t &version);

private:

  /// Serializes snap-trim events; lock() may additionally wait in the
  /// WaitPG pipeline phase before acquiring the mutex.
  struct SnapTrimMutex {
    struct WaitPG : OrderedConcurrentPhaseT<WaitPG> {
      static constexpr auto type_name = "SnapTrimEvent::wait_pg";
    } wait_pg;
    seastar::shared_mutex mutex;

    interruptible_future<> lock(SnapTrimEvent &st_event) noexcept;

    void unlock() noexcept {
      mutex.unlock();
    }
  } snaptrim_mutex;

  using do_osd_ops_ertr = crimson::errorator<
    crimson::ct_error::eagain>;
  using do_osd_ops_iertr =
    ::crimson::interruptible::interruptible_errorator<
      ::crimson::osd::IOInterruptCondition,
      ::crimson::errorator<crimson::ct_error::eagain>>;
  // Pair of futures for a replicated op: <all-replicas-committed, local-result>.
  template <typename Ret = void>
  using pg_rep_op_fut_t =
    std::tuple<interruptible_future<>,
               do_osd_ops_iertr::future<Ret>>;
  do_osd_ops_iertr::future<pg_rep_op_fut_t<MURef<MOSDOpReply>>> do_osd_ops(
    Ref<MOSDOp> m,
    crimson::net::ConnectionRef conn,
    ObjectContextRef obc,
    const OpInfo &op_info,
    const SnapContext& snapc);
  using do_osd_ops_success_func_t =
    std::function<do_osd_ops_iertr::future<>()>;
  using do_osd_ops_failure_func_t =
    std::function<do_osd_ops_iertr::future<>(const std::error_code&)>;
  struct do_osd_ops_params_t;
  do_osd_ops_iertr::future<pg_rep_op_fut_t<>> do_osd_ops(
    ObjectContextRef obc,
    std::vector<OSDOp>& ops,
    const OpInfo &op_info,
    const do_osd_ops_params_t &&params,
    do_osd_ops_success_func_t success_func,
    do_osd_ops_failure_func_t failure_func);
  template <class Ret, class SuccessFunc, class FailureFunc>
  do_osd_ops_iertr::future<pg_rep_op_fut_t<Ret>> do_osd_ops_execute(
    seastar::lw_shared_ptr<OpsExecuter> ox,
    std::vector<OSDOp>& ops,
    SuccessFunc&& success_func,
    FailureFunc&& failure_func);
  interruptible_future<MURef<MOSDOpReply>> do_pg_ops(Ref<MOSDOp> m);
  std::tuple<interruptible_future<>, interruptible_future<>>
  submit_transaction(
    ObjectContextRef&& obc,
    ceph::os::Transaction&& txn,
    osd_op_params_t&& oop,
    std::vector<pg_log_entry_t>&& log_entries);
  interruptible_future<> repair_object(
    const hobject_t& oid,
    eversion_t& v);
  void check_blocklisted_obc_watchers(ObjectContextRef &obc);

private:
  PG_OSDMapGate osdmap_gate;
  ShardServices &shard_services;


public:
  cached_map_t get_osdmap() { return peering_state.get_osdmap(); }
  /// Allocate the next projected version for an op in the current epoch.
  eversion_t next_version() {
    return eversion_t(get_osdmap_epoch(),
                      ++projected_last_update.version);
  }
  ShardServices& get_shard_services() final {
    return shard_services;
  }
  seastar::future<> stop();
private:
  std::unique_ptr<PGBackend> backend;
  std::unique_ptr<RecoveryBackend> recovery_backend;
  std::unique_ptr<PGRecovery> recovery_handler;

  PeeringState peering_state;
  eversion_t projected_last_update;

public:
  ObjectContextRegistry obc_registry;
  ObjectContextLoader obc_loader;

private:
  OSDriver osdriver;
  SnapMapper snap_mapper;

public:
  // PeeringListener
  void publish_stats_to_osd() final;
  void clear_publish_stats() final;
  pg_stat_t get_stats() const;
private:
  std::optional<pg_stat_t> pg_stats;

public:
  RecoveryBackend* get_recovery_backend() final {
    return recovery_backend.get();
  }
  PGRecovery* get_recovery_handler() final {
    return recovery_handler.get();
  }
  PeeringState& get_peering_state() final {
    return peering_state;
  }
  bool has_reset_since(epoch_t epoch) const final {
    return peering_state.pg_has_reset_since(epoch);
  }

  const pg_missing_tracker_t& get_local_missing() const {
    return peering_state.get_pg_log().get_missing();
  }
  epoch_t get_last_peering_reset() const final {
    return peering_state.get_last_peering_reset();
  }
  const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
    return peering_state.get_acting_recovery_backfill();
  }
  bool is_backfill_target(pg_shard_t osd) const {
    return peering_state.is_backfill_target(osd);
  }
  void begin_peer_recover(pg_shard_t peer, const hobject_t oid) {
    peering_state.begin_peer_recover(peer, oid);
  }
  uint64_t min_peer_features() const {
    return peering_state.get_min_peer_features();
  }
  const std::map<hobject_t, std::set<pg_shard_t>>&
  get_missing_loc_shards() const {
    return peering_state.get_missing_loc().get_missing_locs();
  }
  const std::map<pg_shard_t, pg_missing_t> &get_shard_missing() const {
    return peering_state.get_peer_missing();
  }
  epoch_t get_interval_start_epoch() const {
    return get_info().history.same_interval_since;
  }
  /// Missing set for @p shard: local set for ourselves, peer set otherwise;
  /// nullptr when we have no record for that peer.
  const pg_missing_const_i* get_shard_missing(pg_shard_t shard) const {
    if (shard == pg_whoami)
      return &get_local_missing();
    else {
      auto it = peering_state.get_peer_missing().find(shard);
      if (it == peering_state.get_peer_missing().end())
        return nullptr;
      else
        return &it->second;
    }
  }

  /// Result of a completed-request lookup (see already_complete()).
  struct complete_op_t {
    const version_t user_version;
    const eversion_t version;
    const int err;
  };
  interruptible_future<std::optional<complete_op_t>>
  already_complete(const osd_reqid_t& reqid);
  int get_recovery_op_priority() const {
    int64_t pri = 0;
    get_pgpool().info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
    return pri > 0 ? pri : crimson::common::local_conf()->osd_recovery_op_priority;
  }
  seastar::future<> mark_unfound_lost(int) {
    // TODO: see PrimaryLogPG::mark_all_unfound_lost()
    return seastar::now();
  }

  bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const;

  template <typename MsgType>
  bool can_discard_replica_op(const MsgType& m) const {
    return can_discard_replica_op(m, m.map_epoch);
  }

private:
  // instead of seastar::gate, we use a boolean flag to indicate
  // whether the system is shutting down, as we don't need to track
  // continuations here.
  bool stopping = false;

  PGActivationBlocker wait_for_active_blocker;

  friend std::ostream& operator<<(std::ostream&, const PG& pg);
  friend class ClientRequest;
  friend struct CommonClientRequest;
  friend class PGAdvanceMap;
  template <class T>
  friend class PeeringEvent;
  friend class RepRequest;
  friend class LogMissingRequest;
  friend class LogMissingRequestReply;
  friend class BackfillRecovery;
  friend struct PGFacade;
  friend class InternalClientRequest;
  friend class WatchTimeoutRequest;
  friend class SnapTrimEvent;
  friend class SnapTrimObjSubEvent;
private:
  seastar::future<bool> find_unfound() {
    return seastar::make_ready_future<bool>(true);
  }

  bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
  bool can_discard_op(const MOSDOp& m) const;
  void context_registry_on_change();
  bool is_missing_object(const hobject_t& soid) const {
    return peering_state.get_pg_log().get_missing().get_items().count(soid);
  }
  bool is_unreadable_object(const hobject_t &oid,
                            eversion_t* v = 0) const final {
    return is_missing_object(oid) ||
      !peering_state.get_missing_loc().readable_with_acting(
        oid, get_actingset(), v);
  }
  bool is_degraded_or_backfilling_object(const hobject_t& soid) const;
  const std::set<pg_shard_t> &get_actingset() const {
    return peering_state.get_actingset();
  }

private:
  friend class IOInterruptCondition;
  /// Tracks one in-flight log update replicated to peers; all_committed
  /// resolves once waiting_on drains.
  struct log_update_t {
    std::set<pg_shard_t> waiting_on;
    seastar::shared_promise<> all_committed;
  };

  std::map<ceph_tid_t, log_update_t> log_entry_update_waiting_on;
  // snap trimming
  interval_set<snapid_t> snap_trimq;
};
789 | ||
790 | struct PG::do_osd_ops_params_t { | |
1e59de90 TL |
791 | crimson::net::ConnectionRef &get_connection() const { |
792 | return conn; | |
20effc67 TL |
793 | } |
794 | osd_reqid_t get_reqid() const { | |
795 | return reqid; | |
796 | } | |
797 | utime_t get_mtime() const { | |
798 | return mtime; | |
799 | }; | |
800 | epoch_t get_map_epoch() const { | |
801 | return map_epoch; | |
802 | } | |
803 | entity_inst_t get_orig_source_inst() const { | |
804 | return orig_source_inst; | |
805 | } | |
806 | uint64_t get_features() const { | |
807 | return features; | |
808 | } | |
1e59de90 TL |
809 | // Only used by InternalClientRequest, no op flags |
810 | bool has_flag(uint32_t flag) const { | |
811 | return false; | |
812 | } | |
813 | ||
814 | // Only used by ExecutableMessagePimpl | |
815 | entity_name_t get_source() const { | |
816 | return orig_source_inst.name; | |
817 | } | |
818 | ||
819 | crimson::net::ConnectionRef &conn; | |
20effc67 TL |
820 | osd_reqid_t reqid; |
821 | utime_t mtime; | |
822 | epoch_t map_epoch; | |
823 | entity_inst_t orig_source_inst; | |
824 | uint64_t features; | |
11fdf7f2 | 825 | }; |
9f95a23c TL |
826 | |
827 | std::ostream& operator<<(std::ostream&, const PG& pg); | |
828 | ||
829 | } | |
1e59de90 TL |
830 | |
#if FMT_VERSION >= 90000
// fmt v9+ no longer implicitly formats types via operator<<; explicitly route
// PG through its ostream inserter (declared above) for fmt::format/print.
template <> struct fmt::formatter<crimson::osd::PG> : fmt::ostream_formatter {};
#endif