1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_PG_H
16 #define CEPH_PG_H
17
18 #include <boost/scoped_ptr.hpp>
19 #include <boost/container/flat_set.hpp>
20 #include "include/mempool.h"
21
22 // re-include our assert to clobber boost's
23 #include "include/ceph_assert.h"
24 #include "include/common_fwd.h"
25
26 #include "include/types.h"
27 #include "include/stringify.h"
28 #include "osd_types.h"
29 #include "include/xlist.h"
30 #include "SnapMapper.h"
31 #include "Session.h"
32 #include "common/Timer.h"
33
34 #include "PGLog.h"
35 #include "OSDMap.h"
36 #include "include/str_list.h"
37 #include "PGBackend.h"
38 #include "PGPeeringEvent.h"
39 #include "PeeringState.h"
40 #include "recovery_types.h"
41 #include "MissingLoc.h"
42 #include "scrubber_common.h"
43
44 #include "mgr/OSDPerfMetricTypes.h"
45
46 #include <atomic>
47 #include <list>
48 #include <memory>
49 #include <string>
50 #include <tuple>
51
52 //#define DEBUG_RECOVERY_OIDS // track std::set of recovering oids explicitly, to find counting bugs
53 //#define PG_DEBUG_REFS // track provenance of pg refs, helpful for finding leaks
54
55 class OSD;
56 class OSDService;
57 struct OSDShard;
58 struct OSDShardPGSlot;
59
60 class PG;
61 struct OpRequest;
62 typedef OpRequest::Ref OpRequestRef;
63 class DynamicPerfStats;
64 class PgScrubber;
65 class ScrubBackend;
66
67 namespace Scrub {
68 class Store;
69 class ReplicaReservations;
70 class LocalReservation;
71 class ReservedByRemotePrimary;
72 enum class schedule_result_t;
73 }
74
75 #ifdef PG_DEBUG_REFS
76 #include "common/tracked_int_ptr.hpp"
77 uint64_t get_with_id(PG *pg);
78 void put_with_id(PG *pg, uint64_t id);
79 typedef TrackedIntPtr<PG> PGRef;
80 #else
81 typedef boost::intrusive_ptr<PG> PGRef;
82 #endif
83
84 class PGRecoveryStats {
85 struct per_state_info {
86 uint64_t enter, exit; // enter/exit counts
87 uint64_t events;
88 utime_t event_time; // time spent processing events
89 utime_t total_time; // total time in state
90 utime_t min_time, max_time;
91
92 // cppcheck-suppress unreachableCode
93 per_state_info() : enter(0), exit(0), events(0) {}
94 };
95 std::map<const char *,per_state_info> info;
96   ceph::mutex lock = ceph::make_mutex("PGRecoveryStats::lock");
97
98 public:
99 PGRecoveryStats() = default;
100
101 void reset() {
102 std::lock_guard l(lock);
103 info.clear();
104 }
105 void dump(ostream& out) {
106 std::lock_guard l(lock);
107 for (std::map<const char *,per_state_info>::iterator p = info.begin(); p != info.end(); ++p) {
108 per_state_info& i = p->second;
109 out << i.enter << "\t" << i.exit << "\t"
110 << i.events << "\t" << i.event_time << "\t"
111 << i.total_time << "\t"
112 << i.min_time << "\t" << i.max_time << "\t"
113 << p->first << "\n";
114 }
115 }
116
117 void dump_formatted(ceph::Formatter *f) {
118 std::lock_guard l(lock);
119 f->open_array_section("pg_recovery_stats");
120 for (std::map<const char *,per_state_info>::iterator p = info.begin();
121 p != info.end(); ++p) {
122 per_state_info& i = p->second;
123 f->open_object_section("recovery_state");
124 f->dump_int("enter", i.enter);
125 f->dump_int("exit", i.exit);
126 f->dump_int("events", i.events);
127 f->dump_stream("event_time") << i.event_time;
128 f->dump_stream("total_time") << i.total_time;
129 f->dump_stream("min_time") << i.min_time;
130 f->dump_stream("max_time") << i.max_time;
131 std::vector<std::string> states;
132 get_str_vec(p->first, "/", states);
133 f->open_array_section("nested_states");
134 for (std::vector<std::string>::iterator st = states.begin();
135 st != states.end(); ++st) {
136 f->dump_string("state", *st);
137 }
138 f->close_section();
139 f->close_section();
140 }
141 f->close_section();
142 }
143
144 void log_enter(const char *s) {
145 std::lock_guard l(lock);
146 info[s].enter++;
147 }
148 void log_exit(const char *s, utime_t dur, uint64_t events, utime_t event_dur) {
149 std::lock_guard l(lock);
150 per_state_info &i = info[s];
151 i.exit++;
152 i.total_time += dur;
153 if (dur > i.max_time)
154 i.max_time = dur;
155 if (dur < i.min_time || i.min_time == utime_t())
156 i.min_time = dur;
157 i.events += events;
158 i.event_time += event_dur;
159 }
160 };
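
// Illustrative usage sketch, not part of this header: how PGRecoveryStats is
// meant to be driven around a state-machine transition. The surrounding
// variables (event_count, event_dur, f) are hypothetical.
//
//   PGRecoveryStats stats;
//   utime_t enter_time = ceph_clock_now();
//   stats.log_enter("Started/Primary/Active");
//   // ... handle events, accumulating event_count and event_dur ...
//   stats.log_exit("Started/Primary/Active",
//                  ceph_clock_now() - enter_time, event_count, event_dur);
//   stats.dump_formatted(f);   // emit the accumulated per-state counters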
161
162 /** PG - Replica Placement Group: the base class holding the state of one
163  *  placement group replica on an OSD. PrimaryLogPG is the concrete implementation.
164  */
165
166 class PG : public DoutPrefixProvider,
167 public PeeringState::PeeringListener,
168 public Scrub::PgScrubBeListener {
169 friend struct NamedState;
170 friend class PeeringState;
171 friend class PgScrubber;
172 friend class ScrubBackend;
173
174 public:
175 const pg_shard_t pg_whoami;
176 const spg_t pg_id;
177
178   /// the 'scrubber'. Allocated in the derived-class (PrimaryLogPG) constructor,
179   /// and destroyed only in the PrimaryLogPG destructor.
180 std::unique_ptr<ScrubPgIF> m_scrubber;
181
182 /// flags detailing scheduling/operation characteristics of the next scrub
183 requested_scrub_t m_planned_scrub;
184
185 const requested_scrub_t& get_planned_scrub() const {
186 return m_planned_scrub;
187 }
188
189 /// scrubbing state for both Primary & replicas
190 bool is_scrub_active() const { return m_scrubber->is_scrub_active(); }
191
192   /// set when the scrub request is queued, and reset only after the scrub has
193   /// fully cleaned up.
194 bool is_scrub_queued_or_active() const { return m_scrubber->is_queued_or_active(); }
195
196 public:
197 // -- members --
198 const coll_t coll;
199
200 ObjectStore::CollectionHandle ch;
201
202 // -- methods --
203 std::ostream& gen_prefix(std::ostream& out) const override;
204 CephContext *get_cct() const override {
205 return cct;
206 }
207 unsigned get_subsys() const override {
208 return ceph_subsys_osd;
209 }
210
211 const char* const get_current_state() const {
212 return recovery_state.get_current_state();
213 }
214
215 const OSDMapRef& get_osdmap() const {
216 ceph_assert(is_locked());
217 return recovery_state.get_osdmap();
218 }
219
220 epoch_t get_osdmap_epoch() const override final {
221 return recovery_state.get_osdmap()->get_epoch();
222 }
223
224 PerfCounters &get_peering_perf() override;
225 PerfCounters &get_perf_logger() override;
226 void log_state_enter(const char *state) override;
227 void log_state_exit(
228 const char *state_name, utime_t enter_time,
229 uint64_t events, utime_t event_dur) override;
230
231 void lock(bool no_lockdep = false) const;
232 void unlock() const;
233 bool is_locked() const;
234
235 const spg_t& get_pgid() const {
236 return pg_id;
237 }
238
239 const PGPool& get_pgpool() const final {
240 return pool;
241 }
242 uint64_t get_last_user_version() const {
243 return info.last_user_version;
244 }
245 const pg_history_t& get_history() const {
246 return info.history;
247 }
248 bool get_need_up_thru() const {
249 return recovery_state.get_need_up_thru();
250 }
251 epoch_t get_same_interval_since() const {
252 return info.history.same_interval_since;
253 }
254
255 bool is_waiting_for_unreadable_object() const final
256 {
257 return !waiting_for_unreadable_object.empty();
258 }
259
260 static void set_last_scrub_stamp(
261 utime_t t, pg_history_t &history, pg_stat_t &stats) {
262 stats.last_scrub_stamp = t;
263 history.last_scrub_stamp = t;
264 }
265
266 void set_last_scrub_stamp(utime_t t) {
267 recovery_state.update_stats(
268 [t](auto &history, auto &stats) {
269 set_last_scrub_stamp(t, history, stats);
270 return true;
271 });
272 }
273
274 static void set_last_deep_scrub_stamp(
275 utime_t t, pg_history_t &history, pg_stat_t &stats) {
276 stats.last_deep_scrub_stamp = t;
277 history.last_deep_scrub_stamp = t;
278 }
279
280 void set_last_deep_scrub_stamp(utime_t t) {
281 recovery_state.update_stats(
282 [t](auto &history, auto &stats) {
283 set_last_deep_scrub_stamp(t, history, stats);
284 return true;
285 });
286 }
287
288 static void add_objects_scrubbed_count(
289 int64_t count, pg_stat_t &stats) {
290 stats.objects_scrubbed += count;
291 }
292
293 void add_objects_scrubbed_count(int64_t count) {
294 recovery_state.update_stats(
295 [count](auto &history, auto &stats) {
296 add_objects_scrubbed_count(count, stats);
297 return true;
298 });
299 }
300
301 static void reset_objects_scrubbed(pg_stat_t &stats) {
302 stats.objects_scrubbed = 0;
303 }
304
305 void reset_objects_scrubbed()
306 {
307 recovery_state.update_stats([](auto& history, auto& stats) {
308 reset_objects_scrubbed(stats);
309 return true;
310 });
311 }
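
  // Pattern note (illustrative sketch, not an additional API): the scrub-stamp
  // helpers above all funnel mutations through PeeringState::update_stats(),
  // passing a callback that edits pg_stat_t in place, roughly
  //
  //   recovery_state.update_stats(
  //     [](auto& history, auto& stats) {
  //       stats.objects_scrubbed = 0;   // mutate the stats in place
  //       return true;                  // assumed to request republishing the stats
  //     });
  //
  // The exact meaning of the bool return value is defined by
  // PeeringState::update_stats(); the comment above is an assumption.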
312
313 bool is_deleting() const {
314 return recovery_state.is_deleting();
315 }
316 bool is_deleted() const {
317 return recovery_state.is_deleted();
318 }
319 bool is_nonprimary() const {
320 return recovery_state.is_nonprimary();
321 }
322 bool is_primary() const {
323 return recovery_state.is_primary();
324 }
325 bool pg_has_reset_since(epoch_t e) {
326 ceph_assert(is_locked());
327 return recovery_state.pg_has_reset_since(e);
328 }
329
330 bool is_ec_pg() const {
331 return recovery_state.is_ec_pg();
332 }
333 int get_role() const {
334 return recovery_state.get_role();
335 }
336 const std::vector<int> get_acting() const {
337 return recovery_state.get_acting();
338 }
339 const std::set<pg_shard_t> &get_actingset() const {
340 return recovery_state.get_actingset();
341 }
342 int get_acting_primary() const {
343 return recovery_state.get_acting_primary();
344 }
345 pg_shard_t get_primary() const final {
346 return recovery_state.get_primary();
347 }
348 const std::vector<int> get_up() const {
349 return recovery_state.get_up();
350 }
351 int get_up_primary() const {
352 return recovery_state.get_up_primary();
353 }
354 const PastIntervals& get_past_intervals() const {
355 return recovery_state.get_past_intervals();
356 }
357 bool is_acting_recovery_backfill(pg_shard_t osd) const {
358 return recovery_state.is_acting_recovery_backfill(osd);
359 }
360 const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
361 return recovery_state.get_acting_recovery_backfill();
362 }
363 bool is_acting(pg_shard_t osd) const {
364 return recovery_state.is_acting(osd);
365 }
366 bool is_up(pg_shard_t osd) const {
367 return recovery_state.is_up(osd);
368 }
369 static bool has_shard(bool ec, const std::vector<int>& v, pg_shard_t osd) {
370 return PeeringState::has_shard(ec, v, osd);
371 }
372
373 /// initialize created PG
374 void init(
375 int role,
376 const std::vector<int>& up,
377 int up_primary,
378 const std::vector<int>& acting,
379 int acting_primary,
380 const pg_history_t& history,
381 const PastIntervals& pim,
382 ObjectStore::Transaction &t);
383
384 /// read existing pg state off disk
385 void read_state(ObjectStore *store);
386 static int peek_map_epoch(ObjectStore *store, spg_t pgid, epoch_t *pepoch);
387
388 static int get_latest_struct_v() {
389 return pg_latest_struct_v;
390 }
391 static int get_compat_struct_v() {
392 return pg_compat_struct_v;
393 }
394 static int read_info(
395 ObjectStore *store, spg_t pgid, const coll_t &coll,
396 pg_info_t &info, PastIntervals &past_intervals,
397 __u8 &);
398 static bool _has_removal_flag(ObjectStore *store, spg_t pgid);
399
400 void rm_backoff(const ceph::ref_t<Backoff>& b);
401
402 void update_snap_mapper_bits(uint32_t bits) {
403 snap_mapper.update_bits(bits);
404 }
405 void start_split_stats(const std::set<spg_t>& childpgs, std::vector<object_stat_sum_t> *v);
406 virtual void split_colls(
407 spg_t child,
408 int split_bits,
409 int seed,
410 const pg_pool_t *pool,
411 ObjectStore::Transaction &t) = 0;
412 void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
413 void merge_from(std::map<spg_t,PGRef>& sources, PeeringCtx &rctx,
414 unsigned split_bits,
415 const pg_merge_meta_t& last_pg_merge_meta);
416 void finish_split_stats(const object_stat_sum_t& stats,
417 ObjectStore::Transaction &t);
418
419 void scrub(epoch_t queued, ThreadPool::TPHandle& handle)
420 {
421 // a new scrub
422 forward_scrub_event(&ScrubPgIF::initiate_regular_scrub, queued, "StartScrub");
423 }
424
425   /**
426    * a special version of PG::scrub(): a scrub initiated after a repair.
427    *
428    * Historically this variant was also exempt from allocating local/remote
429    * OSD scrub resources; that is no longer the case.
430    */
431 void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle)
432 {
433 // a new scrub
434 forward_scrub_event(&ScrubPgIF::initiate_scrub_after_repair, queued,
435 "AfterRepairScrub");
436 }
437
438 void replica_scrub(epoch_t queued,
439 Scrub::act_token_t act_token,
440 ThreadPool::TPHandle& handle);
441
442 void replica_scrub_resched(epoch_t queued,
443 Scrub::act_token_t act_token,
444 ThreadPool::TPHandle& handle)
445 {
446 forward_scrub_event(&ScrubPgIF::send_sched_replica, queued, act_token,
447 "SchedReplica");
448 }
449
450 void scrub_send_resources_granted(epoch_t queued, ThreadPool::TPHandle& handle)
451 {
452 forward_scrub_event(&ScrubPgIF::send_remotes_reserved, queued, "RemotesReserved");
453 }
454
455 void scrub_send_resources_denied(epoch_t queued, ThreadPool::TPHandle& handle)
456 {
457 forward_scrub_event(&ScrubPgIF::send_reservation_failure, queued,
458 "ReservationFailure");
459 }
460
461 void scrub_send_scrub_resched(epoch_t queued, ThreadPool::TPHandle& handle)
462 {
463 forward_scrub_event(&ScrubPgIF::send_scrub_resched, queued, "InternalSchedScrub");
464 }
465
466 void scrub_send_pushes_update(epoch_t queued, ThreadPool::TPHandle& handle)
467 {
468 forward_scrub_event(&ScrubPgIF::active_pushes_notification, queued,
469 "ActivePushesUpd");
470 }
471
472 void scrub_send_applied_update(epoch_t queued, ThreadPool::TPHandle& handle)
473 {
474 forward_scrub_event(&ScrubPgIF::update_applied_notification, queued,
475 "UpdatesApplied");
476 }
477
478 void scrub_send_unblocking(epoch_t queued, ThreadPool::TPHandle& handle)
479 {
480 forward_scrub_event(&ScrubPgIF::send_scrub_unblock, queued, "Unblocked");
481 }
482
483 void scrub_send_digest_update(epoch_t queued, ThreadPool::TPHandle& handle)
484 {
485 forward_scrub_event(&ScrubPgIF::digest_update_notification, queued, "DigestUpdate");
486 }
487
488 void scrub_send_local_map_ready(epoch_t queued, ThreadPool::TPHandle& handle)
489 {
490 forward_scrub_event(&ScrubPgIF::send_local_map_done, queued, "IntLocalMapDone");
491 }
492
493 void scrub_send_replmaps_ready(epoch_t queued, ThreadPool::TPHandle& handle)
494 {
495 forward_scrub_event(&ScrubPgIF::send_replica_maps_ready, queued, "GotReplicas");
496 }
497
498 void scrub_send_replica_pushes(epoch_t queued, ThreadPool::TPHandle& handle)
499 {
500 forward_scrub_event(&ScrubPgIF::send_replica_pushes_upd, queued,
501 "ReplicaPushesUpd");
502 }
503
504 void scrub_send_get_next_chunk(epoch_t queued, ThreadPool::TPHandle& handle)
505 {
506 forward_scrub_event(&ScrubPgIF::send_get_next_chunk, queued, "NextChunk");
507 }
508
509 void scrub_send_scrub_is_finished(epoch_t queued, ThreadPool::TPHandle& handle)
510 {
511 forward_scrub_event(&ScrubPgIF::send_scrub_is_finished, queued, "ScrubFinished");
512 }
513
514 void scrub_send_chunk_free(epoch_t queued, ThreadPool::TPHandle& handle)
515 {
516 forward_scrub_event(&ScrubPgIF::send_chunk_free, queued, "SelectedChunkFree");
517 }
518
519 void scrub_send_chunk_busy(epoch_t queued, ThreadPool::TPHandle& handle)
520 {
521 forward_scrub_event(&ScrubPgIF::send_chunk_busy, queued, "ChunkIsBusy");
522 }
523
524 void queue_want_pg_temp(const std::vector<int> &wanted) override;
525 void clear_want_pg_temp() override;
526
527 void on_new_interval() override;
528
529 void on_role_change() override;
530 virtual void plpg_on_role_change() = 0;
531
532 void init_collection_pool_opts();
533 void on_pool_change() override;
534 virtual void plpg_on_pool_change() = 0;
535
536 void on_info_history_change() override;
537
538 void on_primary_status_change(bool was_primary, bool now_primary) override;
539
540 void reschedule_scrub() override;
541
542 void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override;
543
544 uint64_t get_snap_trimq_size() const override {
545 return snap_trimq.size();
546 }
547
548 static void add_objects_trimmed_count(
549 int64_t count, pg_stat_t &stats) {
550 stats.objects_trimmed += count;
551 }
552
553 void add_objects_trimmed_count(int64_t count) {
554 recovery_state.update_stats_wo_resched(
555 [count](auto &history, auto &stats) {
556 add_objects_trimmed_count(count, stats);
557 });
558 }
559
560 static void reset_objects_trimmed(pg_stat_t &stats) {
561 stats.objects_trimmed = 0;
562 }
563
564 void reset_objects_trimmed() {
565 recovery_state.update_stats_wo_resched(
566 [](auto &history, auto &stats) {
567 reset_objects_trimmed(stats);
568 });
569 }
570
571 utime_t snaptrim_begin_stamp;
572
573 void set_snaptrim_begin_stamp() {
574 snaptrim_begin_stamp = ceph_clock_now();
575 }
576
577 void set_snaptrim_duration() {
578 utime_t cur_stamp = ceph_clock_now();
579 utime_t duration = cur_stamp - snaptrim_begin_stamp;
580 recovery_state.update_stats_wo_resched(
581 [duration](auto &history, auto &stats) {
582 stats.snaptrim_duration = double(duration);
583 });
584 }
585
586 unsigned get_target_pg_log_entries() const override;
587
588 void clear_publish_stats() override;
589 void clear_primary_state() override;
590
591 epoch_t cluster_osdmap_trim_lower_bound() override;
592 OstreamTemp get_clog_error() override;
593 OstreamTemp get_clog_info() override;
594 OstreamTemp get_clog_debug() override;
595
596 void schedule_event_after(
597 PGPeeringEventRef event,
598 float delay) override;
599 void request_local_background_io_reservation(
600 unsigned priority,
601 PGPeeringEventURef on_grant,
602 PGPeeringEventURef on_preempt) override;
603 void update_local_background_io_priority(
604 unsigned priority) override;
605 void cancel_local_background_io_reservation() override;
606
607 void request_remote_recovery_reservation(
608 unsigned priority,
609 PGPeeringEventURef on_grant,
610 PGPeeringEventURef on_preempt) override;
611 void cancel_remote_recovery_reservation() override;
612
613 void schedule_event_on_commit(
614 ObjectStore::Transaction &t,
615 PGPeeringEventRef on_commit) override;
616
617 void on_active_exit() override;
618
619 Context *on_clean() override {
620 if (is_active()) {
621 kick_snap_trim();
622 }
623 requeue_ops(waiting_for_clean_to_primary_repair);
624 return finish_recovery();
625 }
626
627 void on_activate(interval_set<snapid_t> snaps) override;
628
629 void on_activate_committed() override;
630
631 void on_active_actmap() override;
632 void on_active_advmap(const OSDMapRef &osdmap) override;
633
634 void queue_snap_retrim(snapid_t snap);
635
636 void on_backfill_reserved() override;
637 void on_backfill_canceled() override;
638 void on_recovery_reserved() override;
639
640 bool is_forced_recovery_or_backfill() const {
641 return recovery_state.is_forced_recovery_or_backfill();
642 }
643
644 PGLog::LogEntryHandlerRef get_log_handler(
645 ObjectStore::Transaction &t) override {
646 return std::make_unique<PG::PGLogEntryHandler>(this, &t);
647 }
648
649 std::pair<ghobject_t, bool> do_delete_work(ObjectStore::Transaction &t,
650 ghobject_t _next) override;
651
652 void clear_ready_to_merge() override;
653 void set_not_ready_to_merge_target(pg_t pgid, pg_t src) override;
654 void set_not_ready_to_merge_source(pg_t pgid) override;
655 void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) override;
656 void set_ready_to_merge_source(eversion_t lu) override;
657
658 void send_pg_created(pg_t pgid) override;
659
660 ceph::signedspan get_mnow() const override;
661 HeartbeatStampsRef get_hb_stamps(int peer) override;
662 void schedule_renew_lease(epoch_t lpr, ceph::timespan delay) override;
663 void queue_check_readable(epoch_t lpr, ceph::timespan delay) override;
664
665 void rebuild_missing_set_with_deletes(PGLog &pglog) override;
666
667 void queue_peering_event(PGPeeringEventRef evt);
668 void do_peering_event(PGPeeringEventRef evt, PeeringCtx &rcx);
669 void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
670 void queue_flushed(epoch_t started_at);
671 void handle_advance_map(
672 OSDMapRef osdmap, OSDMapRef lastmap,
673 std::vector<int>& newup, int up_primary,
674 std::vector<int>& newacting, int acting_primary,
675 PeeringCtx &rctx);
676 void handle_activate_map(PeeringCtx &rctx);
677   void handle_initialize(PeeringCtx &rctx);
678 void handle_query_state(ceph::Formatter *f);
679
680 /**
681 * @param ops_begun returns how many recovery ops the function started
682 * @returns true if any useful work was accomplished; false otherwise
683 */
684 virtual bool start_recovery_ops(
685 uint64_t max,
686 ThreadPool::TPHandle &handle,
687 uint64_t *ops_begun) = 0;
688
689   // more work after the above: query peers for still-unfound objects, using a PeeringCtx
690 void find_unfound(epoch_t queued, PeeringCtx &rctx);
691
692 virtual void get_watchers(std::list<obj_watch_item_t> *ls) = 0;
693
694 void dump_pgstate_history(ceph::Formatter *f);
695 void dump_missing(ceph::Formatter *f);
696
697 void with_pg_stats(ceph::coarse_real_clock::time_point now_is,
698 std::function<void(const pg_stat_t&, epoch_t lec)>&& f);
699 void with_heartbeat_peers(std::function<void(int)>&& f);
700
701 void shutdown();
702 virtual void on_shutdown() = 0;
703
704 bool get_must_scrub() const;
705 Scrub::schedule_result_t sched_scrub();
706
707 unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsigned int suggested_priority) const;
708 /// the version that refers to flags_.priority
709 unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const;
710 private:
711 // auxiliaries used by sched_scrub():
712 double next_deepscrub_interval() const;
713
714 /// should we perform deep scrub?
715 bool is_time_for_deep(bool allow_deep_scrub,
716 bool allow_shallow_scrub,
717 bool has_deep_errors,
718 const requested_scrub_t& planned) const;
719
720 /**
721 * Validate the various 'next scrub' flags in m_planned_scrub against configuration
722 * and scrub-related timestamps.
723 *
724    * @returns an updated copy of m_planned_scrub (or std::nullopt if no scrub should be performed)
725 */
726 std::optional<requested_scrub_t> validate_scrub_mode() const;
727
728 std::optional<requested_scrub_t> validate_periodic_mode(
729 bool allow_deep_scrub,
730 bool try_to_auto_repair,
731 bool allow_shallow_scrub,
732 bool time_for_deep,
733 bool has_deep_errors,
734 const requested_scrub_t& planned) const;
735
736 std::optional<requested_scrub_t> validate_initiated_scrub(
737 bool allow_deep_scrub,
738 bool try_to_auto_repair,
739 bool time_for_deep,
740 bool has_deep_errors,
741 const requested_scrub_t& planned) const;
742
743 using ScrubAPI = void (ScrubPgIF::*)(epoch_t epoch_queued);
744 void forward_scrub_event(ScrubAPI fn, epoch_t epoch_queued, std::string_view desc);
745 // and for events that carry a meaningful 'activation token'
746 using ScrubSafeAPI = void (ScrubPgIF::*)(epoch_t epoch_queued,
747 Scrub::act_token_t act_token);
748 void forward_scrub_event(ScrubSafeAPI fn,
749 epoch_t epoch_queued,
750 Scrub::act_token_t act_token,
751 std::string_view desc);
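
  // Illustrative sketch (an assumption; the real definition lives in PG.cc):
  // the scrub_send_*() wrappers above reduce to invoking a pointer-to-member
  // on the scrubber, roughly
  //
  //   void PG::forward_scrub_event(ScrubAPI fn, epoch_t epoch_queued,
  //                                std::string_view desc)
  //   {
  //     // e.g. fn == &ScrubPgIF::send_scrub_resched for "InternalSchedScrub"
  //     ((*m_scrubber).*fn)(epoch_queued);
  //   }
  //
  // plus logging and a guard for a PG that is no longer active.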
752
753 public:
754 virtual void do_request(
755 OpRequestRef& op,
756 ThreadPool::TPHandle &handle
757 ) = 0;
758 virtual void clear_cache() = 0;
759 virtual int get_cache_obj_count() = 0;
760
761 virtual void snap_trimmer(epoch_t epoch_queued) = 0;
762 virtual void do_command(
763 const std::string_view& prefix,
764 const cmdmap_t& cmdmap,
765 const ceph::buffer::list& idata,
766 std::function<void(int,const std::string&,ceph::buffer::list&)> on_finish) = 0;
767
768 virtual bool agent_work(int max) = 0;
769 virtual bool agent_work(int max, int agent_flush_quota) = 0;
770 virtual void agent_stop() = 0;
771 virtual void agent_delay() = 0;
772 virtual void agent_clear() = 0;
773 virtual void agent_choose_mode_restart() = 0;
774
775 struct C_DeleteMore : public Context {
776 PGRef pg;
777 epoch_t epoch;
778 C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {}
779 void finish(int r) override {
780 ceph_abort();
781 }
782 void complete(int r) override;
783 };
784
785 virtual void set_dynamic_perf_stats_queries(
786 const std::list<OSDPerfMetricQuery> &queries) {
787 }
788 virtual void get_dynamic_perf_stats(DynamicPerfStats *stats) {
789 }
790
791 uint64_t get_min_alloc_size() const;
792
793 // reference counting
794 #ifdef PG_DEBUG_REFS
795 uint64_t get_with_id();
796 void put_with_id(uint64_t);
797 void dump_live_ids();
798 #endif
799 void get(const char* tag);
800 void put(const char* tag);
801 int get_num_ref() {
802 return ref;
803 }
804
805 // ctor
806 PG(OSDService *o, OSDMapRef curmap,
807 const PGPool &pool, spg_t p);
808 ~PG() override;
809
810 // prevent copying
811 explicit PG(const PG& rhs) = delete;
812 PG& operator=(const PG& rhs) = delete;
813
814 protected:
815 // -------------
816 // protected
817 OSDService *osd;
818 public:
819 OSDShard *osd_shard = nullptr;
820 OSDShardPGSlot *pg_slot = nullptr;
821 protected:
822 CephContext *cct;
823
824 // locking and reference counting.
825 // I destroy myself when the reference count hits zero.
826 // lock() should be called before doing anything.
827 // get() should be called on pointer copy (to another thread, etc.).
828 // put() should be called on destruction of some previously copied pointer.
829 // unlock() when done with the current pointer (_most common_).
830 mutable ceph::mutex _lock = ceph::make_mutex("PG::_lock");
831 #ifndef CEPH_DEBUG_MUTEX
832 mutable std::thread::id locked_by;
833 #endif
834 std::atomic<unsigned int> ref{0};
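
  // Illustrative usage sketch of the protocol documented above (the way the
  // PGRef is obtained is hypothetical):
  //
  //   PGRef pg = lookup_pg_somehow(pgid);  // copying a PGRef calls get("intptr")
  //   pg->lock();
  //   // ... read or modify PG state ...
  //   pg->unlock();
  //   // 'pg' going out of scope calls put("intptr"); the PG destroys itself
  //   // once its reference count reaches zero.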
835
836 #ifdef PG_DEBUG_REFS
837 ceph::mutex _ref_id_lock = ceph::make_mutex("PG::_ref_id_lock");
838 std::map<uint64_t, std::string> _live_ids;
839 std::map<std::string, uint64_t> _tag_counts;
840 uint64_t _ref_id = 0;
841
842 friend uint64_t get_with_id(PG *pg) { return pg->get_with_id(); }
843 friend void put_with_id(PG *pg, uint64_t id) { return pg->put_with_id(id); }
844 #endif
845
846 private:
847 friend void intrusive_ptr_add_ref(PG *pg) {
848 pg->get("intptr");
849 }
850 friend void intrusive_ptr_release(PG *pg) {
851 pg->put("intptr");
852 }
853
854
855 // =====================
856
857 protected:
858 OSDriver osdriver;
859 SnapMapper snap_mapper;
860
861 virtual PGBackend *get_pgbackend() = 0;
862 virtual const PGBackend* get_pgbackend() const = 0;
863
864 protected:
865 void requeue_map_waiters();
866
867 protected:
868
869 ZTracer::Endpoint trace_endpoint;
870
871
872 protected:
873 __u8 info_struct_v = 0;
874 void upgrade(ObjectStore *store);
875
876 protected:
877 ghobject_t pgmeta_oid;
878
879 // ------------------
880 interval_set<snapid_t> snap_trimq;
881 std::set<snapid_t> snap_trimq_repeat;
882
883 /* You should not use these items without taking their respective queue locks
884 * (if they have one) */
885 xlist<PG*>::item stat_queue_item;
886 bool recovery_queued;
887
888 int recovery_ops_active;
889 std::set<pg_shard_t> waiting_on_backfill;
890 #ifdef DEBUG_RECOVERY_OIDS
891 multiset<hobject_t> recovering_oids;
892 #endif
893
894 public:
895 bool dne() { return info.dne(); }
896
897 void send_cluster_message(
898 int osd, MessageRef m, epoch_t epoch, bool share_map_update) override;
899
900 protected:
901 epoch_t get_last_peering_reset() const {
902 return recovery_state.get_last_peering_reset();
903 }
904
905 /* heartbeat peers */
906 void set_probe_targets(const std::set<pg_shard_t> &probe_set) override;
907 void clear_probe_targets() override;
908
909 ceph::mutex heartbeat_peer_lock =
910 ceph::make_mutex("PG::heartbeat_peer_lock");
911 std::set<int> heartbeat_peers;
912 std::set<int> probe_targets;
913
914 protected:
915 BackfillInterval backfill_info;
916 std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
917 bool backfill_reserving;
918
919 // The primary's num_bytes and local num_bytes for this pg, only valid
920 // during backfill for non-primary shards.
921 // Both of these are adjusted for EC to reflect the on-disk bytes
922 std::atomic<int64_t> primary_num_bytes = 0;
923 std::atomic<int64_t> local_num_bytes = 0;
924
925 public:
926 // Space reserved for backfill is primary_num_bytes - local_num_bytes
927   // We don't care that the difference itself isn't computed atomically.
928 uint64_t get_reserved_num_bytes() {
929 int64_t primary = primary_num_bytes.load();
930 int64_t local = local_num_bytes.load();
931 if (primary > local)
932 return primary - local;
933 else
934 return 0;
935 }
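
  // Worked example (illustrative numbers only): if the primary reports
  // primary_num_bytes = 10 GiB and this shard currently holds
  // local_num_bytes = 3 GiB, then get_reserved_num_bytes() = 7 GiB, i.e. the
  // space still expected to arrive via backfill.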
936
937 bool is_remote_backfilling() {
938 return primary_num_bytes.load() > 0;
939 }
940
941 bool try_reserve_recovery_space(int64_t primary, int64_t local) override;
942 void unreserve_recovery_space() override;
943
944   // If the num_bytes values are inconsistent and local_num_bytes goes
945   // negative, that's ok, because it would then be ignored.
946
947 // The value of num_bytes could be negative,
948 // but we don't let local_num_bytes go negative.
949 void add_local_num_bytes(int64_t num_bytes) {
950 if (num_bytes) {
951 int64_t prev_bytes = local_num_bytes.load();
952 int64_t new_bytes;
953 do {
954 new_bytes = prev_bytes + num_bytes;
955 if (new_bytes < 0)
956 new_bytes = 0;
957 } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
958 }
959 }
960 void sub_local_num_bytes(int64_t num_bytes) {
961 ceph_assert(num_bytes >= 0);
962 if (num_bytes) {
963 int64_t prev_bytes = local_num_bytes.load();
964 int64_t new_bytes;
965 do {
966 new_bytes = prev_bytes - num_bytes;
967 if (new_bytes < 0)
968 new_bytes = 0;
969 } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
970 }
971 }
972 // The value of num_bytes could be negative,
973 // but we don't let info.stats.stats.sum.num_bytes go negative.
974 void add_num_bytes(int64_t num_bytes) {
975 ceph_assert(ceph_mutex_is_locked_by_me(_lock));
976 if (num_bytes) {
977 recovery_state.update_stats(
978 [num_bytes](auto &history, auto &stats) {
979 stats.stats.sum.num_bytes += num_bytes;
980 if (stats.stats.sum.num_bytes < 0) {
981 stats.stats.sum.num_bytes = 0;
982 }
983 return false;
984 });
985 }
986 }
987 void sub_num_bytes(int64_t num_bytes) {
988 ceph_assert(ceph_mutex_is_locked_by_me(_lock));
989 ceph_assert(num_bytes >= 0);
990 if (num_bytes) {
991 recovery_state.update_stats(
992 [num_bytes](auto &history, auto &stats) {
993 stats.stats.sum.num_bytes -= num_bytes;
994 if (stats.stats.sum.num_bytes < 0) {
995 stats.stats.sum.num_bytes = 0;
996 }
997 return false;
998 });
999 }
1000 }
1001
1002 // Only used in testing so not worried about needing the PG lock here
1003 int64_t get_stats_num_bytes() {
1004 std::lock_guard l{_lock};
1005 int num_bytes = info.stats.stats.sum.num_bytes;
1006 if (pool.info.is_erasure()) {
1007 num_bytes /= (int)get_pgbackend()->get_ec_data_chunk_count();
1008 // Round up each object by a stripe
1009 num_bytes += get_pgbackend()->get_ec_stripe_chunk_size() * info.stats.stats.sum.num_objects;
1010 }
1011 int64_t lnb = local_num_bytes.load();
1012 if (lnb && lnb != num_bytes) {
1013 lgeneric_dout(cct, 0) << this << " " << info.pgid << " num_bytes mismatch "
1014 << lnb << " vs stats "
1015 << info.stats.stats.sum.num_bytes << " / chunk "
1016 << get_pgbackend()->get_ec_data_chunk_count()
1017 << dendl;
1018 }
1019 return num_bytes;
1020 }
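
  // Worked example for the erasure-coded adjustment above (illustrative
  // numbers only): with k = 4 data chunks, a 4 KiB stripe chunk, 100 objects
  // and num_bytes = 400 MiB of logical data,
  //
  //   400 MiB / 4     = 100 MiB      // this shard's share of the data
  //   + 100 * 4 KiB   = 100.4 MiB    // each object rounded up by one stripe chunk
  //
  // which approximates the per-shard on-disk usage compared against
  // local_num_bytes.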
1021
1022 protected:
1023
1024 /*
1025 * blocked request wait hierarchy
1026 *
1027 * In order to preserve request ordering we need to be careful about the
1028 * order in which blocked requests get requeued. Generally speaking, we
1029 * push the requests back up to the op_wq in reverse order (most recent
1030 * request first) so that they come back out again in the original order.
1031 * However, because there are multiple wait queues, we need to requeue
1032 * waitlists in order. Generally speaking, we requeue the wait lists
1033 * that are checked first.
1034 *
1035 * Here are the various wait lists, in the order they are used during
1036 * request processing, with notes:
1037 *
1038 * - waiting_for_map
1039 * - may start or stop blocking at any time (depending on client epoch)
1040 * - waiting_for_peered
1041 * - !is_peered()
1042 * - only starts blocking on interval change; never restarts
1043 * - waiting_for_flush
1044 * - flushes_in_progress
1045 * - waiting for final flush during activate
1046 * - waiting_for_active
1047 * - !is_active()
1048 * - only starts blocking on interval change; never restarts
1049 * - waiting_for_readable
1050 * - now > readable_until
1051 * - unblocks when we get fresh(er) osd_pings
1052 * - waiting_for_scrub
1053 * - starts and stops blocking for varying intervals during scrub
1054 * - waiting_for_unreadable_object
1055 * - never restarts once object is readable (* except for EIO?)
1056 * - waiting_for_degraded_object
1057 * - never restarts once object is writeable (* except for EIO?)
1058 * - waiting_for_blocked_object
1059 * - starts and stops based on proxied op activity
1060 * - obc rwlocks
1061 * - starts and stops based on read/write activity
1062 *
1063 * Notes:
1064 *
1065  * 1. During an interval change, we requeue *everything* in the above order.
1066 *
1067 * 2. When an obc rwlock is released, we check for a scrub block and requeue
1068 * the op there if it applies. We ignore the unreadable/degraded/blocked
1069 * queues because we assume they cannot apply at that time (this is
1070 * probably mostly true).
1071 *
1072 * 3. The requeue_ops helper will push ops onto the waiting_for_map std::list if
1073 * it is non-empty.
1074 *
1075 * These three behaviors are generally sufficient to maintain ordering, with
1076 * the possible exception of cases where we make an object degraded or
1077 * unreadable that was previously okay, e.g. when scrub or op processing
1078 * encounter an unexpected error. FIXME.
1079 */
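
  // Illustrative sketch (an assumption; the real helper is defined in PG.cc):
  // requeueing a wait list in reverse order is what preserves the original
  // arrival order once the ops pass through the op_wq again, roughly
  //
  //   void PG::requeue_ops(std::list<OpRequestRef>& ls)
  //   {
  //     for (auto i = ls.rbegin(); i != ls.rend(); ++i) {
  //       requeue_op(*i);   // most recently queued op is pushed back first
  //     }
  //     ls.clear();
  //   }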
1080
1081 // ops with newer maps than our (or blocked behind them)
1082 // track these by client, since inter-request ordering doesn't otherwise
1083 // matter.
1084 std::unordered_map<entity_name_t,std::list<OpRequestRef>> waiting_for_map;
1085
1086 // ops waiting on peered
1087 std::list<OpRequestRef> waiting_for_peered;
1088
1089   /// ops waiting on readable
1090 std::list<OpRequestRef> waiting_for_readable;
1091
1092 // ops waiting on active (require peered as well)
1093 std::list<OpRequestRef> waiting_for_active;
1094 std::list<OpRequestRef> waiting_for_flush;
1095 std::list<OpRequestRef> waiting_for_scrub;
1096
1097 std::list<OpRequestRef> waiting_for_cache_not_full;
1098 std::list<OpRequestRef> waiting_for_clean_to_primary_repair;
1099 std::map<hobject_t, std::list<OpRequestRef>> waiting_for_unreadable_object,
1100 waiting_for_degraded_object,
1101 waiting_for_blocked_object;
1102
1103 std::set<hobject_t> objects_blocked_on_cache_full;
1104 std::map<hobject_t,snapid_t> objects_blocked_on_degraded_snap;
1105 std::map<hobject_t,ObjectContextRef> objects_blocked_on_snap_promotion;
1106
1107 // Callbacks should assume pg (and nothing else) is locked
1108 std::map<hobject_t, std::list<Context*>> callbacks_for_degraded_object;
1109
1110 std::map<eversion_t,
1111 std::list<
1112 std::tuple<OpRequestRef, version_t, int,
1113 std::vector<pg_log_op_return_item_t>>>> waiting_for_ondisk;
1114
1115 void requeue_object_waiters(std::map<hobject_t, std::list<OpRequestRef>>& m);
1116 void requeue_op(OpRequestRef op);
1117 void requeue_ops(std::list<OpRequestRef> &l);
1118
1119 // stats that persist lazily
1120 object_stat_collection_t unstable_stats;
1121
1122 // publish stats
1123 ceph::mutex pg_stats_publish_lock =
1124 ceph::make_mutex("PG::pg_stats_publish_lock");
1125 std::optional<pg_stat_t> pg_stats_publish;
1126
1127 friend class TestOpsSocketHook;
1128 void publish_stats_to_osd() override;
1129
1130 bool needs_recovery() const {
1131 return recovery_state.needs_recovery();
1132 }
1133 bool needs_backfill() const {
1134 return recovery_state.needs_backfill();
1135 }
1136
1137 bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
1138
1139 struct PGLogEntryHandler : public PGLog::LogEntryHandler {
1140 PG *pg;
1141 ObjectStore::Transaction *t;
1142 PGLogEntryHandler(PG *pg, ObjectStore::Transaction *t) : pg(pg), t(t) {}
1143
1144 // LogEntryHandler
1145 void remove(const hobject_t &hoid) override {
1146 pg->get_pgbackend()->remove(hoid, t);
1147 }
1148 void try_stash(const hobject_t &hoid, version_t v) override {
1149 pg->get_pgbackend()->try_stash(hoid, v, t);
1150 }
1151 void rollback(const pg_log_entry_t &entry) override {
1152 ceph_assert(entry.can_rollback());
1153 pg->get_pgbackend()->rollback(entry, t);
1154 }
1155 void rollforward(const pg_log_entry_t &entry) override {
1156 pg->get_pgbackend()->rollforward(entry, t);
1157 }
1158 void trim(const pg_log_entry_t &entry) override {
1159 pg->get_pgbackend()->trim(entry, t);
1160 }
1161 };
1162
1163 void update_object_snap_mapping(
1164 ObjectStore::Transaction *t, const hobject_t &soid,
1165 const std::set<snapid_t> &snaps);
1166 void clear_object_snap_mapping(
1167 ObjectStore::Transaction *t, const hobject_t &soid);
1168 void remove_snap_mapped_object(
1169 ObjectStore::Transaction& t, const hobject_t& soid);
1170
1171 bool have_unfound() const {
1172 return recovery_state.have_unfound();
1173 }
1174 uint64_t get_num_unfound() const {
1175 return recovery_state.get_num_unfound();
1176 }
1177
1178 virtual void check_local() = 0;
1179
1180 void purge_strays();
1181
1182 void update_heartbeat_peers(std::set<int> peers) override;
1183
1184 Context *finish_sync_event;
1185
1186 Context *finish_recovery();
1187 void _finish_recovery(Context *c);
1188 struct C_PG_FinishRecovery : public Context {
1189 PGRef pg;
1190 explicit C_PG_FinishRecovery(PG *p) : pg(p) {}
1191 void finish(int r) override {
1192 pg->_finish_recovery(this);
1193 }
1194 };
1195 void cancel_recovery();
1196 void clear_recovery_state();
1197 virtual void _clear_recovery_state() = 0;
1198 void start_recovery_op(const hobject_t& soid);
1199 void finish_recovery_op(const hobject_t& soid, bool dequeue=false);
1200
1201 virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) = 0;
1202
1203 friend class C_OSD_RepModify_Commit;
1204 friend struct C_DeleteMore;
1205
1206 // -- backoff --
1207 ceph::mutex backoff_lock = // orders inside Backoff::lock
1208 ceph::make_mutex("PG::backoff_lock");
1209 std::map<hobject_t,std::set<ceph::ref_t<Backoff>>> backoffs;
1210
1211 void add_backoff(const ceph::ref_t<Session>& s, const hobject_t& begin, const hobject_t& end);
1212 void release_backoffs(const hobject_t& begin, const hobject_t& end);
1213 void release_backoffs(const hobject_t& o) {
1214 release_backoffs(o, o);
1215 }
1216 void clear_backoffs();
1217
1218 void add_pg_backoff(const ceph::ref_t<Session>& s) {
1219 hobject_t begin = info.pgid.pgid.get_hobj_start();
1220 hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
1221 add_backoff(s, begin, end);
1222 }
1223 public:
1224 void release_pg_backoffs() {
1225 hobject_t begin = info.pgid.pgid.get_hobj_start();
1226 hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
1227 release_backoffs(begin, end);
1228 }
1229
1230 // -- scrub --
1231 protected:
1232 bool scrub_after_recovery;
1233
1234 int active_pushes;
1235
1236 [[nodiscard]] bool ops_blocked_by_scrub() const;
1237 [[nodiscard]] Scrub::scrub_prio_t is_scrub_blocking_ops() const;
1238
1239 void _scan_rollback_obs(const std::vector<ghobject_t> &rollback_obs);
1240 /**
1241    * returns true if [begin, end) is good to scrub at this time.
1242    * A false return value obliges the implementer to requeue the scrub when
1243    * the condition preventing it clears.
1244 */
1245 virtual bool _range_available_for_scrub(
1246 const hobject_t &begin, const hobject_t &end) = 0;
1247
1248 /**
1249 * Initiate the process that will create our scrub map for the Primary.
1250 * (triggered by MSG_OSD_REP_SCRUB)
1251 */
1252 void replica_scrub(OpRequestRef op, ThreadPool::TPHandle &handle);
1253
1254 // -- recovery state --
1255
1256 struct QueuePeeringEvt : Context {
1257 PGRef pg;
1258 PGPeeringEventRef evt;
1259
1260 template <class EVT>
1261 QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
1262 pg(pg), evt(std::make_shared<PGPeeringEvent>(epoch, epoch, evt)) {}
1263
1264 QueuePeeringEvt(PG *pg, PGPeeringEventRef evt) :
1265 pg(pg), evt(std::move(evt)) {}
1266
1267 void finish(int r) override {
1268 pg->lock();
1269 pg->queue_peering_event(std::move(evt));
1270 pg->unlock();
1271 }
1272 };
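
  // Illustrative usage sketch (an assumption; see schedule_event_on_commit()
  // and PG.cc for the authoritative wiring): a peering event can be delivered
  // once a transaction commits by registering the context on it, roughly
  //
  //   void PG::schedule_event_on_commit(ObjectStore::Transaction& t,
  //                                     PGPeeringEventRef on_commit)
  //   {
  //     t.register_on_commit(new QueuePeeringEvt(this, on_commit));
  //   }
  //
  // finish() then takes the PG lock, queues the event, and unlocks.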
1273
1274
1275 public:
1276 int pg_stat_adjust(osd_stat_t *new_stat);
1277 protected:
1278 bool delete_needs_sleep = false;
1279
1280 protected:
1281 bool state_test(uint64_t m) const { return recovery_state.state_test(m); }
1282 void state_set(uint64_t m) { recovery_state.state_set(m); }
1283 void state_clear(uint64_t m) { recovery_state.state_clear(m); }
1284
1285 bool is_complete() const {
1286 return recovery_state.is_complete();
1287 }
1288 bool should_send_notify() const {
1289 return recovery_state.should_send_notify();
1290 }
1291
1292 bool is_active() const { return recovery_state.is_active(); }
1293 bool is_activating() const { return recovery_state.is_activating(); }
1294 bool is_peering() const { return recovery_state.is_peering(); }
1295 bool is_down() const { return recovery_state.is_down(); }
1296 bool is_recovery_unfound() const { return recovery_state.is_recovery_unfound(); }
1297 bool is_backfill_unfound() const { return recovery_state.is_backfill_unfound(); }
1298 bool is_incomplete() const { return recovery_state.is_incomplete(); }
1299 bool is_clean() const { return recovery_state.is_clean(); }
1300 bool is_degraded() const { return recovery_state.is_degraded(); }
1301 bool is_undersized() const { return recovery_state.is_undersized(); }
1302 bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } // Primary only
1303 bool is_remapped() const { return recovery_state.is_remapped(); }
1304 bool is_peered() const { return recovery_state.is_peered(); }
1305 bool is_recovering() const { return recovery_state.is_recovering(); }
1306 bool is_premerge() const { return recovery_state.is_premerge(); }
1307 bool is_repair() const { return recovery_state.is_repair(); }
1308 bool is_laggy() const { return state_test(PG_STATE_LAGGY); }
1309 bool is_wait() const { return state_test(PG_STATE_WAIT); }
1310
1311 bool is_empty() const { return recovery_state.is_empty(); }
1312
1313 // pg on-disk state
1314 void do_pending_flush();
1315
1316 public:
1317 void prepare_write(
1318 pg_info_t &info,
1319 pg_info_t &last_written_info,
1320 PastIntervals &past_intervals,
1321 PGLog &pglog,
1322 bool dirty_info,
1323 bool dirty_big_info,
1324 bool need_write_epoch,
1325 ObjectStore::Transaction &t) override;
1326
1327 void write_if_dirty(PeeringCtx &rctx) {
1328 write_if_dirty(rctx.transaction);
1329 }
1330 protected:
1331 void write_if_dirty(ObjectStore::Transaction& t) {
1332 recovery_state.write_if_dirty(t);
1333 }
1334
1335 PGLog::IndexedLog projected_log;
1336 bool check_in_progress_op(
1337 const osd_reqid_t &r,
1338 eversion_t *version,
1339 version_t *user_version,
1340 int *return_code,
1341 std::vector<pg_log_op_return_item_t> *op_returns) const;
1342 eversion_t projected_last_update;
1343 eversion_t get_next_version() const {
1344 eversion_t at_version(
1345 get_osdmap_epoch(),
1346 projected_last_update.version+1);
1347 ceph_assert(at_version > info.last_update);
1348 ceph_assert(at_version > recovery_state.get_pg_log().get_head());
1349 ceph_assert(at_version > projected_last_update);
1350 return at_version;
1351 }
1352
1353 bool check_log_for_corruption(ObjectStore *store);
1354
1355 std::string get_corrupt_pg_log_name() const;
1356
1357 void update_snap_map(
1358 const std::vector<pg_log_entry_t> &log_entries,
1359 ObjectStore::Transaction& t);
1360
1361 void filter_snapc(std::vector<snapid_t> &snaps);
1362
1363 virtual void kick_snap_trim() = 0;
1364 virtual void snap_trimmer_scrub_complete() = 0;
1365
1366 void queue_recovery();
1367 void queue_scrub_after_repair();
1368 unsigned int get_scrub_priority();
1369
1370 bool try_flush_or_schedule_async() override;
1371 void start_flush_on_transaction(
1372 ObjectStore::Transaction &t) override;
1373
1374 void update_history(const pg_history_t& history) {
1375 recovery_state.update_history(history);
1376 }
1377
1378 // OpRequest queueing
1379 bool can_discard_op(OpRequestRef& op);
1380 bool can_discard_scan(OpRequestRef op);
1381 bool can_discard_backfill(OpRequestRef op);
1382 bool can_discard_request(OpRequestRef& op);
1383
1384 template<typename T, int MSGTYPE>
1385 bool can_discard_replica_op(OpRequestRef& op);
1386
1387 bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch);
1388 bool old_peering_evt(PGPeeringEventRef evt) {
1389 return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested());
1390 }
1391 bool have_same_or_newer_map(epoch_t e) {
1392 return e <= get_osdmap_epoch();
1393 }
1394
1395 bool op_has_sufficient_caps(OpRequestRef& op);
1396
1397 // abstract bits
1398 friend struct FlushState;
1399
1400 friend ostream& operator<<(ostream& out, const PG& pg);
1401
1402 protected:
1403 PeeringState recovery_state;
1404
1405 // ref to recovery_state.pool
1406 const PGPool &pool;
1407
1408 // ref to recovery_state.info
1409 const pg_info_t &info;
1410
1411
1412 // ScrubberPasskey getters/misc:
1413 public:
1414 const pg_info_t& get_pg_info(ScrubberPasskey) const final { return info; }
1415
1416 OSDService* get_pg_osd(ScrubberPasskey) const { return osd; }
1417
1418 requested_scrub_t& get_planned_scrub(ScrubberPasskey)
1419 {
1420 return m_planned_scrub;
1421 }
1422
1423 void force_object_missing(ScrubberPasskey,
1424 const std::set<pg_shard_t>& peer,
1425 const hobject_t& oid,
1426 eversion_t version) final
1427 {
1428 recovery_state.force_object_missing(peer, oid, version);
1429 }
1430
1431 uint64_t logical_to_ondisk_size(uint64_t logical_size) const final
1432 {
1433 return get_pgbackend()->be_get_ondisk_size(logical_size);
1434 }
1435 };
1436
1437 #endif