1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_PG_H
16 #define CEPH_PG_H
17
18 #include <boost/scoped_ptr.hpp>
19 #include <boost/container/flat_set.hpp>
20 #include "include/mempool.h"
21
22 // re-include our assert to clobber boost's
23 #include "include/ceph_assert.h"
24 #include "include/common_fwd.h"
25
26 #include "include/types.h"
27 #include "include/stringify.h"
28 #include "osd_types.h"
29 #include "include/xlist.h"
30 #include "SnapMapper.h"
31 #include "Session.h"
32 #include "common/Timer.h"
33
34 #include "PGLog.h"
35 #include "OSDMap.h"
36 #include "include/str_list.h"
37 #include "PGBackend.h"
38 #include "PGPeeringEvent.h"
39 #include "PeeringState.h"
40 #include "recovery_types.h"
41 #include "MissingLoc.h"
42 #include "scrubber_common.h"
43
44 #include "mgr/OSDPerfMetricTypes.h"
45
46 #include <atomic>
47 #include <list>
48 #include <memory>
49 #include <string>
50 #include <tuple>
51
52 //#define DEBUG_RECOVERY_OIDS // track std::set of recovering oids explicitly, to find counting bugs
53 //#define PG_DEBUG_REFS // track provenance of pg refs, helpful for finding leaks
54
55 class OSD;
56 class OSDService;
57 class OSDShard;
58 class OSDShardPGSlot;
59
60 class PG;
61 struct OpRequest;
62 typedef OpRequest::Ref OpRequestRef;
63 class DynamicPerfStats;
64 class PgScrubber;
65
66 namespace Scrub {
67 class Store;
68 class ReplicaReservations;
69 class LocalReservation;
70 class ReservedByRemotePrimary;
71 enum class schedule_result_t;
72 }
73
74 #ifdef PG_DEBUG_REFS
75 #include "common/tracked_int_ptr.hpp"
76 uint64_t get_with_id(PG *pg);
77 void put_with_id(PG *pg, uint64_t id);
78 typedef TrackedIntPtr<PG> PGRef;
79 #else
80 typedef boost::intrusive_ptr<PG> PGRef;
81 #endif
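// Editor's note -- a brief, hedged illustration (not part of the upstream
// source): PGRef is an intrusive smart pointer, so copying one bumps the
// PG's internal reference count (intrusive_ptr_add_ref() -> PG::get()) and
// dropping it decrements it (intrusive_ptr_release() -> PG::put()); the PG
// destroys itself once the count reaches zero.  Roughly:
//
//   void hand_off(PG *raw_pg) {
//     PGRef pg = raw_pg;     // add_ref -> pg->get("intptr")
//     queue_somewhere(pg);   // the copied PGRef keeps the PG alive
//   }                        // local PGRef dropped -> pg->put("intptr")
//
// "hand_off" and "queue_somewhere" are made-up names used only for this sketch.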
82
83 class PGRecoveryStats {
84 struct per_state_info {
85 uint64_t enter, exit; // enter/exit counts
86 uint64_t events;
87 utime_t event_time; // time spent processing events
88 utime_t total_time; // total time in state
89 utime_t min_time, max_time;
90
91 // cppcheck-suppress unreachableCode
92 per_state_info() : enter(0), exit(0), events(0) {}
93 };
94 std::map<const char *,per_state_info> info;
95 ceph::mutex lock = ceph::make_mutex("PGRecoveryStats::lock");
96
97 public:
98 PGRecoveryStats() = default;
99
100 void reset() {
101 std::lock_guard l(lock);
102 info.clear();
103 }
104 void dump(ostream& out) {
105 std::lock_guard l(lock);
106 for (std::map<const char *,per_state_info>::iterator p = info.begin(); p != info.end(); ++p) {
107 per_state_info& i = p->second;
108 out << i.enter << "\t" << i.exit << "\t"
109 << i.events << "\t" << i.event_time << "\t"
110 << i.total_time << "\t"
111 << i.min_time << "\t" << i.max_time << "\t"
112 << p->first << "\n";
113 }
114 }
115
116 void dump_formatted(ceph::Formatter *f) {
117 std::lock_guard l(lock);
118 f->open_array_section("pg_recovery_stats");
119 for (std::map<const char *,per_state_info>::iterator p = info.begin();
120 p != info.end(); ++p) {
121 per_state_info& i = p->second;
122 f->open_object_section("recovery_state");
123 f->dump_int("enter", i.enter);
124 f->dump_int("exit", i.exit);
125 f->dump_int("events", i.events);
126 f->dump_stream("event_time") << i.event_time;
127 f->dump_stream("total_time") << i.total_time;
128 f->dump_stream("min_time") << i.min_time;
129 f->dump_stream("max_time") << i.max_time;
130 std::vector<std::string> states;
131 get_str_vec(p->first, "/", states);
132 f->open_array_section("nested_states");
133 for (std::vector<std::string>::iterator st = states.begin();
134 st != states.end(); ++st) {
135 f->dump_string("state", *st);
136 }
137 f->close_section();
138 f->close_section();
139 }
140 f->close_section();
141 }
142
143 void log_enter(const char *s) {
144 std::lock_guard l(lock);
145 info[s].enter++;
146 }
147 void log_exit(const char *s, utime_t dur, uint64_t events, utime_t event_dur) {
148 std::lock_guard l(lock);
149 per_state_info &i = info[s];
150 i.exit++;
151 i.total_time += dur;
152 if (dur > i.max_time)
153 i.max_time = dur;
154 if (dur < i.min_time || i.min_time == utime_t())
155 i.min_time = dur;
156 i.events += events;
157 i.event_time += event_dur;
158 }
159 };
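// Editor's note -- a minimal usage sketch of PGRecoveryStats (illustration
// only; the real callers are PG::log_state_enter()/log_state_exit() further
// below).  States are keyed by their name strings:
//
//   PGRecoveryStats stats;
//   stats.log_enter("Started/Primary/Active");
//   ...                                  // state runs, handles n_events events
//   stats.log_exit("Started/Primary/Active",
//                  time_in_state, n_events, time_in_event_handlers);
//   stats.dump(std::cout);  // tab-separated: enter exit events event_time
//                           //   total_time min_time max_time <state name>
//
// time_in_state, n_events and time_in_event_handlers are placeholders.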
160
161 /** PG - Replica Placement Group
162 *
163 */
164
165 /// Facilitating scrub-related object access to private PG data
166 class ScrubberPasskey {
167 private:
168 friend class Scrub::ReplicaReservations;
169 friend class PrimaryLogScrub;
170 ScrubberPasskey() {}
171 ScrubberPasskey(const ScrubberPasskey&) = default;
172 ScrubberPasskey& operator=(const ScrubberPasskey&) = delete;
173 };
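// Editor's note -- how the passkey idiom above is used (a hedged sketch, not
// a quote of the real call sites): only ScrubberPasskey's friends can
// construct one, so a PG accessor that takes a ScrubberPasskey by value --
// e.g. PG::get_pg_info(ScrubberPasskey), declared near the end of this file --
// is effectively callable only from those friends:
//
//   // inside PrimaryLogScrub (a friend); m_pg assumed to be a PG*:
//   const pg_info_t& info = m_pg->get_pg_info(ScrubberPasskey{});
//
// Any other caller fails to compile, as the ScrubberPasskey constructor is
// private.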
174
175 class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
176 friend struct NamedState;
177 friend class PeeringState;
178 friend class PgScrubber;
179
180 public:
181 const pg_shard_t pg_whoami;
182 const spg_t pg_id;
183
184 /// the 'scrubber'. Allocated in the derived-class (PrimaryLogPG) ctor,
185 /// and removed only in the PrimaryLogPG destructor.
186 std::unique_ptr<ScrubPgIF> m_scrubber;
187
188 /// flags detailing scheduling/operation characteristics of the next scrub
189 requested_scrub_t m_planned_scrub;
190
191 /// scrubbing state for both Primary & replicas
192 bool is_scrub_active() const { return m_scrubber->is_scrub_active(); }
193
194 /// set when the scrub request is queued, and reset only after scrubbing
195 /// has fully cleaned up.
196 bool is_scrub_queued_or_active() const { return m_scrubber->is_queued_or_active(); }
197
198 public:
199 // -- members --
200 const coll_t coll;
201
202 ObjectStore::CollectionHandle ch;
203
204 // -- methods --
205 std::ostream& gen_prefix(std::ostream& out) const override;
206 CephContext *get_cct() const override {
207 return cct;
208 }
209 unsigned get_subsys() const override {
210 return ceph_subsys_osd;
211 }
212
213 const char* const get_current_state() const {
214 return recovery_state.get_current_state();
215 }
216
217 const OSDMapRef& get_osdmap() const {
218 ceph_assert(is_locked());
219 return recovery_state.get_osdmap();
220 }
221
222 epoch_t get_osdmap_epoch() const override final {
223 return recovery_state.get_osdmap()->get_epoch();
224 }
225
226 PerfCounters &get_peering_perf() override;
227 PerfCounters &get_perf_logger() override;
228 void log_state_enter(const char *state) override;
229 void log_state_exit(
230 const char *state_name, utime_t enter_time,
231 uint64_t events, utime_t event_dur) override;
232
233 void lock(bool no_lockdep = false) const;
234 void unlock() const;
235 bool is_locked() const;
236
237 const spg_t& get_pgid() const {
238 return pg_id;
239 }
240
241 const PGPool& get_pool() const {
242 return pool;
243 }
244 uint64_t get_last_user_version() const {
245 return info.last_user_version;
246 }
247 const pg_history_t& get_history() const {
248 return info.history;
249 }
250 bool get_need_up_thru() const {
251 return recovery_state.get_need_up_thru();
252 }
253 epoch_t get_same_interval_since() const {
254 return info.history.same_interval_since;
255 }
256
257 static void set_last_scrub_stamp(
258 utime_t t, pg_history_t &history, pg_stat_t &stats) {
259 stats.last_scrub_stamp = t;
260 history.last_scrub_stamp = t;
261 }
262
263 void set_last_scrub_stamp(utime_t t) {
264 recovery_state.update_stats(
265 [=](auto &history, auto &stats) {
266 set_last_scrub_stamp(t, history, stats);
267 return true;
268 });
269 }
270
271 static void set_last_deep_scrub_stamp(
272 utime_t t, pg_history_t &history, pg_stat_t &stats) {
273 stats.last_deep_scrub_stamp = t;
274 history.last_deep_scrub_stamp = t;
275 }
276
277 void set_last_deep_scrub_stamp(utime_t t) {
278 recovery_state.update_stats(
279 [=](auto &history, auto &stats) {
280 set_last_deep_scrub_stamp(t, history, stats);
281 return true;
282 });
283 }
284
285 static void add_objects_scrubbed_count(
286 int64_t count, pg_stat_t &stats) {
287 stats.objects_scrubbed += count;
288 }
289
290 void add_objects_scrubbed_count(int64_t count) {
291 recovery_state.update_stats(
292 [=](auto &history, auto &stats) {
293 add_objects_scrubbed_count(count, stats);
294 return true;
295 });
296 }
297
298 static void reset_objects_scrubbed(pg_stat_t &stats) {
299 stats.objects_scrubbed = 0;
300 }
301
302 void reset_objects_scrubbed() {
303 recovery_state.update_stats(
304 [=](auto &history, auto &stats) {
305 reset_objects_scrubbed(stats);
306 return true;
307 });
308 }
309
310 bool is_deleting() const {
311 return recovery_state.is_deleting();
312 }
313 bool is_deleted() const {
314 return recovery_state.is_deleted();
315 }
316 bool is_nonprimary() const {
317 return recovery_state.is_nonprimary();
318 }
319 bool is_primary() const {
320 return recovery_state.is_primary();
321 }
322 bool pg_has_reset_since(epoch_t e) {
323 ceph_assert(is_locked());
324 return recovery_state.pg_has_reset_since(e);
325 }
326
327 bool is_ec_pg() const {
328 return recovery_state.is_ec_pg();
329 }
330 int get_role() const {
331 return recovery_state.get_role();
332 }
333 const std::vector<int> get_acting() const {
334 return recovery_state.get_acting();
335 }
336 const std::set<pg_shard_t> &get_actingset() const {
337 return recovery_state.get_actingset();
338 }
339 int get_acting_primary() const {
340 return recovery_state.get_acting_primary();
341 }
342 pg_shard_t get_primary() const {
343 return recovery_state.get_primary();
344 }
345 const std::vector<int> get_up() const {
346 return recovery_state.get_up();
347 }
348 int get_up_primary() const {
349 return recovery_state.get_up_primary();
350 }
351 const PastIntervals& get_past_intervals() const {
352 return recovery_state.get_past_intervals();
353 }
354 bool is_acting_recovery_backfill(pg_shard_t osd) const {
355 return recovery_state.is_acting_recovery_backfill(osd);
356 }
357 const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
358 return recovery_state.get_acting_recovery_backfill();
359 }
360 bool is_acting(pg_shard_t osd) const {
361 return recovery_state.is_acting(osd);
362 }
363 bool is_up(pg_shard_t osd) const {
364 return recovery_state.is_up(osd);
365 }
366 static bool has_shard(bool ec, const std::vector<int>& v, pg_shard_t osd) {
367 return PeeringState::has_shard(ec, v, osd);
368 }
369
370 /// initialize created PG
371 void init(
372 int role,
373 const std::vector<int>& up,
374 int up_primary,
375 const std::vector<int>& acting,
376 int acting_primary,
377 const pg_history_t& history,
378 const PastIntervals& pim,
379 ObjectStore::Transaction &t);
380
381 /// read existing pg state off disk
382 void read_state(ObjectStore *store);
383 static int peek_map_epoch(ObjectStore *store, spg_t pgid, epoch_t *pepoch);
384
385 static int get_latest_struct_v() {
386 return pg_latest_struct_v;
387 }
388 static int get_compat_struct_v() {
389 return pg_compat_struct_v;
390 }
391 static int read_info(
392 ObjectStore *store, spg_t pgid, const coll_t &coll,
393 pg_info_t &info, PastIntervals &past_intervals,
394 __u8 &);
395 static bool _has_removal_flag(ObjectStore *store, spg_t pgid);
396
397 void rm_backoff(const ceph::ref_t<Backoff>& b);
398
399 void update_snap_mapper_bits(uint32_t bits) {
400 snap_mapper.update_bits(bits);
401 }
402 void start_split_stats(const std::set<spg_t>& childpgs, std::vector<object_stat_sum_t> *v);
403 virtual void split_colls(
404 spg_t child,
405 int split_bits,
406 int seed,
407 const pg_pool_t *pool,
408 ObjectStore::Transaction &t) = 0;
409 void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
410 void merge_from(std::map<spg_t,PGRef>& sources, PeeringCtx &rctx,
411 unsigned split_bits,
412 const pg_merge_meta_t& last_pg_merge_meta);
413 void finish_split_stats(const object_stat_sum_t& stats,
414 ObjectStore::Transaction &t);
415
416 void scrub(epoch_t queued, ThreadPool::TPHandle& handle)
417 {
418 // a new scrub
419 forward_scrub_event(&ScrubPgIF::initiate_regular_scrub, queued, "StartScrub");
420 }
421
422 /**
423  * a special version of PG::scrub(), which:
424  * - is initiated after repair; and
425  * - (no longer true:) is not required to allocate local/remote
426  *   OSD scrub resources
427  */
428 void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle)
429 {
430 // a new scrub
431 forward_scrub_event(&ScrubPgIF::initiate_scrub_after_repair, queued,
432 "AfterRepairScrub");
433 }
434
435 void replica_scrub(epoch_t queued,
436 Scrub::act_token_t act_token,
437 ThreadPool::TPHandle& handle);
438
439 void replica_scrub_resched(epoch_t queued,
440 Scrub::act_token_t act_token,
441 ThreadPool::TPHandle& handle)
442 {
443 forward_scrub_event(&ScrubPgIF::send_sched_replica, queued, act_token,
444 "SchedReplica");
445 }
446
447 void scrub_send_resources_granted(epoch_t queued, ThreadPool::TPHandle& handle)
448 {
449 forward_scrub_event(&ScrubPgIF::send_remotes_reserved, queued, "RemotesReserved");
450 }
451
452 void scrub_send_resources_denied(epoch_t queued, ThreadPool::TPHandle& handle)
453 {
454 forward_scrub_event(&ScrubPgIF::send_reservation_failure, queued,
455 "ReservationFailure");
456 }
457
458 void scrub_send_scrub_resched(epoch_t queued, ThreadPool::TPHandle& handle)
459 {
460 forward_scrub_event(&ScrubPgIF::send_scrub_resched, queued, "InternalSchedScrub");
461 }
462
463 void scrub_send_pushes_update(epoch_t queued, ThreadPool::TPHandle& handle)
464 {
465 forward_scrub_event(&ScrubPgIF::active_pushes_notification, queued,
466 "ActivePushesUpd");
467 }
468
469 void scrub_send_applied_update(epoch_t queued, ThreadPool::TPHandle& handle)
470 {
471 forward_scrub_event(&ScrubPgIF::update_applied_notification, queued,
472 "UpdatesApplied");
473 }
474
475 void scrub_send_unblocking(epoch_t queued, ThreadPool::TPHandle& handle)
476 {
477 forward_scrub_event(&ScrubPgIF::send_scrub_unblock, queued, "Unblocked");
478 }
479
480 void scrub_send_digest_update(epoch_t queued, ThreadPool::TPHandle& handle)
481 {
482 forward_scrub_event(&ScrubPgIF::digest_update_notification, queued, "DigestUpdate");
483 }
484
485 void scrub_send_local_map_ready(epoch_t queued, ThreadPool::TPHandle& handle)
486 {
487 forward_scrub_event(&ScrubPgIF::send_local_map_done, queued, "IntLocalMapDone");
488 }
489
490 void scrub_send_replmaps_ready(epoch_t queued, ThreadPool::TPHandle& handle)
491 {
492 forward_scrub_event(&ScrubPgIF::send_replica_maps_ready, queued, "GotReplicas");
493 }
494
495 void scrub_send_replica_pushes(epoch_t queued, ThreadPool::TPHandle& handle)
496 {
497 forward_scrub_event(&ScrubPgIF::send_replica_pushes_upd, queued,
498 "ReplicaPushesUpd");
499 }
500
501 void scrub_send_maps_compared(epoch_t queued, ThreadPool::TPHandle& handle)
502 {
503 forward_scrub_event(&ScrubPgIF::send_maps_compared, queued, "MapsCompared");
504 }
505
506 void scrub_send_get_next_chunk(epoch_t queued, ThreadPool::TPHandle& handle)
507 {
508 forward_scrub_event(&ScrubPgIF::send_get_next_chunk, queued, "NextChunk");
509 }
510
511 void scrub_send_scrub_is_finished(epoch_t queued, ThreadPool::TPHandle& handle)
512 {
513 forward_scrub_event(&ScrubPgIF::send_scrub_is_finished, queued, "ScrubFinished");
514 }
515
516 void scrub_send_chunk_free(epoch_t queued, ThreadPool::TPHandle& handle)
517 {
518 forward_scrub_event(&ScrubPgIF::send_chunk_free, queued, "SelectedChunkFree");
519 }
520
521 void scrub_send_chunk_busy(epoch_t queued, ThreadPool::TPHandle& handle)
522 {
523 forward_scrub_event(&ScrubPgIF::send_chunk_busy, queued, "ChunkIsBusy");
524 }
525
526 void queue_want_pg_temp(const std::vector<int> &wanted) override;
527 void clear_want_pg_temp() override;
528
529 void on_new_interval() override;
530
531 void on_role_change() override;
532 virtual void plpg_on_role_change() = 0;
533
534 void init_collection_pool_opts();
535 void on_pool_change() override;
536 virtual void plpg_on_pool_change() = 0;
537
538 void on_info_history_change() override;
539
540 void on_primary_status_change(bool was_primary, bool now_primary) override;
541
542 void reschedule_scrub() override;
543
544 void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override;
545
546 uint64_t get_snap_trimq_size() const override {
547 return snap_trimq.size();
548 }
549
550 static void add_objects_trimmed_count(
551 int64_t count, pg_stat_t &stats) {
552 stats.objects_trimmed += count;
553 }
554
555 void add_objects_trimmed_count(int64_t count) {
556 recovery_state.update_stats_wo_resched(
557 [=](auto &history, auto &stats) {
558 add_objects_trimmed_count(count, stats);
559 });
560 }
561
562 static void reset_objects_trimmed(pg_stat_t &stats) {
563 stats.objects_trimmed = 0;
564 }
565
566 void reset_objects_trimmed() {
567 recovery_state.update_stats_wo_resched(
568 [=](auto &history, auto &stats) {
569 reset_objects_trimmed(stats);
570 });
571 }
572
573 utime_t snaptrim_begin_stamp;
574
575 void set_snaptrim_begin_stamp() {
576 snaptrim_begin_stamp = ceph_clock_now();
577 }
578
579 void set_snaptrim_duration() {
580 utime_t cur_stamp = ceph_clock_now();
581 utime_t duration = cur_stamp - snaptrim_begin_stamp;
582 recovery_state.update_stats_wo_resched(
583 [=](auto &history, auto &stats) {
584 stats.snaptrim_duration = double(duration);
585 });
586 }
587
588 unsigned get_target_pg_log_entries() const override;
589
590 void clear_publish_stats() override;
591 void clear_primary_state() override;
592
593 epoch_t oldest_stored_osdmap() override;
594 OstreamTemp get_clog_error() override;
595 OstreamTemp get_clog_info() override;
596 OstreamTemp get_clog_debug() override;
597
598 void schedule_event_after(
599 PGPeeringEventRef event,
600 float delay) override;
601 void request_local_background_io_reservation(
602 unsigned priority,
603 PGPeeringEventURef on_grant,
604 PGPeeringEventURef on_preempt) override;
605 void update_local_background_io_priority(
606 unsigned priority) override;
607 void cancel_local_background_io_reservation() override;
608
609 void request_remote_recovery_reservation(
610 unsigned priority,
611 PGPeeringEventURef on_grant,
612 PGPeeringEventURef on_preempt) override;
613 void cancel_remote_recovery_reservation() override;
614
615 void schedule_event_on_commit(
616 ObjectStore::Transaction &t,
617 PGPeeringEventRef on_commit) override;
618
619 void on_active_exit() override;
620
621 Context *on_clean() override {
622 if (is_active()) {
623 kick_snap_trim();
624 }
625 requeue_ops(waiting_for_clean_to_primary_repair);
626 return finish_recovery();
627 }
628
629 void on_activate(interval_set<snapid_t> snaps) override;
630
631 void on_activate_committed() override;
632
633 void on_active_actmap() override;
634 void on_active_advmap(const OSDMapRef &osdmap) override;
635
636 void queue_snap_retrim(snapid_t snap);
637
638 void on_backfill_reserved() override;
639 void on_backfill_canceled() override;
640 void on_recovery_reserved() override;
641
642 bool is_forced_recovery_or_backfill() const {
643 return recovery_state.is_forced_recovery_or_backfill();
644 }
645
646 PGLog::LogEntryHandlerRef get_log_handler(
647 ObjectStore::Transaction &t) override {
648 return std::make_unique<PG::PGLogEntryHandler>(this, &t);
649 }
650
651 std::pair<ghobject_t, bool> do_delete_work(ObjectStore::Transaction &t,
652 ghobject_t _next) override;
653
654 void clear_ready_to_merge() override;
655 void set_not_ready_to_merge_target(pg_t pgid, pg_t src) override;
656 void set_not_ready_to_merge_source(pg_t pgid) override;
657 void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) override;
658 void set_ready_to_merge_source(eversion_t lu) override;
659
660 void send_pg_created(pg_t pgid) override;
661
662 ceph::signedspan get_mnow() override;
663 HeartbeatStampsRef get_hb_stamps(int peer) override;
664 void schedule_renew_lease(epoch_t lpr, ceph::timespan delay) override;
665 void queue_check_readable(epoch_t lpr, ceph::timespan delay) override;
666
667 void rebuild_missing_set_with_deletes(PGLog &pglog) override;
668
669 void queue_peering_event(PGPeeringEventRef evt);
670 void do_peering_event(PGPeeringEventRef evt, PeeringCtx &rcx);
671 void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
672 void queue_flushed(epoch_t started_at);
673 void handle_advance_map(
674 OSDMapRef osdmap, OSDMapRef lastmap,
675 std::vector<int>& newup, int up_primary,
676 std::vector<int>& newacting, int acting_primary,
677 PeeringCtx &rctx);
678 void handle_activate_map(PeeringCtx &rctx);
679 void handle_initialize(PeeringCtx &rctx);
680 void handle_query_state(ceph::Formatter *f);
681
682 /**
683 * @param ops_begun returns how many recovery ops the function started
684 * @returns true if any useful work was accomplished; false otherwise
685 */
686 virtual bool start_recovery_ops(
687 uint64_t max,
688 ThreadPool::TPHandle &handle,
689 uint64_t *ops_begun) = 0;
690
691 // more work after the above, but with a PeeringCtx
692 void find_unfound(epoch_t queued, PeeringCtx &rctx);
693
694 virtual void get_watchers(std::list<obj_watch_item_t> *ls) = 0;
695
696 void dump_pgstate_history(ceph::Formatter *f);
697 void dump_missing(ceph::Formatter *f);
698
699 void with_pg_stats(std::function<void(const pg_stat_t&, epoch_t lec)>&& f);
700 void with_heartbeat_peers(std::function<void(int)>&& f);
701
702 void shutdown();
703 virtual void on_shutdown() = 0;
704
705 bool get_must_scrub() const;
706 Scrub::schedule_result_t sched_scrub();
707
708 unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsigned int suggested_priority) const;
709 /// the version that refers to flags_.priority
710 unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const;
711 private:
712 // auxiliaries used by sched_scrub():
713 double next_deepscrub_interval() const;
714
715 /// should we perform deep scrub?
716 bool is_time_for_deep(bool allow_deep_scrub,
717 bool allow_scrub,
718 bool has_deep_errors,
719 const requested_scrub_t& planned) const;
720
721 /**
722 * Verify the various 'next scrub' flags in m_planned_scrub against configuration
723 * and scrub-related timestamps.
724 *
725 * @returns an updated copy of m_planned_scrub (or nothing if no scrub is due)
726 */
727 std::optional<requested_scrub_t> verify_scrub_mode() const;
728
729 bool verify_periodic_scrub_mode(bool allow_deep_scrub,
730 bool try_to_auto_repair,
731 bool allow_regular_scrub,
732 bool has_deep_errors,
733 requested_scrub_t& planned) const;
734
735 using ScrubAPI = void (ScrubPgIF::*)(epoch_t epoch_queued);
736 void forward_scrub_event(ScrubAPI fn, epoch_t epoch_queued, std::string_view desc);
737 // and for events that carry a meaningful 'activation token'
738 using ScrubSafeAPI = void (ScrubPgIF::*)(epoch_t epoch_queued,
739 Scrub::act_token_t act_token);
740 void forward_scrub_event(ScrubSafeAPI fn,
741 epoch_t epoch_queued,
742 Scrub::act_token_t act_token,
743 std::string_view desc);
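// Editor's note -- what the pointer-to-member dispatch above amounts to
// (a hedged sketch; the real implementation is in PG.cc and also handles
// epoch checks, logging, etc.):
//
//   void PG::forward_scrub_event(ScrubAPI fn, epoch_t epoch_queued,
//                                std::string_view desc)
//   {
//     // call the selected ScrubPgIF handler on this PG's scrubber
//     ((*m_scrubber).*fn)(epoch_queued);
//   }
//
// This is what lets the thin wrappers above (scrub_send_scrub_resched() and
// friends) pass handlers such as &ScrubPgIF::send_scrub_resched as arguments.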
744
745 public:
746 virtual void do_request(
747 OpRequestRef& op,
748 ThreadPool::TPHandle &handle
749 ) = 0;
750 virtual void clear_cache() = 0;
751 virtual int get_cache_obj_count() = 0;
752
753 virtual void snap_trimmer(epoch_t epoch_queued) = 0;
754 virtual void do_command(
755 const std::string_view& prefix,
756 const cmdmap_t& cmdmap,
757 const ceph::buffer::list& idata,
758 std::function<void(int,const std::string&,ceph::buffer::list&)> on_finish) = 0;
759
760 virtual bool agent_work(int max) = 0;
761 virtual bool agent_work(int max, int agent_flush_quota) = 0;
762 virtual void agent_stop() = 0;
763 virtual void agent_delay() = 0;
764 virtual void agent_clear() = 0;
765 virtual void agent_choose_mode_restart() = 0;
766
767 struct C_DeleteMore : public Context {
768 PGRef pg;
769 epoch_t epoch;
770 C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {}
771 void finish(int r) override {
772 ceph_abort();
773 }
774 void complete(int r) override;
775 };
776
777 virtual void set_dynamic_perf_stats_queries(
778 const std::list<OSDPerfMetricQuery> &queries) {
779 }
780 virtual void get_dynamic_perf_stats(DynamicPerfStats *stats) {
781 }
782
783 uint64_t get_min_alloc_size() const;
784
785 // reference counting
786 #ifdef PG_DEBUG_REFS
787 uint64_t get_with_id();
788 void put_with_id(uint64_t);
789 void dump_live_ids();
790 #endif
791 void get(const char* tag);
792 void put(const char* tag);
793 int get_num_ref() {
794 return ref;
795 }
796
797 // ctor
798 PG(OSDService *o, OSDMapRef curmap,
799 const PGPool &pool, spg_t p);
800 ~PG() override;
801
802 // prevent copying
803 explicit PG(const PG& rhs) = delete;
804 PG& operator=(const PG& rhs) = delete;
805
806 protected:
807 // -------------
808 // protected
809 OSDService *osd;
810 public:
811 OSDShard *osd_shard = nullptr;
812 OSDShardPGSlot *pg_slot = nullptr;
813 protected:
814 CephContext *cct;
815
816 // locking and reference counting.
817 // I destroy myself when the reference count hits zero.
818 // lock() should be called before doing anything.
819 // get() should be called on pointer copy (to another thread, etc.).
820 // put() should be called on destruction of some previously copied pointer.
821 // unlock() when done with the current pointer (_most common_).
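// Editor's note -- a minimal sketch of the protocol described above
// (illustration only; real callers also handle is_deleted(), lockdep, etc.):
//
//   PGRef pg = look_up_pg_somehow();  // taking a ref implies get()
//   pg->lock();
//   ...                               // safe to touch PG state here
//   pg->unlock();
//                                     // PGRef goes out of scope -> put()
//
// "look_up_pg_somehow" is a placeholder, not a real OSD helper.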
822 mutable ceph::mutex _lock = ceph::make_mutex("PG::_lock");
823 #ifndef CEPH_DEBUG_MUTEX
824 mutable std::thread::id locked_by;
825 #endif
826 std::atomic<unsigned int> ref{0};
827
828 #ifdef PG_DEBUG_REFS
829 ceph::mutex _ref_id_lock = ceph::make_mutex("PG::_ref_id_lock");
830 std::map<uint64_t, std::string> _live_ids;
831 std::map<std::string, uint64_t> _tag_counts;
832 uint64_t _ref_id = 0;
833
834 friend uint64_t get_with_id(PG *pg) { return pg->get_with_id(); }
835 friend void put_with_id(PG *pg, uint64_t id) { return pg->put_with_id(id); }
836 #endif
837
838 private:
839 friend void intrusive_ptr_add_ref(PG *pg) {
840 pg->get("intptr");
841 }
842 friend void intrusive_ptr_release(PG *pg) {
843 pg->put("intptr");
844 }
845
846
847 // =====================
848
849 protected:
850 OSDriver osdriver;
851 SnapMapper snap_mapper;
852
853 virtual PGBackend *get_pgbackend() = 0;
854 virtual const PGBackend* get_pgbackend() const = 0;
855
856 protected:
857 void requeue_map_waiters();
858
859 protected:
860
861 ZTracer::Endpoint trace_endpoint;
862
863
864 protected:
865 __u8 info_struct_v = 0;
866 void upgrade(ObjectStore *store);
867
868 protected:
869 ghobject_t pgmeta_oid;
870
871 // ------------------
872 interval_set<snapid_t> snap_trimq;
873 std::set<snapid_t> snap_trimq_repeat;
874
875 /* You should not use these items without taking their respective queue locks
876 * (if they have one) */
877 xlist<PG*>::item stat_queue_item;
878 bool recovery_queued;
879
880 int recovery_ops_active;
881 std::set<pg_shard_t> waiting_on_backfill;
882 #ifdef DEBUG_RECOVERY_OIDS
883 std::multiset<hobject_t> recovering_oids;
884 #endif
885
886 public:
887 bool dne() { return info.dne(); }
888
889 void send_cluster_message(
890 int osd, MessageRef m, epoch_t epoch, bool share_map_update) override;
891
892 protected:
893 epoch_t get_last_peering_reset() const {
894 return recovery_state.get_last_peering_reset();
895 }
896
897 /* heartbeat peers */
898 void set_probe_targets(const std::set<pg_shard_t> &probe_set) override;
899 void clear_probe_targets() override;
900
901 ceph::mutex heartbeat_peer_lock =
902 ceph::make_mutex("PG::heartbeat_peer_lock");
903 std::set<int> heartbeat_peers;
904 std::set<int> probe_targets;
905
906 protected:
907 BackfillInterval backfill_info;
908 std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
909 bool backfill_reserving;
910
911 // The primary's num_bytes and local num_bytes for this pg, only valid
912 // during backfill for non-primary shards.
913 // Both of these are adjusted for EC to reflect the on-disk bytes
914 std::atomic<int64_t> primary_num_bytes = 0;
915 std::atomic<int64_t> local_num_bytes = 0;
916
917 public:
918 // Space reserved for backfill is primary_num_bytes - local_num_bytes
919 // Don't care that difference itself isn't atomic
920 uint64_t get_reserved_num_bytes() {
921 int64_t primary = primary_num_bytes.load();
922 int64_t local = local_num_bytes.load();
923 if (primary > local)
924 return primary - local;
925 else
926 return 0;
927 }
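// Editor's note -- worked example of the computation above: with
// primary_num_bytes = 3 GiB and local_num_bytes = 1 GiB, roughly 2 GiB of
// backfill data is still expected, so 2 GiB is reported as reserved; once
// local_num_bytes catches up to (or exceeds) primary_num_bytes, the
// reservation drops to 0.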
928
929 bool is_remote_backfilling() {
930 return primary_num_bytes.load() > 0;
931 }
932
933 bool try_reserve_recovery_space(int64_t primary, int64_t local) override;
934 void unreserve_recovery_space() override;
935
936 // If num_bytes is inconsistent and local_num_bytes goes negative,
937 // that's ok, because it would then be ignored.
938
939 // The value of num_bytes could be negative,
940 // but we don't let local_num_bytes go negative.
941 void add_local_num_bytes(int64_t num_bytes) {
942 if (num_bytes) {
943 int64_t prev_bytes = local_num_bytes.load();
944 int64_t new_bytes;
945 do {
946 new_bytes = prev_bytes + num_bytes;
947 if (new_bytes < 0)
948 new_bytes = 0;
949 } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
950 }
951 }
952 void sub_local_num_bytes(int64_t num_bytes) {
953 ceph_assert(num_bytes >= 0);
954 if (num_bytes) {
955 int64_t prev_bytes = local_num_bytes.load();
956 int64_t new_bytes;
957 do {
958 new_bytes = prev_bytes - num_bytes;
959 if (new_bytes < 0)
960 new_bytes = 0;
961 } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
962 }
963 }
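// Editor's note on the two helpers above: the compare_exchange_weak() loop is
// the usual lock-free read-modify-write pattern -- recompute new_bytes from
// the latest prev_bytes (clamping at 0) and retry until no other thread has
// modified local_num_bytes in the meantime (compare_exchange_weak refreshes
// prev_bytes on failure).  In rough pseudo-form, with clamp_at_zero() a
// made-up helper:
//
//   do {
//     new_bytes = clamp_at_zero(prev_bytes + delta);
//   } while (!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));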
964 // The value of num_bytes could be negative,
965 // but we don't let info.stats.stats.sum.num_bytes go negative.
966 void add_num_bytes(int64_t num_bytes) {
967 ceph_assert(ceph_mutex_is_locked_by_me(_lock));
968 if (num_bytes) {
969 recovery_state.update_stats(
970 [num_bytes](auto &history, auto &stats) {
971 stats.stats.sum.num_bytes += num_bytes;
972 if (stats.stats.sum.num_bytes < 0) {
973 stats.stats.sum.num_bytes = 0;
974 }
975 return false;
976 });
977 }
978 }
979 void sub_num_bytes(int64_t num_bytes) {
980 ceph_assert(ceph_mutex_is_locked_by_me(_lock));
981 ceph_assert(num_bytes >= 0);
982 if (num_bytes) {
983 recovery_state.update_stats(
984 [num_bytes](auto &history, auto &stats) {
985 stats.stats.sum.num_bytes -= num_bytes;
986 if (stats.stats.sum.num_bytes < 0) {
987 stats.stats.sum.num_bytes = 0;
988 }
989 return false;
990 });
991 }
992 }
993
994 // Only used in testing so not worried about needing the PG lock here
995 int64_t get_stats_num_bytes() {
996 std::lock_guard l{_lock};
997 int64_t num_bytes = info.stats.stats.sum.num_bytes;
998 if (pool.info.is_erasure()) {
999 num_bytes /= (int)get_pgbackend()->get_ec_data_chunk_count();
1000 // Round up each object by a stripe
1001 num_bytes += get_pgbackend()->get_ec_stripe_chunk_size() * info.stats.stats.sum.num_objects;
1002 }
1003 int64_t lnb = local_num_bytes.load();
1004 if (lnb && lnb != num_bytes) {
1005 lgeneric_dout(cct, 0) << this << " " << info.pgid << " num_bytes mismatch "
1006 << lnb << " vs stats "
1007 << info.stats.stats.sum.num_bytes << " / chunk "
1008 << get_pgbackend()->get_ec_data_chunk_count()
1009 << dendl;
1010 }
1011 return num_bytes;
1012 }
1013
1014 protected:
1015
1016 /*
1017 * blocked request wait hierarchy
1018 *
1019 * In order to preserve request ordering we need to be careful about the
1020 * order in which blocked requests get requeued. Generally speaking, we
1021 * push the requests back up to the op_wq in reverse order (most recent
1022 * request first) so that they come back out again in the original order.
1023 * However, because there are multiple wait queues, we need to requeue
1024 * wait lists in order. Generally speaking, we requeue first the wait
1025 * lists that are checked first (see the sketch after this comment block).
1026 *
1027 * Here are the various wait lists, in the order they are used during
1028 * request processing, with notes:
1029 *
1030 * - waiting_for_map
1031 * - may start or stop blocking at any time (depending on client epoch)
1032 * - waiting_for_peered
1033 * - !is_peered()
1034 * - only starts blocking on interval change; never restarts
1035 * - waiting_for_flush
1036 * - flushes_in_progress
1037 * - waiting for final flush during activate
1038 * - waiting_for_active
1039 * - !is_active()
1040 * - only starts blocking on interval change; never restarts
1041 * - waiting_for_readable
1042 * - now > readable_until
1043 * - unblocks when we get fresh(er) osd_pings
1044 * - waiting_for_scrub
1045 * - starts and stops blocking for varying intervals during scrub
1046 * - waiting_for_unreadable_object
1047 * - never restarts once object is readable (* except for EIO?)
1048 * - waiting_for_degraded_object
1049 * - never restarts once object is writeable (* except for EIO?)
1050 * - waiting_for_blocked_object
1051 * - starts and stops based on proxied op activity
1052 * - obc rwlocks
1053 * - starts and stops based on read/write activity
1054 *
1055 * Notes:
1056 *
1057 * 1. During an interval change, we requeue *everything* in the above order.
1058 *
1059 * 2. When an obc rwlock is released, we check for a scrub block and requeue
1060 * the op there if it applies. We ignore the unreadable/degraded/blocked
1061 * queues because we assume they cannot apply at that time (this is
1062 * probably mostly true).
1063 *
1064 * 3. The requeue_ops helper will push ops onto the waiting_for_map std::list if
1065 * it is non-empty.
1066 *
1067 * These three behaviors are generally sufficient to maintain ordering, with
1068 * the possible exception of cases where we make an object degraded or
1069 * unreadable that was previously okay, e.g. when scrub or op processing
1070 * encounter an unexpected error. FIXME.
1071 */
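// Editor's note -- a tiny illustration of the reverse-order requeue rule from
// the comment above (schematic only, not the actual requeue_ops() code): if a
// wait list currently holds [op1, op2, op3] with op1 the oldest, the ops are
// pushed back to the front of the op queue newest-first,
//
//   push_front(op3); push_front(op2); push_front(op1);
//
// so the queue hands them out again as op1, op2, op3 -- the original order.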
1072
1073 // ops with newer maps than ours (or blocked behind them)
1074 // track these by client, since inter-request ordering doesn't otherwise
1075 // matter.
1076 std::unordered_map<entity_name_t,std::list<OpRequestRef>> waiting_for_map;
1077
1078 // ops waiting on peered
1079 std::list<OpRequestRef> waiting_for_peered;
1080
1081 /// ops waiting on readable
1082 std::list<OpRequestRef> waiting_for_readable;
1083
1084 // ops waiting on active (require peered as well)
1085 std::list<OpRequestRef> waiting_for_active;
1086 std::list<OpRequestRef> waiting_for_flush;
1087 std::list<OpRequestRef> waiting_for_scrub;
1088
1089 std::list<OpRequestRef> waiting_for_cache_not_full;
1090 std::list<OpRequestRef> waiting_for_clean_to_primary_repair;
1091 std::map<hobject_t, std::list<OpRequestRef>> waiting_for_unreadable_object,
1092 waiting_for_degraded_object,
1093 waiting_for_blocked_object;
1094
1095 std::set<hobject_t> objects_blocked_on_cache_full;
1096 std::map<hobject_t,snapid_t> objects_blocked_on_degraded_snap;
1097 std::map<hobject_t,ObjectContextRef> objects_blocked_on_snap_promotion;
1098
1099 // Callbacks should assume pg (and nothing else) is locked
1100 std::map<hobject_t, std::list<Context*>> callbacks_for_degraded_object;
1101
1102 std::map<eversion_t,
1103 std::list<
1104 std::tuple<OpRequestRef, version_t, int,
1105 std::vector<pg_log_op_return_item_t>>>> waiting_for_ondisk;
1106
1107 void requeue_object_waiters(std::map<hobject_t, std::list<OpRequestRef>>& m);
1108 void requeue_op(OpRequestRef op);
1109 void requeue_ops(std::list<OpRequestRef> &l);
1110
1111 // stats that persist lazily
1112 object_stat_collection_t unstable_stats;
1113
1114 // publish stats
1115 ceph::mutex pg_stats_publish_lock =
1116 ceph::make_mutex("PG::pg_stats_publish_lock");
1117 std::optional<pg_stat_t> pg_stats_publish;
1118
1119 friend class TestOpsSocketHook;
1120 void publish_stats_to_osd() override;
1121
1122 bool needs_recovery() const {
1123 return recovery_state.needs_recovery();
1124 }
1125 bool needs_backfill() const {
1126 return recovery_state.needs_backfill();
1127 }
1128
1129 bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
1130
1131 struct PGLogEntryHandler : public PGLog::LogEntryHandler {
1132 PG *pg;
1133 ObjectStore::Transaction *t;
1134 PGLogEntryHandler(PG *pg, ObjectStore::Transaction *t) : pg(pg), t(t) {}
1135
1136 // LogEntryHandler
1137 void remove(const hobject_t &hoid) override {
1138 pg->get_pgbackend()->remove(hoid, t);
1139 }
1140 void try_stash(const hobject_t &hoid, version_t v) override {
1141 pg->get_pgbackend()->try_stash(hoid, v, t);
1142 }
1143 void rollback(const pg_log_entry_t &entry) override {
1144 ceph_assert(entry.can_rollback());
1145 pg->get_pgbackend()->rollback(entry, t);
1146 }
1147 void rollforward(const pg_log_entry_t &entry) override {
1148 pg->get_pgbackend()->rollforward(entry, t);
1149 }
1150 void trim(const pg_log_entry_t &entry) override {
1151 pg->get_pgbackend()->trim(entry, t);
1152 }
1153 };
1154
1155 void update_object_snap_mapping(
1156 ObjectStore::Transaction *t, const hobject_t &soid,
1157 const std::set<snapid_t> &snaps);
1158 void clear_object_snap_mapping(
1159 ObjectStore::Transaction *t, const hobject_t &soid);
1160 void remove_snap_mapped_object(
1161 ObjectStore::Transaction& t, const hobject_t& soid);
1162
1163 bool have_unfound() const {
1164 return recovery_state.have_unfound();
1165 }
1166 uint64_t get_num_unfound() const {
1167 return recovery_state.get_num_unfound();
1168 }
1169
1170 virtual void check_local() = 0;
1171
1172 void purge_strays();
1173
1174 void update_heartbeat_peers(std::set<int> peers) override;
1175
1176 Context *finish_sync_event;
1177
1178 Context *finish_recovery();
1179 void _finish_recovery(Context *c);
1180 struct C_PG_FinishRecovery : public Context {
1181 PGRef pg;
1182 explicit C_PG_FinishRecovery(PG *p) : pg(p) {}
1183 void finish(int r) override {
1184 pg->_finish_recovery(this);
1185 }
1186 };
1187 void cancel_recovery();
1188 void clear_recovery_state();
1189 virtual void _clear_recovery_state() = 0;
1190 void start_recovery_op(const hobject_t& soid);
1191 void finish_recovery_op(const hobject_t& soid, bool dequeue=false);
1192
1193 virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) = 0;
1194
1195 friend class C_OSD_RepModify_Commit;
1196 friend struct C_DeleteMore;
1197
1198 // -- backoff --
1199 ceph::mutex backoff_lock = // orders inside Backoff::lock
1200 ceph::make_mutex("PG::backoff_lock");
1201 std::map<hobject_t,std::set<ceph::ref_t<Backoff>>> backoffs;
1202
1203 void add_backoff(const ceph::ref_t<Session>& s, const hobject_t& begin, const hobject_t& end);
1204 void release_backoffs(const hobject_t& begin, const hobject_t& end);
1205 void release_backoffs(const hobject_t& o) {
1206 release_backoffs(o, o);
1207 }
1208 void clear_backoffs();
1209
1210 void add_pg_backoff(const ceph::ref_t<Session>& s) {
1211 hobject_t begin = info.pgid.pgid.get_hobj_start();
1212 hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
1213 add_backoff(s, begin, end);
1214 }
1215 public:
1216 void release_pg_backoffs() {
1217 hobject_t begin = info.pgid.pgid.get_hobj_start();
1218 hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
1219 release_backoffs(begin, end);
1220 }
1221
1222 // -- scrub --
1223 protected:
1224 bool scrub_after_recovery;
1225
1226 int active_pushes;
1227
1228 void repair_object(
1229 const hobject_t &soid,
1230 const std::list<std::pair<ScrubMap::object, pg_shard_t> > &ok_peers,
1231 const std::set<pg_shard_t> &bad_peers);
1232
1233 [[nodiscard]] bool ops_blocked_by_scrub() const;
1234 [[nodiscard]] Scrub::scrub_prio_t is_scrub_blocking_ops() const;
1235
1236 void _repair_oinfo_oid(ScrubMap &map);
1237 void _scan_rollback_obs(const std::vector<ghobject_t> &rollback_obs);
1238 /**
1239 * returns true if [begin, end) is good to scrub at this time
1240 * a false return value obliges the implementer to requeue scrub when the
1241 * condition preventing scrub clears
1242 */
1243 virtual bool _range_available_for_scrub(
1244 const hobject_t &begin, const hobject_t &end) = 0;
1245
1246 /**
1247 * Initiate the process that will create our scrub map for the Primary.
1248 * (triggered by MSG_OSD_REP_SCRUB)
1249 */
1250 void replica_scrub(OpRequestRef op, ThreadPool::TPHandle &handle);
1251
1252 // -- recovery state --
1253
1254 struct QueuePeeringEvt : Context {
1255 PGRef pg;
1256 PGPeeringEventRef evt;
1257
1258 template <class EVT>
1259 QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
1260 pg(pg), evt(std::make_shared<PGPeeringEvent>(epoch, epoch, evt)) {}
1261
1262 QueuePeeringEvt(PG *pg, PGPeeringEventRef evt) :
1263 pg(pg), evt(std::move(evt)) {}
1264
1265 void finish(int r) override {
1266 pg->lock();
1267 pg->queue_peering_event(std::move(evt));
1268 pg->unlock();
1269 }
1270 };
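// Editor's note -- typical use of QueuePeeringEvt (a hedged sketch; see the
// actual call sites in PG.cc / PeeringState.cc): it is registered as an
// ObjectStore completion so that a peering event is delivered only once the
// transaction commits, e.g. from inside a PG method, roughly:
//
//   t.register_on_commit(
//     new QueuePeeringEvt(this, get_osdmap_epoch(),
//                         PeeringState::IntervalFlush()));
//
// The specific event type shown here is illustrative.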
1271
1272
1273 public:
1274 int pg_stat_adjust(osd_stat_t *new_stat);
1275 protected:
1276 bool delete_needs_sleep = false;
1277
1278 protected:
1279 bool state_test(uint64_t m) const { return recovery_state.state_test(m); }
1280 void state_set(uint64_t m) { recovery_state.state_set(m); }
1281 void state_clear(uint64_t m) { recovery_state.state_clear(m); }
1282
1283 bool is_complete() const {
1284 return recovery_state.is_complete();
1285 }
1286 bool should_send_notify() const {
1287 return recovery_state.should_send_notify();
1288 }
1289
1290 bool is_active() const { return recovery_state.is_active(); }
1291 bool is_activating() const { return recovery_state.is_activating(); }
1292 bool is_peering() const { return recovery_state.is_peering(); }
1293 bool is_down() const { return recovery_state.is_down(); }
1294 bool is_recovery_unfound() const { return recovery_state.is_recovery_unfound(); }
1295 bool is_backfill_unfound() const { return recovery_state.is_backfill_unfound(); }
1296 bool is_incomplete() const { return recovery_state.is_incomplete(); }
1297 bool is_clean() const { return recovery_state.is_clean(); }
1298 bool is_degraded() const { return recovery_state.is_degraded(); }
1299 bool is_undersized() const { return recovery_state.is_undersized(); }
1300 bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } // Primary only
1301 bool is_remapped() const { return recovery_state.is_remapped(); }
1302 bool is_peered() const { return recovery_state.is_peered(); }
1303 bool is_recovering() const { return recovery_state.is_recovering(); }
1304 bool is_premerge() const { return recovery_state.is_premerge(); }
1305 bool is_repair() const { return recovery_state.is_repair(); }
1306 bool is_laggy() const { return state_test(PG_STATE_LAGGY); }
1307 bool is_wait() const { return state_test(PG_STATE_WAIT); }
1308
1309 bool is_empty() const { return recovery_state.is_empty(); }
1310
1311 // pg on-disk state
1312 void do_pending_flush();
1313
1314 public:
1315 void prepare_write(
1316 pg_info_t &info,
1317 pg_info_t &last_written_info,
1318 PastIntervals &past_intervals,
1319 PGLog &pglog,
1320 bool dirty_info,
1321 bool dirty_big_info,
1322 bool need_write_epoch,
1323 ObjectStore::Transaction &t) override;
1324
1325 void write_if_dirty(PeeringCtx &rctx) {
1326 write_if_dirty(rctx.transaction);
1327 }
1328 protected:
1329 void write_if_dirty(ObjectStore::Transaction& t) {
1330 recovery_state.write_if_dirty(t);
1331 }
1332
1333 PGLog::IndexedLog projected_log;
1334 bool check_in_progress_op(
1335 const osd_reqid_t &r,
1336 eversion_t *version,
1337 version_t *user_version,
1338 int *return_code,
1339 std::vector<pg_log_op_return_item_t> *op_returns) const;
1340 eversion_t projected_last_update;
1341 eversion_t get_next_version() const {
1342 eversion_t at_version(
1343 get_osdmap_epoch(),
1344 projected_last_update.version+1);
1345 ceph_assert(at_version > info.last_update);
1346 ceph_assert(at_version > recovery_state.get_pg_log().get_head());
1347 ceph_assert(at_version > projected_last_update);
1348 return at_version;
1349 }
1350
1351 bool check_log_for_corruption(ObjectStore *store);
1352
1353 std::string get_corrupt_pg_log_name() const;
1354
1355 void update_snap_map(
1356 const std::vector<pg_log_entry_t> &log_entries,
1357 ObjectStore::Transaction& t);
1358
1359 void filter_snapc(std::vector<snapid_t> &snaps);
1360
1361 virtual void kick_snap_trim() = 0;
1362 virtual void snap_trimmer_scrub_complete() = 0;
1363
1364 void queue_recovery();
1365 void queue_scrub_after_repair();
1366 unsigned int get_scrub_priority();
1367
1368 bool try_flush_or_schedule_async() override;
1369 void start_flush_on_transaction(
1370 ObjectStore::Transaction &t) override;
1371
1372 void update_history(const pg_history_t& history) {
1373 recovery_state.update_history(history);
1374 }
1375
1376 // OpRequest queueing
1377 bool can_discard_op(OpRequestRef& op);
1378 bool can_discard_scan(OpRequestRef op);
1379 bool can_discard_backfill(OpRequestRef op);
1380 bool can_discard_request(OpRequestRef& op);
1381
1382 template<typename T, int MSGTYPE>
1383 bool can_discard_replica_op(OpRequestRef& op);
1384
1385 bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch);
1386 bool old_peering_evt(PGPeeringEventRef evt) {
1387 return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested());
1388 }
1389 bool have_same_or_newer_map(epoch_t e) {
1390 return e <= get_osdmap_epoch();
1391 }
1392
1393 bool op_has_sufficient_caps(OpRequestRef& op);
1394
1395 // abstract bits
1396 friend struct FlushState;
1397
1398 friend ostream& operator<<(ostream& out, const PG& pg);
1399
1400 protected:
1401 PeeringState recovery_state;
1402
1403 // ref to recovery_state.pool
1404 const PGPool &pool;
1405
1406 // ref to recovery_state.info
1407 const pg_info_t &info;
1408
1409
1410 // ScrubberPasskey getters:
1411 public:
1412 const pg_info_t& get_pg_info(ScrubberPasskey) const {
1413 return info;
1414 }
1415
1416 OSDService* get_pg_osd(ScrubberPasskey) const {
1417 return osd;
1418 }
1419
1420 };
1421
1422 #endif