// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#ifndef CEPH_PG_H
#define CEPH_PG_H

#include <boost/statechart/custom_reaction.hpp>
#include <boost/statechart/event.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>
#include <boost/statechart/event_base.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/circular_buffer.hpp>
#include "include/memory.h"
#include "include/mempool.h"

// re-include our assert to clobber boost's
#include "include/assert.h"

#include "include/types.h"
#include "include/stringify.h"
#include "osd_types.h"
#include "include/xlist.h"
#include "SnapMapper.h"
#include "Session.h"
#include "common/Timer.h"

#include "PGLog.h"
#include "OSDMap.h"
#include "messages/MOSDPGLog.h"
#include "include/str_list.h"
#include "PGBackend.h"

#include <atomic>
#include <list>
#include <memory>
#include <stack>
#include <string>
#include <tuple>
using namespace std;

// #include "include/unordered_map.h"
// #include "include/unordered_set.h"

//#define DEBUG_RECOVERY_OIDS   // track set of recovering oids explicitly, to find counting bugs

class OSD;
class OSDService;
class MOSDOp;
class MOSDPGScan;
class MOSDPGBackfill;
class MOSDPGInfo;

class PG;
struct OpRequest;
typedef OpRequest::Ref OpRequestRef;
class MOSDPGLog;
class CephContext;

namespace Scrub {
  class Store;
}

void intrusive_ptr_add_ref(PG *pg);
void intrusive_ptr_release(PG *pg);

using state_history_entry = std::tuple<utime_t, utime_t, const char*>;
using embedded_state = std::pair<utime_t, const char*>;

struct PGStateInstance {
  // Time spent in pg states

  void setepoch(const epoch_t current_epoch) {
    this_epoch = current_epoch;
  }

  void enter_state(const utime_t entime, const char* state) {
    embedded_states.push(std::make_pair(entime, state));
  }

  void exit_state(const utime_t extime) {
    embedded_state this_state = embedded_states.top();
    state_history.push_back(state_history_entry{
        this_state.first, extime, this_state.second});
    embedded_states.pop();
  }

  epoch_t this_epoch;
  utime_t enter_time;
  std::vector<state_history_entry> state_history;
  std::stack<embedded_state> embedded_states;
};
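
/*
 * Illustrative sketch (not part of the header): nested states are pushed and
 * popped LIFO, so each exit_state() closes the most recently entered state
 * and records its [enter, exit] interval in state_history:
 *
 *   PGStateInstance psi;
 *   psi.enter_state(t0, "Started");
 *   psi.enter_state(t1, "Started/Primary");
 *   psi.exit_state(t2);   // records {t1, t2, "Started/Primary"}
 *   psi.exit_state(t3);   // records {t0, t3, "Started"}
 */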

class PGStateHistory {
  // Member access protected with the PG lock
public:
  PGStateHistory() : buffer(10) {}

  void enter(PG* pg, const utime_t entime, const char* state);

  void exit(const char* state);

  void reset() {
    pi = nullptr;
  }

  void set_pg_in_destructor() { pg_in_destructor = true; }

  void dump(Formatter* f) const;

private:
  bool pg_in_destructor = false;
  PG* thispg = nullptr;
  std::unique_ptr<PGStateInstance> tmppi;
  PGStateInstance* pi = nullptr;
  boost::circular_buffer<std::unique_ptr<PGStateInstance>> buffer;

};

#ifdef PG_DEBUG_REFS
#include "common/tracked_int_ptr.hpp"
  uint64_t get_with_id(PG *pg);
  void put_with_id(PG *pg, uint64_t id);
  typedef TrackedIntPtr<PG> PGRef;
#else
  typedef boost::intrusive_ptr<PG> PGRef;
#endif

class PGRecoveryStats {
  struct per_state_info {
    uint64_t enter, exit;     // enter/exit counts
    uint64_t events;
    utime_t event_time;       // time spent processing events
    utime_t total_time;       // total time in state
    utime_t min_time, max_time;

    // cppcheck-suppress unreachableCode
    per_state_info() : enter(0), exit(0), events(0) {}
  };
  map<const char *,per_state_info> info;
  Mutex lock;

public:
  PGRecoveryStats() : lock("PGRecoveryStats::lock") {}

  void reset() {
    Mutex::Locker l(lock);
    info.clear();
  }
  void dump(ostream& out) {
    Mutex::Locker l(lock);
    for (map<const char *,per_state_info>::iterator p = info.begin(); p != info.end(); ++p) {
      per_state_info& i = p->second;
      out << i.enter << "\t" << i.exit << "\t"
          << i.events << "\t" << i.event_time << "\t"
          << i.total_time << "\t"
          << i.min_time << "\t" << i.max_time << "\t"
          << p->first << "\n";
    }
  }

  void dump_formatted(Formatter *f) {
    Mutex::Locker l(lock);
    f->open_array_section("pg_recovery_stats");
    for (map<const char *,per_state_info>::iterator p = info.begin();
         p != info.end(); ++p) {
      per_state_info& i = p->second;
      f->open_object_section("recovery_state");
      f->dump_int("enter", i.enter);
      f->dump_int("exit", i.exit);
      f->dump_int("events", i.events);
      f->dump_stream("event_time") << i.event_time;
      f->dump_stream("total_time") << i.total_time;
      f->dump_stream("min_time") << i.min_time;
      f->dump_stream("max_time") << i.max_time;
      vector<string> states;
      get_str_vec(p->first, "/", states);
      f->open_array_section("nested_states");
      for (vector<string>::iterator st = states.begin();
           st != states.end(); ++st) {
        f->dump_string("state", *st);
      }
      f->close_section();
      f->close_section();
    }
    f->close_section();
  }

  void log_enter(const char *s) {
    Mutex::Locker l(lock);
    info[s].enter++;
  }
  void log_exit(const char *s, utime_t dur, uint64_t events, utime_t event_dur) {
    Mutex::Locker l(lock);
    per_state_info &i = info[s];
    i.exit++;
    i.total_time += dur;
    if (dur > i.max_time)
      i.max_time = dur;
    if (dur < i.min_time || i.min_time == utime_t())
      i.min_time = dur;
    i.events += events;
    i.event_time += event_dur;
  }
};
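
/*
 * Illustrative sketch (not part of the header): the recovery state machine
 * feeds this aggregator on every state transition, e.g.:
 *
 *   stats.log_enter("Started/Primary/Active");
 *   // ... state runs, handling some events ...
 *   stats.log_exit("Started/Primary/Active", time_in_state, event_count,
 *                  time_in_event_handlers);
 *   stats.dump(cout);   // one tab-separated row per state
 */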

struct PGPool {
  CephContext* cct;
  epoch_t cached_epoch;
  int64_t id;
  string name;
  uint64_t auid;

  pg_pool_t info;
  SnapContext snapc;   // the default pool snapc, ready to go.

  interval_set<snapid_t> cached_removed_snaps;  // current removed_snaps set
  interval_set<snapid_t> newly_removed_snaps;   // newly removed in the last epoch

  PGPool(CephContext* cct, OSDMapRef map, int64_t i)
    : cct(cct),
      cached_epoch(map->get_epoch()),
      id(i),
      name(map->get_pool_name(id)),
      auid(map->get_pg_pool(id)->auid) {
    const pg_pool_t *pi = map->get_pg_pool(id);
    assert(pi);
    info = *pi;
    snapc = pi->get_snap_context();
    pi->build_removed_snaps(cached_removed_snaps);
  }

  void update(OSDMapRef map);
};

/** PG - Replica Placement Group
 *
 */

class PG : public DoutPrefixProvider {
protected:
  OSDService *osd;
  CephContext *cct;
  OSDriver osdriver;
  SnapMapper snap_mapper;
  bool eio_errors_to_process = false;

  virtual PGBackend *get_pgbackend() = 0;
public:
  std::string gen_prefix() const override;
  CephContext *get_cct() const override { return cct; }
  unsigned get_subsys() const override { return ceph_subsys_osd; }

  /*** PG ****/
  void update_snap_mapper_bits(uint32_t bits) {
    snap_mapper.update_bits(bits);
  }
  /// get_is_recoverable_predicate: caller owns returned pointer and must delete when done
  IsPGRecoverablePredicate *get_is_recoverable_predicate() {
    return get_pgbackend()->get_is_recoverable_predicate();
  }
protected:
  OSDMapRef osdmap_ref;
  OSDMapRef last_persisted_osdmap_ref;
  PGPool pool;

  void requeue_map_waiters();

  void update_osdmap_ref(OSDMapRef newmap) {
    assert(_lock.is_locked_by_me());
    osdmap_ref = std::move(newmap);
  }

public:
  OSDMapRef get_osdmap() const {
    assert(is_locked());
    assert(osdmap_ref);
    return osdmap_ref;
  }
protected:

  /** locking and reference counting.
   * I destroy myself when the reference count hits zero.
   * lock() should be called before doing anything.
   * get() should be called on pointer copy (to another thread, etc.).
   * put() should be called on destruction of some previously copied pointer.
   * unlock() when done with the current pointer (_most common_).
   */
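
  /*
   * Illustrative sketch (not part of the header): handing a PG to another
   * thread pairs get()/put() around the locked critical section:
   *
   *   pg->get("worker");     // ref travels with the queued work item
   *   ...
   *   pg->lock();
   *   do_work(pg);           // hypothetical worker function
   *   pg->unlock();
   *   pg->put("worker");     // may destroy the PG if this was the last ref
   */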
  mutable Mutex _lock;
  std::atomic_uint ref{0};

#ifdef PG_DEBUG_REFS
  Mutex _ref_id_lock;
  map<uint64_t, string> _live_ids;
  map<string, uint64_t> _tag_counts;
  uint64_t _ref_id;
#endif

public:
  bool deleting;  // true while being removed or while the OSD is shutting down

  ZTracer::Endpoint trace_endpoint;

  void lock_suspend_timeout(ThreadPool::TPHandle &handle);
  void lock(bool no_lockdep = false) const;
  void unlock() const {
    //generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;
    assert(!dirty_info);
    assert(!dirty_big_info);
    _lock.Unlock();
  }

  bool is_locked() const {
    return _lock.is_locked();
  }

#ifdef PG_DEBUG_REFS
  uint64_t get_with_id();
  void put_with_id(uint64_t);
  void dump_live_ids();
#endif
  void get(const char* tag);
  void put(const char* tag);

  bool dirty_info, dirty_big_info;

public:
  bool is_ec_pg() const {
    return pool.info.ec_pool();
  }
  // pg state
  pg_info_t info;               ///< current pg info
  pg_info_t last_written_info;  ///< last written info
  __u8 info_struct_v;
  static const __u8 cur_struct_v = 10;
  // v10 is the new past_intervals encoding
  // v9 was fastinfo_key addition
  // v8 was the move to a per-pg pgmeta object
  // v7 was SnapMapper addition in 86658392516d5175b2756659ef7ffaaf95b0f8ad
  //     (first appeared in cuttlefish).
  static const __u8 compat_struct_v = 7;
  bool must_upgrade() {
    return info_struct_v < cur_struct_v;
  }
  bool can_upgrade() {
    return info_struct_v >= compat_struct_v;
  }
  void upgrade(ObjectStore *store);

  const coll_t coll;
  ObjectStore::CollectionHandle ch;
  PGLog pg_log;
  static string get_info_key(spg_t pgid) {
    return stringify(pgid) + "_info";
  }
  static string get_biginfo_key(spg_t pgid) {
    return stringify(pgid) + "_biginfo";
  }
  static string get_epoch_key(spg_t pgid) {
    return stringify(pgid) + "_epoch";
  }
  ghobject_t pgmeta_oid;

  class MissingLoc {
    map<hobject_t, pg_missing_item> needs_recovery_map;
    map<hobject_t, set<pg_shard_t> > missing_loc;
    set<pg_shard_t> missing_loc_sources;
    PG *pg;
    set<pg_shard_t> empty_set;
  public:
    boost::scoped_ptr<IsPGReadablePredicate> is_readable;
    boost::scoped_ptr<IsPGRecoverablePredicate> is_recoverable;
    explicit MissingLoc(PG *pg)
      : pg(pg) {}
    void set_backend_predicates(
      IsPGReadablePredicate *_is_readable,
      IsPGRecoverablePredicate *_is_recoverable) {
      is_readable.reset(_is_readable);
      is_recoverable.reset(_is_recoverable);
    }
    string gen_prefix() const { return pg->gen_prefix(); }
    bool needs_recovery(
      const hobject_t &hoid,
      eversion_t *v = 0) const {
      map<hobject_t, pg_missing_item>::const_iterator i =
        needs_recovery_map.find(hoid);
      if (i == needs_recovery_map.end())
        return false;
      if (v)
        *v = i->second.need;
      return true;
    }
    bool is_deleted(const hobject_t &hoid) const {
      auto i = needs_recovery_map.find(hoid);
      if (i == needs_recovery_map.end())
        return false;
      return i->second.is_delete();
    }
    bool is_unfound(const hobject_t &hoid) const {
      return needs_recovery(hoid) && !is_deleted(hoid) && (
        !missing_loc.count(hoid) ||
        !(*is_recoverable)(missing_loc.find(hoid)->second));
    }
    bool readable_with_acting(
      const hobject_t &hoid,
      const set<pg_shard_t> &acting) const;
    uint64_t num_unfound() const {
      uint64_t ret = 0;
      for (map<hobject_t, pg_missing_item>::const_iterator i =
             needs_recovery_map.begin();
           i != needs_recovery_map.end();
           ++i) {
        if (is_unfound(i->first))
          ++ret;
      }
      return ret;
    }

    bool have_unfound() const {
      for (map<hobject_t, pg_missing_item>::const_iterator i =
             needs_recovery_map.begin();
           i != needs_recovery_map.end();
           ++i) {
        if (is_unfound(i->first))
          return true;
      }
      return false;
    }
    void clear() {
      needs_recovery_map.clear();
      missing_loc.clear();
      missing_loc_sources.clear();
    }

    void add_location(const hobject_t &hoid, pg_shard_t location) {
      missing_loc[hoid].insert(location);
    }
    void remove_location(const hobject_t &hoid, pg_shard_t location) {
      missing_loc[hoid].erase(location);
    }
    void add_active_missing(const pg_missing_t &missing) {
      for (map<hobject_t, pg_missing_item>::const_iterator i =
             missing.get_items().begin();
           i != missing.get_items().end();
           ++i) {
        map<hobject_t, pg_missing_item>::const_iterator j =
          needs_recovery_map.find(i->first);
        if (j == needs_recovery_map.end()) {
          needs_recovery_map.insert(*i);
        } else {
          lgeneric_dout(pg->cct, 0) << this << " " << pg->info.pgid << " unexpected need for "
                                    << i->first << " have " << j->second
                                    << " tried to add " << i->second << dendl;
          assert(i->second.need == j->second.need);
        }
      }
    }

    void add_missing(const hobject_t &hoid, eversion_t need, eversion_t have) {
      needs_recovery_map[hoid] = pg_missing_item(need, have);
    }
    void revise_need(const hobject_t &hoid, eversion_t need) {
      assert(needs_recovery(hoid));
      needs_recovery_map[hoid].need = need;
    }

    /// Adds info about a possible recovery source
    bool add_source_info(
      pg_shard_t source,            ///< [in] source
      const pg_info_t &oinfo,       ///< [in] info
      const pg_missing_t &omissing, ///< [in] (optional) missing
      ThreadPool::TPHandle* handle  ///< [in] ThreadPool handle
      );  ///< @return whether a new object location was discovered

    /// Adds recovery sources in batch
    void add_batch_sources_info(
      const set<pg_shard_t> &sources,  ///< [in] a set of sources which can be used for all objects
      ThreadPool::TPHandle* handle     ///< [in] ThreadPool handle
      );

    /// Uses osdmap to update structures for now down sources
    void check_recovery_sources(const OSDMapRef& osdmap);

    /// Call when hoid is no longer missing in acting set
    void recovered(const hobject_t &hoid) {
      needs_recovery_map.erase(hoid);
      missing_loc.erase(hoid);
    }

    /// Call to update structures for hoid after a change
    void rebuild(
      const hobject_t &hoid,
      pg_shard_t self,
      const set<pg_shard_t> to_recover,
      const pg_info_t &info,
      const pg_missing_t &missing,
      const map<pg_shard_t, pg_missing_t> &pmissing,
      const map<pg_shard_t, pg_info_t> &pinfo) {
      recovered(hoid);
      boost::optional<pg_missing_item> item;
      auto miter = missing.get_items().find(hoid);
      if (miter != missing.get_items().end()) {
        item = miter->second;
      } else {
        for (auto &&i: to_recover) {
          if (i == self)
            continue;
          auto pmiter = pmissing.find(i);
          assert(pmiter != pmissing.end());
          miter = pmiter->second.get_items().find(hoid);
          if (miter != pmiter->second.get_items().end()) {
            item = miter->second;
            break;
          }
        }
      }
      if (!item)
        return; // recovered!

      needs_recovery_map[hoid] = *item;
      if (item->is_delete())
        return;
      auto mliter =
        missing_loc.insert(make_pair(hoid, set<pg_shard_t>())).first;
      assert(info.last_backfill.is_max());
      assert(info.last_update >= item->need);
      if (!missing.is_missing(hoid))
        mliter->second.insert(self);
      for (auto &&i: pmissing) {
        auto pinfoiter = pinfo.find(i.first);
        assert(pinfoiter != pinfo.end());
        if (item->need <= pinfoiter->second.last_update &&
            hoid <= pinfoiter->second.last_backfill &&
            !i.second.is_missing(hoid))
          mliter->second.insert(i.first);
      }
    }

    const set<pg_shard_t> &get_locations(const hobject_t &hoid) const {
      return missing_loc.count(hoid) ?
        missing_loc.find(hoid)->second : empty_set;
    }
    const map<hobject_t, set<pg_shard_t>> &get_missing_locs() const {
      return missing_loc;
    }
    const map<hobject_t, pg_missing_item> &get_needs_recovery() const {
      return needs_recovery_map;
    }
  } missing_loc;

  PastIntervals past_intervals;

  interval_set<snapid_t> snap_trimq;

  /* You should not use these items without taking their respective queue locks
   * (if they have one) */
  xlist<PG*>::item stat_queue_item;
  bool scrub_queued;
  bool recovery_queued;

  int recovery_ops_active;
  set<pg_shard_t> waiting_on_backfill;
#ifdef DEBUG_RECOVERY_OIDS
  set<hobject_t> recovering_oids;
#endif

protected:
  int role;        // 0 = primary, 1 = replica, -1 = none.
  unsigned state;  // PG_STATE_*

  bool send_notify;  ///< true if we are non-primary and should notify the primary

public:
  eversion_t last_update_ondisk;    // last_update that has committed; ONLY DEFINED WHEN is_active()
  eversion_t last_complete_ondisk;  // last_complete that has committed.
  eversion_t last_update_applied;


  struct C_UpdateLastRollbackInfoTrimmedToApplied : Context {
    PGRef pg;
    epoch_t e;
    eversion_t v;
    C_UpdateLastRollbackInfoTrimmedToApplied(PG *pg, epoch_t e, eversion_t v)
      : pg(pg), e(e), v(v) {}
    void finish(int) override {
      pg->lock();
      if (!pg->pg_has_reset_since(e)) {
        pg->last_rollback_info_trimmed_to_applied = v;
      }
      pg->unlock();
    }
  };
  // entries <= last_rollback_info_trimmed_to_applied have been trimmed,
  // and the transaction has applied
  eversion_t last_rollback_info_trimmed_to_applied;

  // primary state
public:
  pg_shard_t primary;
  pg_shard_t pg_whoami;
  pg_shard_t up_primary;
  vector<int> up, acting, want_acting;
  set<pg_shard_t> actingbackfill, actingset, upset;
  map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
  eversion_t min_last_complete_ondisk;  // up: min over last_complete_ondisk, peer_last_complete_ondisk
  eversion_t pg_trim_to;

  set<int> blocked_by;  ///< osds we are blocked by (for pg stats)

  // [primary only] content recovery state

public:
  struct BufferedRecoveryMessages {
    map<int, map<spg_t, pg_query_t> > query_map;
    map<int, vector<pair<pg_notify_t, PastIntervals> > > info_map;
    map<int, vector<pair<pg_notify_t, PastIntervals> > > notify_list;
  };

  struct RecoveryCtx {
    utime_t start_time;
    map<int, map<spg_t, pg_query_t> > *query_map;
    map<int, vector<pair<pg_notify_t, PastIntervals> > > *info_map;
    map<int, vector<pair<pg_notify_t, PastIntervals> > > *notify_list;
    set<PGRef> created_pgs;
    C_Contexts *on_applied;
    C_Contexts *on_safe;
    ObjectStore::Transaction *transaction;
    ThreadPool::TPHandle* handle;
    RecoveryCtx(map<int, map<spg_t, pg_query_t> > *query_map,
                map<int,
                    vector<pair<pg_notify_t, PastIntervals> > > *info_map,
                map<int,
                    vector<pair<pg_notify_t, PastIntervals> > > *notify_list,
                C_Contexts *on_applied,
                C_Contexts *on_safe,
                ObjectStore::Transaction *transaction)
      : query_map(query_map), info_map(info_map),
        notify_list(notify_list),
        on_applied(on_applied),
        on_safe(on_safe),
        transaction(transaction),
        handle(NULL) {}

    RecoveryCtx(BufferedRecoveryMessages &buf, RecoveryCtx &rctx)
      : query_map(&(buf.query_map)),
        info_map(&(buf.info_map)),
        notify_list(&(buf.notify_list)),
        on_applied(rctx.on_applied),
        on_safe(rctx.on_safe),
        transaction(rctx.transaction),
        handle(rctx.handle) {}

    void accept_buffered_messages(BufferedRecoveryMessages &m) {
      assert(query_map);
      assert(info_map);
      assert(notify_list);
      for (map<int, map<spg_t, pg_query_t> >::iterator i = m.query_map.begin();
           i != m.query_map.end();
           ++i) {
        map<spg_t, pg_query_t> &omap = (*query_map)[i->first];
        for (map<spg_t, pg_query_t>::iterator j = i->second.begin();
             j != i->second.end();
             ++j) {
          omap[j->first] = j->second;
        }
      }
      for (map<int, vector<pair<pg_notify_t, PastIntervals> > >::iterator i
             = m.info_map.begin();
           i != m.info_map.end();
           ++i) {
        vector<pair<pg_notify_t, PastIntervals> > &ovec =
          (*info_map)[i->first];
        ovec.reserve(ovec.size() + i->second.size());
        ovec.insert(ovec.end(), i->second.begin(), i->second.end());
      }
      for (map<int, vector<pair<pg_notify_t, PastIntervals> > >::iterator i
             = m.notify_list.begin();
           i != m.notify_list.end();
           ++i) {
        vector<pair<pg_notify_t, PastIntervals> > &ovec =
          (*notify_list)[i->first];
        ovec.reserve(ovec.size() + i->second.size());
        ovec.insert(ovec.end(), i->second.begin(), i->second.end());
      }
    }
  };


  PGStateHistory pgstate_history;

  struct NamedState {
    const char *state_name;
    utime_t enter_time;
    PG* pg;
    const char *get_state_name() { return state_name; }
    NamedState(PG *pg_, const char *state_name_)
      : state_name(state_name_), enter_time(ceph_clock_now()), pg(pg_) {
      pg->pgstate_history.enter(pg, enter_time, state_name);
    }
    virtual ~NamedState() { pg->pgstate_history.exit(state_name); }
  };



protected:

  /*
   * peer_info    -- projected (updates _before_ replicas ack)
   * peer_missing -- committed (updates _after_ replicas ack)
   */

  bool need_up_thru;
  set<pg_shard_t> stray_set;  // non-acting osds that have PG data.
  eversion_t oldest_update;   // acting: lowest (valid) last_update in active set
  map<pg_shard_t, pg_info_t> peer_info;  // info from peers (stray or prior)
  set<pg_shard_t> peer_purged;           // peers purged
  map<pg_shard_t, pg_missing_t> peer_missing;
  set<pg_shard_t> peer_log_requested;    // logs I've requested (and start stamps)
  set<pg_shard_t> peer_missing_requested;

  // I deleted these strays; ignore racing PGInfo from them
  set<pg_shard_t> peer_activated;

  // primary-only, recovery-only state
  set<pg_shard_t> might_have_unfound;  // These osds might have objects on them
                                       // which are unfound on the primary
  epoch_t last_peering_reset;


  /* heartbeat peers */
  void set_probe_targets(const set<pg_shard_t> &probe_set);
  void clear_probe_targets();
public:
  Mutex heartbeat_peer_lock;
  set<int> heartbeat_peers;
  set<int> probe_targets;

  /**
   * BackfillInterval
   *
   * Represents the objects in a range [begin, end)
   *
   * Possible states:
   * 1) begin == end == hobject_t() indicates that the interval is unpopulated
   * 2) Else, objects contains all objects in [begin, end)
   */
  struct BackfillInterval {
    // info about a backfill interval on a peer
    eversion_t version;  ///< version at which the scan occurred
    map<hobject_t,eversion_t> objects;
    hobject_t begin;
    hobject_t end;

    /// clear content
    void clear() {
      *this = BackfillInterval();
    }

    /// clear objects list only
    void clear_objects() {
      objects.clear();
    }

    /// reinstantiate with a new start+end position and sort order
    void reset(hobject_t start) {
      clear();
      begin = end = start;
    }

    /// true if there are no objects in this interval
    bool empty() const {
      return objects.empty();
    }

    /// true if interval extends to the end of the range
    bool extends_to_end() const {
      return end.is_max();
    }

    /// removes items <= soid and adjusts begin to the first object
    void trim_to(const hobject_t &soid) {
      trim();
      while (!objects.empty() &&
             objects.begin()->first <= soid) {
        pop_front();
      }
    }

    /// Adjusts begin to the first object
    void trim() {
      if (!objects.empty())
        begin = objects.begin()->first;
      else
        begin = end;
    }

    /// drop first entry, and adjust @begin accordingly
    void pop_front() {
      assert(!objects.empty());
      objects.erase(objects.begin());
      trim();
    }

    /// dump
    void dump(Formatter *f) const {
      f->dump_stream("begin") << begin;
      f->dump_stream("end") << end;
      f->open_array_section("objects");
      for (map<hobject_t, eversion_t>::const_iterator i =
             objects.begin();
           i != objects.end();
           ++i) {
        f->open_object_section("object");
        f->dump_stream("object") << i->first;
        f->dump_stream("version") << i->second;
        f->close_section();
      }
      f->close_section();
    }
  };
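
  /*
   * Illustrative sketch (not part of the header): a scan populates the
   * interval, and backfill progress then consumes it from the front
   * (last_backfill_started is an assumed name for the backfill cursor):
   *
   *   BackfillInterval bi;
   *   bi.reset(last_backfill_started);    // begin == end == start
   *   // ... a scan fills bi.objects and advances bi.end ...
   *   bi.trim_to(last_backfill_started);  // drop entries <= that object
   *   if (bi.empty() && bi.extends_to_end())
   *     ; // nothing left to backfill in this interval
   */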

protected:
  BackfillInterval backfill_info;
  map<pg_shard_t, BackfillInterval> peer_backfill_info;
  bool backfill_reserved;
  bool backfill_reserving;

  friend class OSD;

public:
  set<pg_shard_t> backfill_targets;

  bool is_backfill_targets(pg_shard_t osd) {
    return backfill_targets.count(osd);
  }

protected:

  /*
   * blocked request wait hierarchy
   *
   * In order to preserve request ordering we need to be careful about the
   * order in which blocked requests get requeued.  Generally speaking, we
   * push the requests back up to the op_wq in reverse order (most recent
   * request first) so that they come back out again in the original order.
   * However, because there are multiple wait queues, we need to requeue
   * waitlists in order.  Generally speaking, we requeue the wait lists
   * that are checked first.
   *
   * Here are the various wait lists, in the order they are used during
   * request processing, with notes:
   *
   *  - waiting_for_map
   *    - may start or stop blocking at any time (depending on client epoch)
   *  - waiting_for_peered
   *    - !is_peered() or flushes_in_progress
   *    - only starts blocking on interval change; never restarts
   *  - waiting_for_active
   *    - !is_active()
   *    - only starts blocking on interval change; never restarts
   *  - waiting_for_flush
   *    - is_active() and flushes_in_progress
   *    - waiting for final flush during activate
   *  - waiting_for_scrub
   *    - starts and stops blocking for varying intervals during scrub
   *  - waiting_for_unreadable_object
   *    - never restarts once object is readable (* except for EIO?)
   *  - waiting_for_degraded_object
   *    - never restarts once object is writeable (* except for EIO?)
   *  - waiting_for_blocked_object
   *    - starts and stops based on proxied op activity
   *  - obc rwlocks
   *    - starts and stops based on read/write activity
   *
   * Notes:
   *
   *  1. During an interval change, we requeue *everything* in the above order.
   *
   *  2. When an obc rwlock is released, we check for a scrub block and requeue
   *     the op there if it applies.  We ignore the unreadable/degraded/blocked
   *     queues because we assume they cannot apply at that time (this is
   *     probably mostly true).
   *
   *  3. The requeue_ops helper will push ops onto the waiting_for_map list if
   *     it is non-empty.
   *
   * These three behaviors are generally sufficient to maintain ordering, with
   * the possible exception of cases where we make an object degraded or
   * unreadable that was previously okay, e.g. when scrub or op processing
   * encounter an unexpected error.  FIXME.
   */
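
  /*
   * Illustrative sketch (assumed shape, not the actual implementation): the
   * requeue helpers walk a wait list newest-first so that pushing each op
   * back onto the front of the op queue restores the original order:
   *
   *   void requeue_ops(list<OpRequestRef> &l) {
   *     for (auto i = l.rbegin(); i != l.rend(); ++i)  // most recent first
   *       requeue_op(*i);                              // pushes to the front
   *     l.clear();
   *   }
   */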

  // pg waiters
  unsigned flushes_in_progress;

  // ops with newer maps than ours (or blocked behind them)
  // track these by client, since inter-request ordering doesn't otherwise
  // matter.
  unordered_map<entity_name_t,list<OpRequestRef>> waiting_for_map;

  // ops waiting on peered
  list<OpRequestRef>            waiting_for_peered;

  // ops waiting on active (require peered as well)
  list<OpRequestRef>            waiting_for_active;
  list<OpRequestRef>            waiting_for_flush;
  list<OpRequestRef>            waiting_for_scrub;

  list<OpRequestRef>            waiting_for_cache_not_full;
  list<OpRequestRef>            waiting_for_clean_to_primary_repair;
  map<hobject_t, list<OpRequestRef>> waiting_for_unreadable_object,
                                     waiting_for_degraded_object,
                                     waiting_for_blocked_object;

  set<hobject_t> objects_blocked_on_cache_full;
  map<hobject_t,snapid_t> objects_blocked_on_degraded_snap;
  map<hobject_t,ObjectContextRef> objects_blocked_on_snap_promotion;

  // Callbacks should assume pg (and nothing else) is locked
  map<hobject_t, list<Context*>> callbacks_for_degraded_object;

  map<eversion_t,
      list<pair<OpRequestRef, version_t> > > waiting_for_ondisk;

  void requeue_object_waiters(map<hobject_t, list<OpRequestRef>>& m);
  void requeue_op(OpRequestRef op);
  void requeue_ops(list<OpRequestRef> &l);

  // stats that persist lazily
  object_stat_collection_t unstable_stats;

  // publish stats
  Mutex pg_stats_publish_lock;
  bool pg_stats_publish_valid;
  pg_stat_t pg_stats_publish;

  // for ordering writes
  ceph::shared_ptr<ObjectStore::Sequencer> osr;

  void _update_calc_stats();
  void _update_blocked_by();
  void publish_stats_to_osd();
  void clear_publish_stats();

public:
  void clear_primary_state();

  bool is_actingbackfill(pg_shard_t osd) const {
    return actingbackfill.count(osd);
  }
  bool is_acting(pg_shard_t osd) const {
    return has_shard(pool.info.ec_pool(), acting, osd);
  }
  bool is_up(pg_shard_t osd) const {
    return has_shard(pool.info.ec_pool(), up, osd);
  }
  static bool has_shard(bool ec, const vector<int>& v, pg_shard_t osd) {
    if (ec) {
      return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd;
    } else {
      return std::find(v.begin(), v.end(), osd.osd) != v.end();
    }
  }
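  // For example (illustrative), with v = {4, 7, 2} in an EC pool:
  //   has_shard(true, v, pg_shard_t(7, shard_id_t(1)))  -> true  (v[1] == 7)
  //   has_shard(true, v, pg_shard_t(7, shard_id_t(0)))  -> false (v[0] == 4)
  // For a replicated pool the shard is ignored; membership alone suffices:
  //   has_shard(false, v, pg_shard_t(7, shard_id_t::NO_SHARD)) -> true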

  bool needs_recovery() const;
  bool needs_backfill() const;

  /// clip calculated priority to reasonable range
  inline int clamp_recovery_priority(int priority);
  /// get log recovery reservation priority
  unsigned get_recovery_priority();
  /// get backfill reservation priority
  unsigned get_backfill_priority();

  void mark_clean();  ///< mark an active pg clean
  void _change_recovery_force_mode(int new_mode, bool clear);

  /// return [start,end) bounds for required past_intervals
  static pair<epoch_t, epoch_t> get_required_past_interval_bounds(
    const pg_info_t &info,
    epoch_t oldest_map) {
    epoch_t start = MAX(
      info.history.last_epoch_clean ? info.history.last_epoch_clean :
        info.history.epoch_pool_created,
      oldest_map);
    epoch_t end = MAX(
      info.history.same_interval_since,
      info.history.epoch_pool_created);
    return make_pair(start, end);
  }
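  // Worked example for get_required_past_interval_bounds: with
  // last_epoch_clean = 880, same_interval_since = 900, and oldest_map = 700,
  // this returns [880, 900): intervals from before the PG was last clean
  // (or before the oldest retained map) are not required.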
  void check_past_interval_bounds() const;
  PastIntervals::PriorSet build_prior();

  void remove_down_peer_info(const OSDMapRef osdmap);

  bool adjust_need_up_thru(const OSDMapRef osdmap);

  bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
  virtual void dump_recovery_info(Formatter *f) const = 0;

  bool calc_min_last_complete_ondisk() {
    eversion_t min = last_complete_ondisk;
    assert(!actingbackfill.empty());
    for (set<pg_shard_t>::iterator i = actingbackfill.begin();
         i != actingbackfill.end();
         ++i) {
      if (*i == get_primary()) continue;
      if (peer_last_complete_ondisk.count(*i) == 0)
        return false;   // we don't have complete info
      eversion_t a = peer_last_complete_ondisk[*i];
      if (a < min)
        min = a;
    }
    if (min == min_last_complete_ondisk)
      return false;
    min_last_complete_ondisk = min;
    return true;
  }

  virtual void calc_trim_to() = 0;

  void proc_replica_log(pg_info_t &oinfo, const pg_log_t &olog,
                        pg_missing_t& omissing, pg_shard_t from);
  void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
                       pg_missing_t& omissing, pg_shard_t from);
  bool proc_replica_info(
    pg_shard_t from, const pg_info_t &info, epoch_t send_epoch);

  struct PGLogEntryHandler : public PGLog::LogEntryHandler {
    PG *pg;
    ObjectStore::Transaction *t;
    PGLogEntryHandler(PG *pg, ObjectStore::Transaction *t) : pg(pg), t(t) {}

    // LogEntryHandler
    void remove(const hobject_t &hoid) override {
      pg->get_pgbackend()->remove(hoid, t);
    }
    void try_stash(const hobject_t &hoid, version_t v) override {
      pg->get_pgbackend()->try_stash(hoid, v, t);
    }
    void rollback(const pg_log_entry_t &entry) override {
      assert(entry.can_rollback());
      pg->get_pgbackend()->rollback(entry, t);
    }
    void rollforward(const pg_log_entry_t &entry) override {
      pg->get_pgbackend()->rollforward(entry, t);
    }
    void trim(const pg_log_entry_t &entry) override {
      pg->get_pgbackend()->trim(entry, t);
    }
  };

  void update_object_snap_mapping(
    ObjectStore::Transaction *t, const hobject_t &soid,
    const set<snapid_t> &snaps);
  void clear_object_snap_mapping(
    ObjectStore::Transaction *t, const hobject_t &soid);
  void remove_snap_mapped_object(
    ObjectStore::Transaction& t, const hobject_t& soid);
  void merge_log(
    ObjectStore::Transaction& t, pg_info_t &oinfo,
    pg_log_t &olog, pg_shard_t from);
  void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
  bool search_for_missing(
    const pg_info_t &oinfo, const pg_missing_t &omissing,
    pg_shard_t fromosd,
    RecoveryCtx*);

  void check_for_lost_objects();
  void forget_lost_objects();

  void discover_all_missing(std::map<int, map<spg_t,pg_query_t> > &query_map);

  void trim_write_ahead();

  map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
    const map<pg_shard_t, pg_info_t> &infos,
    bool restrict_to_up_acting,
    bool *history_les_bound) const;
  static void calc_ec_acting(
    map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
    unsigned size,
    const vector<int> &acting,
    pg_shard_t acting_primary,
    const vector<int> &up,
    pg_shard_t up_primary,
    const map<pg_shard_t, pg_info_t> &all_info,
    bool restrict_to_up_acting,
    vector<int> *want,
    set<pg_shard_t> *backfill,
    set<pg_shard_t> *acting_backfill,
    pg_shard_t *want_primary,
    ostream &ss);
  static void calc_replicated_acting(
    map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
    unsigned size,
    const vector<int> &acting,
    pg_shard_t acting_primary,
    const vector<int> &up,
    pg_shard_t up_primary,
    const map<pg_shard_t, pg_info_t> &all_info,
    bool restrict_to_up_acting,
    vector<int> *want,
    set<pg_shard_t> *backfill,
    set<pg_shard_t> *acting_backfill,
    pg_shard_t *want_primary,
    ostream &ss);
  bool choose_acting(pg_shard_t &auth_log_shard,
                     bool restrict_to_up_acting,
                     bool *history_les_bound);
  void build_might_have_unfound();
  void activate(
    ObjectStore::Transaction& t,
    epoch_t activation_epoch,
    list<Context*>& tfin,
    map<int, map<spg_t,pg_query_t> >& query_map,
    map<int,
        vector<pair<pg_notify_t, PastIntervals> > > *activator_map,
    RecoveryCtx *ctx);
  void _activate_committed(epoch_t epoch, epoch_t activation_epoch);
  void all_activated_and_committed();

  void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);

  bool have_unfound() const {
    return missing_loc.have_unfound();
  }
  uint64_t get_num_unfound() const {
    return missing_loc.num_unfound();
  }

  virtual void check_local() = 0;

  /**
   * @param ops_begun returns how many recovery ops the function started
   * @returns true if any useful work was accomplished; false otherwise
   */
  virtual bool start_recovery_ops(
    uint64_t max,
    ThreadPool::TPHandle &handle,
    uint64_t *ops_begun) = 0;

  void purge_strays();

  void update_heartbeat_peers();

  Context *finish_sync_event;

  void finish_recovery(list<Context*>& tfin);
  void _finish_recovery(Context *c);
  void cancel_recovery();
  void clear_recovery_state();
  virtual void _clear_recovery_state() = 0;
  virtual void check_recovery_sources(const OSDMapRef& newmap) = 0;
  void start_recovery_op(const hobject_t& soid);
  void finish_recovery_op(const hobject_t& soid, bool dequeue=false);

  void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
  virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) = 0;

  friend class C_OSD_RepModify_Commit;

  // -- backoff --
  Mutex backoff_lock;  // orders inside Backoff::lock
  map<hobject_t,set<BackoffRef>> backoffs;

  void add_backoff(SessionRef s, const hobject_t& begin, const hobject_t& end);
  void release_backoffs(const hobject_t& begin, const hobject_t& end);
  void release_backoffs(const hobject_t& o) {
    release_backoffs(o, o);
  }
  void clear_backoffs();

  void add_pg_backoff(SessionRef s) {
    hobject_t begin = info.pgid.pgid.get_hobj_start();
    hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
    add_backoff(s, begin, end);
  }
  void release_pg_backoffs() {
    hobject_t begin = info.pgid.pgid.get_hobj_start();
    hobject_t end = info.pgid.pgid.get_hobj_end(pool.info.get_pg_num());
    release_backoffs(begin, end);
  }

  void rm_backoff(BackoffRef b);

  // -- scrub --
  struct Scrubber {
    Scrubber();
    ~Scrubber();

    // metadata
    set<pg_shard_t> reserved_peers;
    bool reserved, reserve_failed;
    epoch_t epoch_start;

    // common to both scrubs
    bool active;
    int waiting_on;
    set<pg_shard_t> waiting_on_whom;
    int shallow_errors;
    int deep_errors;
    int fixed;
    ScrubMap primary_scrubmap;
    map<pg_shard_t, ScrubMap> received_maps;
    OpRequestRef active_rep_scrub;
    utime_t scrub_reg_stamp;  // stamp we registered for

    // For async sleep
    bool sleeping = false;
    bool needs_sleep = true;
    utime_t sleep_start;

    // flags to indicate explicitly requested scrubs (by admin)
    bool must_scrub, must_deep_scrub, must_repair;

    // Priority to use for scrub scheduling
    unsigned priority;

    // this flag indicates whether we would like to do auto-repair of the PG or not
    bool auto_repair;

    // Maps from objects with errors to missing/inconsistent peers
    map<hobject_t, set<pg_shard_t>> missing;
    map<hobject_t, set<pg_shard_t>> inconsistent;

    // Map from object with errors to good peers
    map<hobject_t, list<pair<ScrubMap::object, pg_shard_t> >> authoritative;

    // Cleaned map pending snap metadata scrub
    ScrubMap cleaned_meta_map;

    // digest updates which we are waiting on
    int num_digest_updates_pending;

    // chunky scrub
    hobject_t start, end;
    eversion_t subset_last_update;

    // chunky scrub state
    enum State {
      INACTIVE,
      NEW_CHUNK,
      WAIT_PUSHES,
      WAIT_LAST_UPDATE,
      BUILD_MAP,
      WAIT_REPLICAS,
      COMPARE_MAPS,
      WAIT_DIGEST_UPDATES,
      FINISH,
    } state;

    std::unique_ptr<Scrub::Store> store;
    // deep scrub
    bool deep;
    uint32_t seed;

    list<Context*> callbacks;
    void add_callback(Context *context) {
      callbacks.push_back(context);
    }
    void run_callbacks() {
      list<Context*> to_run;
      to_run.swap(callbacks);
      for (list<Context*>::iterator i = to_run.begin();
           i != to_run.end();
           ++i) {
        (*i)->complete(0);
      }
    }

    static const char *state_string(const PG::Scrubber::State& state) {
      const char *ret = NULL;
      switch (state) {
        case INACTIVE: ret = "INACTIVE"; break;
        case NEW_CHUNK: ret = "NEW_CHUNK"; break;
        case WAIT_PUSHES: ret = "WAIT_PUSHES"; break;
        case WAIT_LAST_UPDATE: ret = "WAIT_LAST_UPDATE"; break;
        case BUILD_MAP: ret = "BUILD_MAP"; break;
        case WAIT_REPLICAS: ret = "WAIT_REPLICAS"; break;
        case COMPARE_MAPS: ret = "COMPARE_MAPS"; break;
        case WAIT_DIGEST_UPDATES: ret = "WAIT_DIGEST_UPDATES"; break;
        case FINISH: ret = "FINISH"; break;
      }
      return ret;
    }

    bool is_chunky_scrub_active() const { return state != INACTIVE; }

    // classic (non chunk) scrubs block all writes
    // chunky scrubs only block writes to a range
    bool write_blocked_by_scrub(const hobject_t &soid) {
      return (soid >= start && soid < end);
    }

    // clear all state
    void reset() {
      active = false;
      waiting_on = 0;
      waiting_on_whom.clear();
      if (active_rep_scrub) {
        active_rep_scrub = OpRequestRef();
      }
      received_maps.clear();

      must_scrub = false;
      must_deep_scrub = false;
      must_repair = false;
      auto_repair = false;

      state = PG::Scrubber::INACTIVE;
      start = hobject_t();
      end = hobject_t();
      subset_last_update = eversion_t();
      shallow_errors = 0;
      deep_errors = 0;
      fixed = 0;
      deep = false;
      seed = 0;
      run_callbacks();
      inconsistent.clear();
      missing.clear();
      authoritative.clear();
      num_digest_updates_pending = 0;
      cleaned_meta_map = ScrubMap();
      sleeping = false;
      needs_sleep = true;
      sleep_start = utime_t();
    }

    void create_results(const hobject_t& obj);
    void cleanup_store(ObjectStore::Transaction *t);
  } scrubber;

  bool scrub_after_recovery;

  int active_pushes;

  void repair_object(
    const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
    pg_shard_t bad_peer);

  void scrub(epoch_t queued, ThreadPool::TPHandle &handle);
  void chunky_scrub(ThreadPool::TPHandle &handle);
  void scrub_compare_maps();
  /**
   * return true if any inconsistency/missing is repaired, false otherwise
   */
  bool scrub_process_inconsistent();
  bool ops_blocked_by_scrub() const;
  void scrub_finish();
  void scrub_clear_state();
  void _scan_snaps(ScrubMap &map);
  void _repair_oinfo_oid(ScrubMap &map);
  void _scan_rollback_obs(
    const vector<ghobject_t> &rollback_obs,
    ThreadPool::TPHandle &handle);
  void _request_scrub_map(pg_shard_t replica, eversion_t version,
                          hobject_t start, hobject_t end, bool deep,
                          uint32_t seed);
  int build_scrub_map_chunk(
    ScrubMap &map,
    hobject_t start, hobject_t end, bool deep, uint32_t seed,
    ThreadPool::TPHandle &handle);
  /**
   * returns true if [begin, end) is good to scrub at this time
   * a false return value obliges the implementer to requeue scrub when the
   * condition preventing scrub clears
   */
  virtual bool _range_available_for_scrub(
    const hobject_t &begin, const hobject_t &end) = 0;
  virtual void scrub_snapshot_metadata(
    ScrubMap &map,
    const std::map<hobject_t, pair<uint32_t, uint32_t>> &missing_digest) { }
  virtual void _scrub_clear_state() { }
  virtual void _scrub_finish() { }
  virtual void split_colls(
    spg_t child,
    int split_bits,
    int seed,
    const pg_pool_t *pool,
    ObjectStore::Transaction *t) = 0;
  void clear_scrub_reserved();
  void scrub_reserve_replicas();
  void scrub_unreserve_replicas();
  bool scrub_all_replicas_reserved() const;
  bool sched_scrub();
  void reg_next_scrub();
  void unreg_next_scrub();

  void replica_scrub(
    OpRequestRef op,
    ThreadPool::TPHandle &handle);
  void do_replica_scrub_map(OpRequestRef op);
  void sub_op_scrub_map(OpRequestRef op);

  void handle_scrub_reserve_request(OpRequestRef op);
  void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from);
  void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from);
  void handle_scrub_reserve_release(OpRequestRef op);

  void reject_reservation();
  void schedule_backfill_retry(float retry);
  void schedule_recovery_retry(float retry);

  // -- recovery state --

  template <class EVT>
  struct QueuePeeringEvt : Context {
    PGRef pg;
    epoch_t epoch;
    EVT evt;
    QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
      pg(pg), epoch(epoch), evt(evt) {}
    void finish(int r) override {
      pg->lock();
      pg->queue_peering_event(PG::CephPeeringEvtRef(
                                new PG::CephPeeringEvt(
                                  epoch,
                                  epoch,
                                  evt)));
      pg->unlock();
    }
  };
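
  /*
   * Illustrative sketch (not part of the header): a transaction completion
   * that must re-enter peering is typically wrapped so the event is delivered
   * under the PG lock and stamped with the submitting epoch:
   *
   *   t.register_on_complete(
   *     new QueuePeeringEvt<IntervalFlush>(
   *       this, get_osdmap()->get_epoch(), IntervalFlush()));
   */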

  class CephPeeringEvt {
    epoch_t epoch_sent;
    epoch_t epoch_requested;
    boost::intrusive_ptr< const boost::statechart::event_base > evt;
    string desc;
  public:
    MEMPOOL_CLASS_HELPERS();
    template <class T>
    CephPeeringEvt(epoch_t epoch_sent,
                   epoch_t epoch_requested,
                   const T &evt_) :
      epoch_sent(epoch_sent), epoch_requested(epoch_requested),
      evt(evt_.intrusive_from_this()) {
      stringstream out;
      out << "epoch_sent: " << epoch_sent
          << " epoch_requested: " << epoch_requested << " ";
      evt_.print(&out);
      desc = out.str();
    }
    epoch_t get_epoch_sent() { return epoch_sent; }
    epoch_t get_epoch_requested() { return epoch_requested; }
    const boost::statechart::event_base &get_event() { return *evt; }
    string get_desc() { return desc; }
  };
  typedef ceph::shared_ptr<CephPeeringEvt> CephPeeringEvtRef;
  list<CephPeeringEvtRef> peering_queue;  // op queue
  list<CephPeeringEvtRef> peering_waiters;

  struct QueryState : boost::statechart::event< QueryState > {
    Formatter *f;
    explicit QueryState(Formatter *f) : f(f) {}
    void print(std::ostream *out) const {
      *out << "Query";
    }
  };

  struct MInfoRec : boost::statechart::event< MInfoRec > {
    pg_shard_t from;
    pg_info_t info;
    epoch_t msg_epoch;
    MInfoRec(pg_shard_t from, const pg_info_t &info, epoch_t msg_epoch) :
      from(from), info(info), msg_epoch(msg_epoch) {}
    void print(std::ostream *out) const {
      *out << "MInfoRec from " << from << " info: " << info;
    }
  };

  struct MLogRec : boost::statechart::event< MLogRec > {
    pg_shard_t from;
    boost::intrusive_ptr<MOSDPGLog> msg;
    MLogRec(pg_shard_t from, MOSDPGLog *msg) :
      from(from), msg(msg) {}
    void print(std::ostream *out) const {
      *out << "MLogRec from " << from;
    }
  };

  struct MNotifyRec : boost::statechart::event< MNotifyRec > {
    pg_shard_t from;
    pg_notify_t notify;
    uint64_t features;
    MNotifyRec(pg_shard_t from, const pg_notify_t &notify, uint64_t f) :
      from(from), notify(notify), features(f) {}
    void print(std::ostream *out) const {
      *out << "MNotifyRec from " << from << " notify: " << notify
           << " features: 0x" << hex << features << dec;
    }
  };

  struct MQuery : boost::statechart::event< MQuery > {
    pg_shard_t from;
    pg_query_t query;
    epoch_t query_epoch;
    MQuery(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch):
      from(from), query(query), query_epoch(query_epoch) {}
    void print(std::ostream *out) const {
      *out << "MQuery from " << from
           << " query_epoch " << query_epoch
           << " query: " << query;
    }
  };

  struct AdvMap : boost::statechart::event< AdvMap > {
    OSDMapRef osdmap;
    OSDMapRef lastmap;
    vector<int> newup, newacting;
    int up_primary, acting_primary;
    AdvMap(
      OSDMapRef osdmap, OSDMapRef lastmap,
      vector<int>& newup, int up_primary,
      vector<int>& newacting, int acting_primary):
      osdmap(osdmap), lastmap(lastmap),
      newup(newup),
      newacting(newacting),
      up_primary(up_primary),
      acting_primary(acting_primary) {}
    void print(std::ostream *out) const {
      *out << "AdvMap";
    }
  };

  struct ActMap : boost::statechart::event< ActMap > {
    ActMap() : boost::statechart::event< ActMap >() {}
    void print(std::ostream *out) const {
      *out << "ActMap";
    }
  };
  struct Activate : boost::statechart::event< Activate > {
    epoch_t activation_epoch;
    explicit Activate(epoch_t q) : boost::statechart::event< Activate >(),
                                   activation_epoch(q) {}
    void print(std::ostream *out) const {
      *out << "Activate from " << activation_epoch;
    }
  };
  struct RequestBackfillPrio : boost::statechart::event< RequestBackfillPrio > {
    unsigned priority;
    explicit RequestBackfillPrio(unsigned prio) :
      boost::statechart::event< RequestBackfillPrio >(),
      priority(prio) {}
    void print(std::ostream *out) const {
      *out << "RequestBackfillPrio: priority " << priority;
    }
  };
#define TrivialEvent(T) struct T : boost::statechart::event< T > { \
    T() : boost::statechart::event< T >() {}                       \
    void print(std::ostream *out) const {                          \
      *out << #T;                                                  \
    }                                                              \
  };
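  // For reference, TrivialEvent(Backfilled) expands to:
  //
  //   struct Backfilled : boost::statechart::event< Backfilled > {
  //     Backfilled() : boost::statechart::event< Backfilled >() {}
  //     void print(std::ostream *out) const {
  //       *out << "Backfilled";
  //     }
  //   };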
  struct DeferBackfill : boost::statechart::event<DeferBackfill> {
    float delay;
    explicit DeferBackfill(float delay) : delay(delay) {}
    void print(std::ostream *out) const {
      *out << "DeferBackfill: delay " << delay;
    }
  };
  struct DeferRecovery : boost::statechart::event<DeferRecovery> {
    float delay;
    explicit DeferRecovery(float delay) : delay(delay) {}
    void print(std::ostream *out) const {
      *out << "DeferRecovery: delay " << delay;
    }
  };
  struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
    explicit UnfoundBackfill() {}
    void print(std::ostream *out) const {
      *out << "UnfoundBackfill";
    }
  };
  struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> {
    explicit UnfoundRecovery() {}
    void print(std::ostream *out) const {
      *out << "UnfoundRecovery";
    }
  };
protected:
  TrivialEvent(Initialize)
  TrivialEvent(Load)
  TrivialEvent(GotInfo)
  TrivialEvent(NeedUpThru)
  TrivialEvent(NullEvt)
  TrivialEvent(FlushedEvt)
  TrivialEvent(Backfilled)
  TrivialEvent(LocalBackfillReserved)
  TrivialEvent(RemoteBackfillReserved)
  TrivialEvent(RejectRemoteReservation)
  TrivialEvent(RemoteReservationRejected)
  TrivialEvent(RemoteReservationCanceled)
  TrivialEvent(RequestBackfill)
  TrivialEvent(RequestRecovery)
  TrivialEvent(RecoveryDone)
  TrivialEvent(BackfillTooFull)
  TrivialEvent(RecoveryTooFull)

  TrivialEvent(MakePrimary)
  TrivialEvent(MakeStray)
  TrivialEvent(NeedActingChange)
  TrivialEvent(IsIncomplete)
  TrivialEvent(IsDown)

  TrivialEvent(AllReplicasRecovered)
  TrivialEvent(DoRecovery)
  TrivialEvent(LocalRecoveryReserved)
  TrivialEvent(RemoteRecoveryReserved)
  TrivialEvent(AllRemotesReserved)
  TrivialEvent(AllBackfillsReserved)
  TrivialEvent(GoClean)

  TrivialEvent(AllReplicasActivated)

  TrivialEvent(IntervalFlush)

  /* Encapsulates PG recovery process */
  class RecoveryState {
    void start_handle(RecoveryCtx *new_ctx);
    void end_handle();
  public:
    void begin_block_outgoing();
    void end_block_outgoing();
    void clear_blocked_outgoing();
  private:

    /* States */
    struct Initial;
    class RecoveryMachine : public boost::statechart::state_machine< RecoveryMachine, Initial > {
      RecoveryState *state;
    public:
      PG *pg;

      utime_t event_time;
      uint64_t event_count;

      void clear_event_counters() {
        event_time = utime_t();
        event_count = 0;
      }

      void log_enter(const char *state_name);
      void log_exit(const char *state_name, utime_t duration);

      RecoveryMachine(RecoveryState *state, PG *pg) : state(state), pg(pg), event_count(0) {}

      /* Accessor functions for state methods */
      ObjectStore::Transaction* get_cur_transaction() {
        assert(state->rctx);
        assert(state->rctx->transaction);
        return state->rctx->transaction;
      }

      void send_query(pg_shard_t to, const pg_query_t &query) {
        assert(state->rctx);
        assert(state->rctx->query_map);
        (*state->rctx->query_map)[to.osd][spg_t(pg->info.pgid.pgid, to.shard)] =
          query;
      }

      map<int, map<spg_t, pg_query_t> > *get_query_map() {
        assert(state->rctx);
        assert(state->rctx->query_map);
        return state->rctx->query_map;
      }

      map<int, vector<pair<pg_notify_t, PastIntervals> > > *get_info_map() {
        assert(state->rctx);
        assert(state->rctx->info_map);
        return state->rctx->info_map;
      }

      list< Context* > *get_on_safe_context_list() {
        assert(state->rctx);
        assert(state->rctx->on_safe);
        return &(state->rctx->on_safe->contexts);
      }

      list< Context * > *get_on_applied_context_list() {
        assert(state->rctx);
        assert(state->rctx->on_applied);
        return &(state->rctx->on_applied->contexts);
      }

      RecoveryCtx *get_recovery_ctx() { return &*(state->rctx); }

      void send_notify(pg_shard_t to,
                       const pg_notify_t &info, const PastIntervals &pi) {
        assert(state->rctx);
        assert(state->rctx->notify_list);
        (*state->rctx->notify_list)[to.osd].push_back(make_pair(info, pi));
      }
    };
    friend class RecoveryMachine;

    /* States */
    // Initial
    // Reset
    // Start
    //   Started
    //     Primary
    //       WaitActingChange
    //       Peering
    //         GetInfo
    //         GetLog
    //         GetMissing
    //         WaitUpThru
    //         Incomplete
    //       Active
    //         Activating
    //         Clean
    //         Recovered
    //         Backfilling
    //         WaitRemoteBackfillReserved
    //         WaitLocalBackfillReserved
    //         NotBackfilling
    //         NotRecovering
    //         Recovering
    //         WaitRemoteRecoveryReserved
    //         WaitLocalRecoveryReserved
    //     ReplicaActive
    //       RepNotRecovering
    //       RepRecovering
    //       RepWaitBackfillReserved
    //       RepWaitRecoveryReserved
    //     Stray
1743
1744 struct Crashed : boost::statechart::state< Crashed, RecoveryMachine >, NamedState {
1745 explicit Crashed(my_context ctx);
1746 };
1747
1748 struct Reset;
1749
1750 struct Initial : boost::statechart::state< Initial, RecoveryMachine >, NamedState {
1751 explicit Initial(my_context ctx);
1752 void exit();
1753
1754 typedef boost::mpl::list <
1755 boost::statechart::transition< Initialize, Reset >,
1756 boost::statechart::custom_reaction< Load >,
1757 boost::statechart::custom_reaction< NullEvt >,
1758 boost::statechart::transition< boost::statechart::event_base, Crashed >
1759 > reactions;
1760
1761 boost::statechart::result react(const Load&);
1762 boost::statechart::result react(const MNotifyRec&);
1763 boost::statechart::result react(const MInfoRec&);
1764 boost::statechart::result react(const MLogRec&);
1765 boost::statechart::result react(const boost::statechart::event_base&) {
1766 return discard_event();
1767 }
1768 };
1769
1770 struct Reset : boost::statechart::state< Reset, RecoveryMachine >, NamedState {
1771 explicit Reset(my_context ctx);
1772 void exit();
1773
1774 typedef boost::mpl::list <
1775 boost::statechart::custom_reaction< QueryState >,
1776 boost::statechart::custom_reaction< AdvMap >,
1777 boost::statechart::custom_reaction< ActMap >,
1778 boost::statechart::custom_reaction< NullEvt >,
1779 boost::statechart::custom_reaction< FlushedEvt >,
1780 boost::statechart::custom_reaction< IntervalFlush >,
1781 boost::statechart::transition< boost::statechart::event_base, Crashed >
1782 > reactions;
1783 boost::statechart::result react(const QueryState& q);
1784 boost::statechart::result react(const AdvMap&);
1785 boost::statechart::result react(const ActMap&);
1786 boost::statechart::result react(const FlushedEvt&);
1787 boost::statechart::result react(const IntervalFlush&);
1788 boost::statechart::result react(const boost::statechart::event_base&) {
1789 return discard_event();
1790 }
1791 };
1792
1793 struct Start;
1794
1795 struct Started : boost::statechart::state< Started, RecoveryMachine, Start >, NamedState {
1796 explicit Started(my_context ctx);
1797 void exit();
1798
1799 typedef boost::mpl::list <
1800 boost::statechart::custom_reaction< QueryState >,
1801 boost::statechart::custom_reaction< AdvMap >,
1802 boost::statechart::custom_reaction< NullEvt >,
1803 boost::statechart::custom_reaction< FlushedEvt >,
1804 boost::statechart::custom_reaction< IntervalFlush >,
1805 boost::statechart::transition< boost::statechart::event_base, Crashed >
1806 > reactions;
1807 boost::statechart::result react(const QueryState& q);
1808 boost::statechart::result react(const AdvMap&);
1809 boost::statechart::result react(const FlushedEvt&);
1810 boost::statechart::result react(const IntervalFlush&);
1811 boost::statechart::result react(const boost::statechart::event_base&) {
1812 return discard_event();
1813 }
1814 };
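  // Editorial note: the trailing transition< boost::statechart::event_base,
  // Crashed > entries above act as a catch-all -- any event that no inner or
  // listed reaction consumes drives the machine into Crashed. NullEvt is
  // listed as a custom reaction so that it falls through to the inline
  // react(const boost::statechart::event_base&) overload and is quietly
  // discarded instead.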
1815
1816 struct Primary;
1817 struct Stray;
1818
1819 struct Start : boost::statechart::state< Start, Started >, NamedState {
1820 explicit Start(my_context ctx);
1821 void exit();
1822
1823 typedef boost::mpl::list <
1824 boost::statechart::transition< MakePrimary, Primary >,
1825 boost::statechart::transition< MakeStray, Stray >
1826 > reactions;
1827 };
1828
1829 struct Peering;
1830 struct WaitActingChange;
1831 struct Incomplete;
1832 struct Down;
1833
1834 struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState {
1835 explicit Primary(my_context ctx);
1836 void exit();
1837
1838 typedef boost::mpl::list <
1839 boost::statechart::custom_reaction< ActMap >,
1840 boost::statechart::custom_reaction< MNotifyRec >,
1841 boost::statechart::transition< NeedActingChange, WaitActingChange >
1842 > reactions;
1843 boost::statechart::result react(const ActMap&);
1844 boost::statechart::result react(const MNotifyRec&);
1845 };
1846
1847 struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
1848 NamedState {
1849 typedef boost::mpl::list <
1850 boost::statechart::custom_reaction< QueryState >,
1851 boost::statechart::custom_reaction< AdvMap >,
1852 boost::statechart::custom_reaction< MLogRec >,
1853 boost::statechart::custom_reaction< MInfoRec >,
1854 boost::statechart::custom_reaction< MNotifyRec >
1855 > reactions;
1856 explicit WaitActingChange(my_context ctx);
1857 boost::statechart::result react(const QueryState& q);
1858 boost::statechart::result react(const AdvMap&);
1859 boost::statechart::result react(const MLogRec&);
1860 boost::statechart::result react(const MInfoRec&);
1861 boost::statechart::result react(const MNotifyRec&);
1862 void exit();
1863 };
1864
1865 struct GetInfo;
1866 struct Active;
1867
1868 struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState {
1869 PastIntervals::PriorSet prior_set;
1870 bool history_les_bound; ///< need osd_find_best_info_ignore_history_les
1871
1872 explicit Peering(my_context ctx);
1873 void exit();
1874
1875 typedef boost::mpl::list <
1876 boost::statechart::custom_reaction< QueryState >,
1877 boost::statechart::transition< Activate, Active >,
1878 boost::statechart::custom_reaction< AdvMap >
1879 > reactions;
1880 boost::statechart::result react(const QueryState& q);
1881 boost::statechart::result react(const AdvMap &advmap);
1882 };
1883
1884 struct WaitLocalRecoveryReserved;
1885 struct Activating;
1886 struct Active : boost::statechart::state< Active, Primary, Activating >, NamedState {
1887 explicit Active(my_context ctx);
1888 void exit();
1889
1890 const set<pg_shard_t> remote_shards_to_reserve_recovery;
1891 const set<pg_shard_t> remote_shards_to_reserve_backfill;
1892 bool all_replicas_activated;
1893
1894 typedef boost::mpl::list <
1895 boost::statechart::custom_reaction< QueryState >,
1896 boost::statechart::custom_reaction< ActMap >,
1897 boost::statechart::custom_reaction< AdvMap >,
1898 boost::statechart::custom_reaction< MInfoRec >,
1899 boost::statechart::custom_reaction< MNotifyRec >,
1900 boost::statechart::custom_reaction< MLogRec >,
1901 boost::statechart::custom_reaction< Backfilled >,
1902 boost::statechart::custom_reaction< AllReplicasActivated >,
1903 boost::statechart::custom_reaction< DeferRecovery >,
1904 boost::statechart::custom_reaction< DeferBackfill >,
1905 boost::statechart::custom_reaction< UnfoundRecovery >,
1906 boost::statechart::custom_reaction< UnfoundBackfill >,
1907 boost::statechart::custom_reaction< DoRecovery>
1908 > reactions;
1909 boost::statechart::result react(const QueryState& q);
1910 boost::statechart::result react(const ActMap&);
1911 boost::statechart::result react(const AdvMap&);
1912 boost::statechart::result react(const MInfoRec& infoevt);
1913 boost::statechart::result react(const MNotifyRec& notevt);
1914 boost::statechart::result react(const MLogRec& logevt);
1915 boost::statechart::result react(const Backfilled&) {
1916 return discard_event();
1917 }
1918 boost::statechart::result react(const AllReplicasActivated&);
1919 boost::statechart::result react(const DeferRecovery& evt) {
1920 return discard_event();
1921 }
1922 boost::statechart::result react(const DeferBackfill& evt) {
1923 return discard_event();
1924 }
1925 boost::statechart::result react(const UnfoundRecovery& evt) {
1926 return discard_event();
1927 }
1928 boost::statechart::result react(const UnfoundBackfill& evt) {
1929 return discard_event();
1930 }
1931 boost::statechart::result react(const DoRecovery&) {
1932 return discard_event();
1933 }
1934 };
1935
1936 struct Clean : boost::statechart::state< Clean, Active >, NamedState {
1937 typedef boost::mpl::list<
1938 boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >
1939 > reactions;
1940 explicit Clean(my_context ctx);
1941 void exit();
1942 };
1943
1944 struct Recovered : boost::statechart::state< Recovered, Active >, NamedState {
1945 typedef boost::mpl::list<
1946 boost::statechart::transition< GoClean, Clean >,
1947 boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
1948 boost::statechart::custom_reaction< AllReplicasActivated >
1949 > reactions;
1950 explicit Recovered(my_context ctx);
1951 void exit();
1952 boost::statechart::result react(const AllReplicasActivated&) {
1953 post_event(GoClean());
1954 return forward_event();
1955 }
1956 };
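  // Editorial note: Recovered's AllReplicasActivated reaction above shows a
  // common statechart idiom -- post_event(GoClean()) queues a follow-up event
  // while forward_event() lets the enclosing Active state also react to the
  // original AllReplicasActivated; the queued GoClean then fires the
  // transition< GoClean, Clean > listed in the reactions.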
1957
1958 struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
1959 typedef boost::mpl::list<
1960 boost::statechart::transition< Backfilled, Recovered >,
1961 boost::statechart::custom_reaction< DeferBackfill >,
1962 boost::statechart::custom_reaction< UnfoundBackfill >,
1963 boost::statechart::custom_reaction< RemoteReservationRejected >
1964 > reactions;
1965 explicit Backfilling(my_context ctx);
1966 boost::statechart::result react(const RemoteReservationRejected& evt);
1967 boost::statechart::result react(const DeferBackfill& evt);
1968 boost::statechart::result react(const UnfoundBackfill& evt);
1969 void exit();
1970 };
1971
1972 struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState {
1973 typedef boost::mpl::list<
1974 boost::statechart::custom_reaction< RemoteBackfillReserved >,
1975 boost::statechart::custom_reaction< RemoteReservationRejected >,
1976 boost::statechart::transition< AllBackfillsReserved, Backfilling >
1977 > reactions;
1978 set<pg_shard_t>::const_iterator backfill_osd_it;
1979 explicit WaitRemoteBackfillReserved(my_context ctx);
1980 void exit();
1981 boost::statechart::result react(const RemoteBackfillReserved& evt);
1982 boost::statechart::result react(const RemoteReservationRejected& evt);
1983 };
1984
1985 struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState {
1986 typedef boost::mpl::list<
1987 boost::statechart::transition< LocalBackfillReserved, WaitRemoteBackfillReserved >
1988 > reactions;
1989 explicit WaitLocalBackfillReserved(my_context ctx);
1990 void exit();
1991 };
1992
1993 struct NotBackfilling : boost::statechart::state< NotBackfilling, Active>, NamedState {
1994 typedef boost::mpl::list<
1995 boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved>,
1996 boost::statechart::custom_reaction< RemoteBackfillReserved >,
1997 boost::statechart::custom_reaction< RemoteReservationRejected >
1998 > reactions;
1999 explicit NotBackfilling(my_context ctx);
2000 void exit();
2001 boost::statechart::result react(const RemoteBackfillReserved& evt);
2002 boost::statechart::result react(const RemoteReservationRejected& evt);
2003 };
2004
2005 struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
2006 typedef boost::mpl::list<
2007 boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
2008 boost::statechart::custom_reaction< DeferRecovery >,
2009 boost::statechart::custom_reaction< UnfoundRecovery >
2010 > reactions;
2011 explicit NotRecovering(my_context ctx);
2012 boost::statechart::result react(const DeferRecovery& evt) {
2013 /* no-op */
2014 return discard_event();
2015 }
2016 boost::statechart::result react(const UnfoundRecovery& evt) {
2017 /* no-op */
2018 return discard_event();
2019 }
2020 void exit();
2021 };
2022
2023 struct RepNotRecovering;
2024 struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
2025 explicit ReplicaActive(my_context ctx);
2026 void exit();
2027
2028 typedef boost::mpl::list <
2029 boost::statechart::custom_reaction< QueryState >,
2030 boost::statechart::custom_reaction< ActMap >,
2031 boost::statechart::custom_reaction< MQuery >,
2032 boost::statechart::custom_reaction< MInfoRec >,
2033 boost::statechart::custom_reaction< MLogRec >,
2034 boost::statechart::custom_reaction< Activate >,
2035 boost::statechart::custom_reaction< DeferRecovery >,
2036 boost::statechart::custom_reaction< DeferBackfill >,
2037 boost::statechart::custom_reaction< UnfoundRecovery >,
2038 boost::statechart::custom_reaction< UnfoundBackfill >
2039 > reactions;
2040 boost::statechart::result react(const QueryState& q);
2041 boost::statechart::result react(const MInfoRec& infoevt);
2042 boost::statechart::result react(const MLogRec& logevt);
2043 boost::statechart::result react(const ActMap&);
2044 boost::statechart::result react(const MQuery&);
2045 boost::statechart::result react(const Activate&);
2046 boost::statechart::result react(const DeferRecovery& evt) {
2047 return discard_event();
2048 }
2049 boost::statechart::result react(const DeferBackfill& evt) {
2050 return discard_event();
2051 }
2052 boost::statechart::result react(const UnfoundRecovery& evt) {
2053 return discard_event();
2054 }
2055 boost::statechart::result react(const UnfoundBackfill& evt) {
2056 return discard_event();
2057 }
2058 };
2059
2060 struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
2061 typedef boost::mpl::list<
2062 boost::statechart::transition< RecoveryDone, RepNotRecovering >,
2063 // for compat with old peers
2064 boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
2065 boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
2066 boost::statechart::custom_reaction< BackfillTooFull >
2067 > reactions;
2068 explicit RepRecovering(my_context ctx);
2069 boost::statechart::result react(const BackfillTooFull &evt);
2070 void exit();
2071 };
2072
2073 struct RepWaitBackfillReserved : boost::statechart::state< RepWaitBackfillReserved, ReplicaActive >, NamedState {
2074 typedef boost::mpl::list<
2075 boost::statechart::custom_reaction< RemoteBackfillReserved >,
2076 boost::statechart::custom_reaction< RejectRemoteReservation >,
2077 boost::statechart::custom_reaction< RemoteReservationRejected >,
2078 boost::statechart::custom_reaction< RemoteReservationCanceled >
2079 > reactions;
2080 explicit RepWaitBackfillReserved(my_context ctx);
2081 void exit();
2082 boost::statechart::result react(const RemoteBackfillReserved &evt);
2083 boost::statechart::result react(const RejectRemoteReservation &evt);
2084 boost::statechart::result react(const RemoteReservationRejected &evt);
2085 boost::statechart::result react(const RemoteReservationCanceled &evt);
2086 };
2087
2088 struct RepWaitRecoveryReserved : boost::statechart::state< RepWaitRecoveryReserved, ReplicaActive >, NamedState {
2089 typedef boost::mpl::list<
2090 boost::statechart::custom_reaction< RemoteRecoveryReserved >,
2091 // for compat with old peers
2092 boost::statechart::custom_reaction< RemoteReservationRejected >,
2093 boost::statechart::custom_reaction< RemoteReservationCanceled >
2094 > reactions;
2095 explicit RepWaitRecoveryReserved(my_context ctx);
2096 void exit();
2097 boost::statechart::result react(const RemoteRecoveryReserved &evt);
2098 boost::statechart::result react(const RemoteReservationRejected &evt) {
2099 // for compat with old peers
2100 post_event(RemoteReservationCanceled());
2101 return discard_event();
2102 }
2103 boost::statechart::result react(const RemoteReservationCanceled &evt);
2104 };
2105
2106 struct RepNotRecovering : boost::statechart::state< RepNotRecovering, ReplicaActive>, NamedState {
2107 typedef boost::mpl::list<
2108 boost::statechart::custom_reaction< RequestBackfillPrio >,
2109 boost::statechart::transition< RequestRecovery, RepWaitRecoveryReserved >,
2110 boost::statechart::custom_reaction< RejectRemoteReservation >,
2111 boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
2112 boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
2113 boost::statechart::transition< RecoveryDone, RepNotRecovering > // for compat with pre-reservation peers
2114 > reactions;
2115 explicit RepNotRecovering(my_context ctx);
2116 boost::statechart::result react(const RequestBackfillPrio &evt);
2117 boost::statechart::result react(const RejectRemoteReservation &evt);
2118 void exit();
2119 };
2120
2121 struct Recovering : boost::statechart::state< Recovering, Active >, NamedState {
2122 typedef boost::mpl::list <
2123 boost::statechart::custom_reaction< AllReplicasRecovered >,
2124 boost::statechart::custom_reaction< DeferRecovery >,
2125 boost::statechart::custom_reaction< UnfoundRecovery >,
2126 boost::statechart::custom_reaction< RequestBackfill >
2127 > reactions;
2128 explicit Recovering(my_context ctx);
2129 void exit();
2130 void release_reservations(bool cancel = false);
2131 boost::statechart::result react(const AllReplicasRecovered &evt);
2132 boost::statechart::result react(const DeferRecovery& evt);
2133 boost::statechart::result react(const UnfoundRecovery& evt);
2134 boost::statechart::result react(const RequestBackfill &evt);
2135 };
2136
2137 struct WaitRemoteRecoveryReserved : boost::statechart::state< WaitRemoteRecoveryReserved, Active >, NamedState {
2138 typedef boost::mpl::list <
2139 boost::statechart::custom_reaction< RemoteRecoveryReserved >,
2140 boost::statechart::transition< AllRemotesReserved, Recovering >
2141 > reactions;
2142 set<pg_shard_t>::const_iterator remote_recovery_reservation_it;
2143 explicit WaitRemoteRecoveryReserved(my_context ctx);
2144 boost::statechart::result react(const RemoteRecoveryReserved &evt);
2145 void exit();
2146 };
2147
2148 struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
2149 typedef boost::mpl::list <
2150 boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
2151 boost::statechart::custom_reaction< RecoveryTooFull >
2152 > reactions;
2153 explicit WaitLocalRecoveryReserved(my_context ctx);
2154 void exit();
2155 boost::statechart::result react(const RecoveryTooFull &evt);
2156 };
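  // Editorial sketch of the reservation path (not stated in the upstream
  // header): local resources are reserved before remote ones, so a recovery
  // request typically walks
  //
  //   WaitLocalRecoveryReserved --LocalRecoveryReserved-->
  //   WaitRemoteRecoveryReserved --AllRemotesReserved--> Recovering
  //
  // with RecoveryTooFull as the local bail-out when the OSD cannot take on
  // more recovery work.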
2157
2158 struct Activating : boost::statechart::state< Activating, Active >, NamedState {
2159 typedef boost::mpl::list <
2160 boost::statechart::transition< AllReplicasRecovered, Recovered >,
2161 boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
2162 boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved >
2163 > reactions;
2164 explicit Activating(my_context ctx);
2165 void exit();
2166 };
2167
2168 struct Stray : boost::statechart::state< Stray, Started >, NamedState {
2169 map<int, pair<pg_query_t, epoch_t> > pending_queries;
2170
2171 explicit Stray(my_context ctx);
2172 void exit();
2173
2174 typedef boost::mpl::list <
2175 boost::statechart::custom_reaction< MQuery >,
2176 boost::statechart::custom_reaction< MLogRec >,
2177 boost::statechart::custom_reaction< MInfoRec >,
2178 boost::statechart::custom_reaction< ActMap >,
2179 boost::statechart::custom_reaction< RecoveryDone >
2180 > reactions;
2181 boost::statechart::result react(const MQuery& query);
2182 boost::statechart::result react(const MLogRec& logevt);
2183 boost::statechart::result react(const MInfoRec& infoevt);
2184 boost::statechart::result react(const ActMap&);
2185 boost::statechart::result react(const RecoveryDone&) {
2186 return discard_event();
2187 }
2188 };
2189
2190 struct GetLog;
2191
2192 struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
2193 set<pg_shard_t> peer_info_requested;
2194
2195 explicit GetInfo(my_context ctx);
2196 void exit();
2197 void get_infos();
2198
2199 typedef boost::mpl::list <
2200 boost::statechart::custom_reaction< QueryState >,
2201 boost::statechart::transition< GotInfo, GetLog >,
2202 boost::statechart::custom_reaction< MNotifyRec >,
2203 boost::statechart::transition< IsDown, Down >
2204 > reactions;
2205 boost::statechart::result react(const QueryState& q);
2206 boost::statechart::result react(const MNotifyRec& infoevt);
2207 };
2208
2209 struct GotLog : boost::statechart::event< GotLog > {
2210 GotLog() : boost::statechart::event< GotLog >() {}
2211 };
2212
2213 struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState {
2214 pg_shard_t auth_log_shard;
2215 boost::intrusive_ptr<MOSDPGLog> msg;
2216
2217 explicit GetLog(my_context ctx);
2218 void exit();
2219
2220 typedef boost::mpl::list <
2221 boost::statechart::custom_reaction< QueryState >,
2222 boost::statechart::custom_reaction< MLogRec >,
2223 boost::statechart::custom_reaction< GotLog >,
2224 boost::statechart::custom_reaction< AdvMap >,
2225 boost::statechart::transition< IsIncomplete, Incomplete >
2226 > reactions;
2227 boost::statechart::result react(const AdvMap&);
2228 boost::statechart::result react(const QueryState& q);
2229 boost::statechart::result react(const MLogRec& logevt);
2230 boost::statechart::result react(const GotLog&);
2231 };
2232
2233 struct WaitUpThru;
2234
2235 struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState {
2236 set<pg_shard_t> peer_missing_requested;
2237
2238 explicit GetMissing(my_context ctx);
2239 void exit();
2240
2241 typedef boost::mpl::list <
2242 boost::statechart::custom_reaction< QueryState >,
2243 boost::statechart::custom_reaction< MLogRec >,
2244 boost::statechart::transition< NeedUpThru, WaitUpThru >
2245 > reactions;
2246 boost::statechart::result react(const QueryState& q);
2247 boost::statechart::result react(const MLogRec& logevt);
2248 };
2249
2250 struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState {
2251 explicit WaitUpThru(my_context ctx);
2252 void exit();
2253
2254 typedef boost::mpl::list <
2255 boost::statechart::custom_reaction< QueryState >,
2256 boost::statechart::custom_reaction< ActMap >,
2257 boost::statechart::custom_reaction< MLogRec >
2258 > reactions;
2259 boost::statechart::result react(const QueryState& q);
2260 boost::statechart::result react(const ActMap& am);
2261 boost::statechart::result react(const MLogRec& logrec);
2262 };
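  // Editorial note (behavior summarized, not stated in this header):
  // WaitUpThru parks peering until the monitors have recorded this OSD's
  // up_thru for the current interval; the ActMap reaction re-evaluates on
  // each map change so the PG can proceed once the updated OSDMap arrives.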
2263
2264 struct Down : boost::statechart::state< Down, Peering>, NamedState {
2265 explicit Down(my_context ctx);
2266 typedef boost::mpl::list <
2267 boost::statechart::custom_reaction< QueryState >
2268 > reactions;
2269 boost::statechart::result react(const QueryState& infoevt);
2270 void exit();
2271 };
2272
2273 struct Incomplete : boost::statechart::state< Incomplete, Peering>, NamedState {
2274 typedef boost::mpl::list <
2275 boost::statechart::custom_reaction< AdvMap >,
2276 boost::statechart::custom_reaction< MNotifyRec >,
2277 boost::statechart::custom_reaction< QueryState >
2278 > reactions;
2279 explicit Incomplete(my_context ctx);
2280 boost::statechart::result react(const AdvMap &advmap);
2281 boost::statechart::result react(const MNotifyRec& infoevt);
2282 boost::statechart::result react(const QueryState& infoevt);
2283 void exit();
2284 };
2285
2286
2287 RecoveryMachine machine;
2288 PG *pg;
2289
2290 /// context passed in by state machine caller
2291 RecoveryCtx *orig_ctx;
2292
2293 /// populated if we are buffering messages pending a flush
2294 boost::optional<BufferedRecoveryMessages> messages_pending_flush;
2295
2296 /**
2297 * populated between start_handle() and end_handle(); points into
2298 * the message lists of messages_pending_flush while messages are
2299 * being buffered, or into orig_ctx otherwise
2300 */
2301 boost::optional<RecoveryCtx> rctx;
2302
2303 public:
2304 explicit RecoveryState(PG *pg)
2305 : machine(this, pg), pg(pg), orig_ctx(0) {
2306 machine.initiate();
2307 }
2308
2309 void handle_event(const boost::statechart::event_base &evt,
2310 RecoveryCtx *rctx) {
2311 start_handle(rctx);
2312 machine.process_event(evt);
2313 end_handle();
2314 }
2315
2316 void handle_event(CephPeeringEvtRef evt,
2317 RecoveryCtx *rctx) {
2318 start_handle(rctx);
2319 machine.process_event(evt->get_event());
2320 end_handle();
2321 }
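  // Illustrative sketch (editorial): callers funnel every peering event
  // through handle_event() so that start_handle()/end_handle() can buffer
  // outgoing messages until the enclosing transaction context is dispatched.
  // A hypothetical driver (helper names assumed, not part of this header)
  // might do:
  //
  //   PG::RecoveryCtx rctx = create_context();          // assumed helper
  //   pg->recovery_state.handle_event(ActMap(), &rctx);
  //   dispatch_context(rctx, pg, pg->get_osdmap());     // assumed helper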
2322
2323 } recovery_state;
2324
2325
2326 public:
2327 PG(OSDService *o, OSDMapRef curmap,
2328 const PGPool &pool, spg_t p);
2329 ~PG() override;
2330
2331 private:
2332 // Prevent copying
2333 explicit PG(const PG& rhs);
2334 PG& operator=(const PG& rhs);
2335 const spg_t pg_id;
2336 uint64_t peer_features;
2337 uint64_t acting_features;
2338 uint64_t upacting_features;
2339
2340 epoch_t last_epoch;
2341
2342 public:
2343 const spg_t& get_pgid() const { return pg_id; }
2344
2345 void reset_min_peer_features() {
2346 peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
2347 }
2348 uint64_t get_min_peer_features() const { return peer_features; }
2349 void apply_peer_features(uint64_t f) { peer_features &= f; }
2350
2351 uint64_t get_min_acting_features() const { return acting_features; }
2352 uint64_t get_min_upacting_features() const { return upacting_features; }
2353 bool perform_deletes_during_peering() const {
2354 return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES));
2355 }
2356
2357 void init_primary_up_acting(
2358 const vector<int> &newup,
2359 const vector<int> &newacting,
2360 int new_up_primary,
2361 int new_acting_primary) {
2362 actingset.clear();
2363 acting = newacting;
2364 for (uint8_t i = 0; i < acting.size(); ++i) {
2365 if (acting[i] != CRUSH_ITEM_NONE)
2366 actingset.insert(
2367 pg_shard_t(
2368 acting[i],
2369 pool.info.ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
2370 }
2371 upset.clear();
2372 up = newup;
2373 for (uint8_t i = 0; i < up.size(); ++i) {
2374 if (up[i] != CRUSH_ITEM_NONE)
2375 upset.insert(
2376 pg_shard_t(
2377 up[i],
2378 pool.info.ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
2379 }
2380 if (!pool.info.ec_pool()) {
2381 up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD);
2382 primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD);
2383 return;
2384 }
2385 up_primary = pg_shard_t();
2386 primary = pg_shard_t();
2387 for (uint8_t i = 0; i < up.size(); ++i) {
2388 if (up[i] == new_up_primary) {
2389 up_primary = pg_shard_t(up[i], shard_id_t(i));
2390 break;
2391 }
2392 }
2393 for (uint8_t i = 0; i < acting.size(); ++i) {
2394 if (acting[i] == new_acting_primary) {
2395 primary = pg_shard_t(acting[i], shard_id_t(i));
2396 break;
2397 }
2398 }
2399 assert(up_primary.osd == new_up_primary);
2400 assert(primary.osd == new_acting_primary);
2401 }
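  // Worked example (editorial): for an erasure-coded pool with
  // newacting = {3, 1, 4} and new_acting_primary = 3, the loops above yield
  // actingset = { 3(0), 1(1), 4(2) } and primary = 3(0), since the shard id
  // is the position in the acting vector. For a replicated pool the same
  // input gives actingset = { 3, 1, 4 } with shard_id_t::NO_SHARD and
  // primary = pg_shard_t(3, shard_id_t::NO_SHARD).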
2402 pg_shard_t get_primary() const { return primary; }
2403
2404 int get_role() const { return role; }
2405 void set_role(int r) { role = r; }
2406
2407 bool is_primary() const { return pg_whoami == primary; }
2408 bool is_replica() const { return role > 0; }
2409
2410 epoch_t get_last_peering_reset() const { return last_peering_reset; }
2411
2412 //int get_state() const { return state; }
2413 bool state_test(int m) const { return (state & m) != 0; }
2414 void state_set(int m) { state |= m; }
2415 void state_clear(int m) { state &= ~m; }
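  // Illustrative sketch (editorial): state is a bitmask of PG_STATE_* flags,
  // so several conditions can hold at once:
  //
  //   state_set(PG_STATE_ACTIVE | PG_STATE_DEGRADED);
  //   assert(state_test(PG_STATE_ACTIVE));      // set
  //   state_clear(PG_STATE_DEGRADED);
  //   assert(!state_test(PG_STATE_DEGRADED));   // cleared; ACTIVE remains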
2416
2417 bool is_complete() const { return info.last_complete == info.last_update; }
2418 bool should_send_notify() const { return send_notify; }
2419
2420 int get_state() const { return state; }
2421 bool is_active() const { return state_test(PG_STATE_ACTIVE); }
2422 bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
2423 bool is_peering() const { return state_test(PG_STATE_PEERING); }
2424 bool is_down() const { return state_test(PG_STATE_DOWN); }
2425 bool is_recovery_unfound() const { return state_test(PG_STATE_RECOVERY_UNFOUND); }
2426 bool is_backfill_unfound() const { return state_test(PG_STATE_BACKFILL_UNFOUND); }
2427 bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
2428 bool is_clean() const { return state_test(PG_STATE_CLEAN); }
2429 bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
2430 bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
2431
2432 bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); }
2433 bool is_remapped() const { return state_test(PG_STATE_REMAPPED); }
2434 bool is_peered() const {
2435 return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
2436 }
2437
2438 bool is_empty() const { return info.last_update == eversion_t(0,0); }
2439
2440 void init(
2441 int role,
2442 const vector<int>& up,
2443 int up_primary,
2444 const vector<int>& acting,
2445 int acting_primary,
2446 const pg_history_t& history,
2447 const PastIntervals& pim,
2448 bool backfill,
2449 ObjectStore::Transaction *t);
2450
2451 // pg on-disk state
2452 void do_pending_flush();
2453
2454 static void _create(ObjectStore::Transaction& t, spg_t pgid, int bits);
2455 static void _init(ObjectStore::Transaction& t,
2456 spg_t pgid, const pg_pool_t *pool);
2457
2458private:
2459 void prepare_write_info(map<string,bufferlist> *km);
2460
2461 void update_store_with_options();
2462 void update_store_on_load();
2463
2464public:
2465 static int _prepare_write_info(
2466 CephContext* cct,
2467 map<string,bufferlist> *km,
2468 epoch_t epoch,
2469 pg_info_t &info,
2470 pg_info_t &last_written_info,
2471 PastIntervals &past_intervals,
2472 bool dirty_big_info,
2473 bool dirty_epoch,
2474 bool try_fast_info,
2475 PerfCounters *logger = nullptr);
2476 void write_if_dirty(ObjectStore::Transaction& t);
2477
2478 PGLog::IndexedLog projected_log;
2479 bool check_in_progress_op(
2480 const osd_reqid_t &r,
2481 eversion_t *version,
2482 version_t *user_version,
2483 int *return_code) const;
2484 eversion_t projected_last_update;
2485 eversion_t get_next_version() const {
2486 eversion_t at_version(
2487 get_osdmap()->get_epoch(),
2488 projected_last_update.version+1);
2489 assert(at_version > info.last_update);
2490 assert(at_version > pg_log.get_head());
2491 assert(at_version > projected_last_update);
2492 return at_version;
2493 }
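  // Worked example (editorial): with an OSDMap at epoch 120 and
  // projected_last_update == eversion_t(118, 41), get_next_version() returns
  // eversion_t(120, 42) -- the epoch tracks the live map while the version
  // counter advances by one, keeping the result strictly ahead of
  // info.last_update, the log head, and projected_last_update.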
2494
2495 void add_log_entry(const pg_log_entry_t& e, bool applied);
2496 void append_log(
2497 const vector<pg_log_entry_t>& logv,
2498 eversion_t trim_to,
2499 eversion_t roll_forward_to,
2500 ObjectStore::Transaction &t,
2501 bool transaction_applied = true);
2502 bool check_log_for_corruption(ObjectStore *store);
2503 void trim_log();
2504
2505 std::string get_corrupt_pg_log_name() const;
2506 static int read_info(
2507 ObjectStore *store, spg_t pgid, const coll_t &coll,
2508 bufferlist &bl, pg_info_t &info, PastIntervals &past_intervals,
2509 __u8 &);
2510 void read_state(ObjectStore *store, bufferlist &bl);
2511 static bool _has_removal_flag(ObjectStore *store, spg_t pgid);
2512 static int peek_map_epoch(ObjectStore *store, spg_t pgid,
2513 epoch_t *pepoch, bufferlist *bl);
2514 void update_snap_map(
2515 const vector<pg_log_entry_t> &log_entries,
2516 ObjectStore::Transaction& t);
2517
2518 void filter_snapc(vector<snapid_t> &snaps);
2519
2520 void log_weirdness();
2521
2522 virtual void kick_snap_trim() = 0;
2523 virtual void snap_trimmer_scrub_complete() = 0;
2524 bool requeue_scrub(bool high_priority = false);
2525 void queue_recovery();
2526 bool queue_scrub();
2527 unsigned get_scrub_priority();
2528
2529 /// share pg info after a pg is active
2530 void share_pg_info();
2531
2532
2533 bool append_log_entries_update_missing(
2534 const mempool::osd_pglog::list<pg_log_entry_t> &entries,
2535 ObjectStore::Transaction &t);
2536
2537 /**
2538 * Merge new log entries, updating the missing sets as necessary on
2539 * all actingbackfill logs and missing sets (also missing_loc)
2540 */
2541 void merge_new_log_entries(
2542 const mempool::osd_pglog::list<pg_log_entry_t> &entries,
2543 ObjectStore::Transaction &t);
2544
2545 void reset_interval_flush();
2546 void start_peering_interval(
2547 const OSDMapRef lastmap,
2548 const vector<int>& newup, int up_primary,
2549 const vector<int>& newacting, int acting_primary,
2550 ObjectStore::Transaction *t);
2551 void on_new_interval();
2552 virtual void _on_new_interval() = 0;
2553 void start_flush(ObjectStore::Transaction *t,
2554 list<Context *> *on_applied,
2555 list<Context *> *on_safe);
2556 void set_last_peering_reset();
2557 bool pg_has_reset_since(epoch_t e) {
2558 assert(is_locked());
2559 return deleting || e < get_last_peering_reset();
2560 }
2561
2562 void update_history(const pg_history_t& history);
2563 void fulfill_info(pg_shard_t from, const pg_query_t &query,
2564 pair<pg_shard_t, pg_info_t> &notify_info);
2565 void fulfill_log(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch);
2566
2567 void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap);
2568
2569 bool should_restart_peering(
2570 int newupprimary,
2571 int newactingprimary,
2572 const vector<int>& newup,
2573 const vector<int>& newacting,
2574 OSDMapRef lastmap,
2575 OSDMapRef osdmap);
2576
2577 // OpRequest queueing
2578 bool can_discard_op(OpRequestRef& op);
2579 bool can_discard_scan(OpRequestRef op);
2580 bool can_discard_backfill(OpRequestRef op);
2581 bool can_discard_request(OpRequestRef& op);
2582
2583 template<typename T, int MSGTYPE>
2584 bool can_discard_replica_op(OpRequestRef& op);
2585
2586 bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch);
2587 bool old_peering_evt(CephPeeringEvtRef evt) {
2588 return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested());
2589 }
2590 static bool have_same_or_newer_map(epoch_t cur_epoch, epoch_t e) {
2591 return e <= cur_epoch;
2592 }
2593 bool have_same_or_newer_map(epoch_t e) {
2594 return e <= get_osdmap()->get_epoch();
2595 }
2596
2597 bool op_has_sufficient_caps(OpRequestRef& op);
2598
2599
2600 // recovery bits
2601 void take_waiters();
2602 void queue_peering_event(CephPeeringEvtRef evt);
2603 void handle_peering_event(CephPeeringEvtRef evt, RecoveryCtx *rctx);
2604 void queue_query(epoch_t msg_epoch, epoch_t query_epoch,
2605 pg_shard_t from, const pg_query_t& q);
2606 void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
2607 void queue_flushed(epoch_t started_at);
2608 void handle_advance_map(
2609 OSDMapRef osdmap, OSDMapRef lastmap,
2610 vector<int>& newup, int up_primary,
2611 vector<int>& newacting, int acting_primary,
2612 RecoveryCtx *rctx);
2613 void handle_activate_map(RecoveryCtx *rctx);
2614 void handle_create(RecoveryCtx *rctx);
2615 void handle_loaded(RecoveryCtx *rctx);
2616 void handle_query_state(Formatter *f);
2617
2618 virtual void on_removal(ObjectStore::Transaction *t) = 0;
2619
2620
2621 // abstract bits
2622 virtual void do_request(
2623 OpRequestRef& op,
2624 ThreadPool::TPHandle &handle
2625 ) = 0;
2626
2627 virtual void do_op(OpRequestRef& op) = 0;
2628 virtual void do_sub_op(OpRequestRef op) = 0;
2629 virtual void do_sub_op_reply(OpRequestRef op) = 0;
2630 virtual void do_scan(
2631 OpRequestRef op,
2632 ThreadPool::TPHandle &handle
2633 ) = 0;
2634 virtual void do_backfill(OpRequestRef op) = 0;
2635 virtual void snap_trimmer(epoch_t epoch_queued) = 0;
2636
2637 virtual int do_command(
2638 cmdmap_t cmdmap,
2639 ostream& ss,
2640 bufferlist& idata,
2641 bufferlist& odata,
2642 ConnectionRef conn,
2643 ceph_tid_t tid) = 0;
2644
2645 virtual void on_role_change() = 0;
2646 virtual void on_pool_change() = 0;
2647 virtual void on_change(ObjectStore::Transaction *t) = 0;
2648 virtual void on_activate() = 0;
2649 virtual void on_flushed() = 0;
2650 virtual void on_shutdown() = 0;
2651 virtual void check_blacklisted_watchers() = 0;
2652 virtual void get_watchers(std::list<obj_watch_item_t>&) = 0;
2653
2654 virtual bool agent_work(int max) = 0;
2655 virtual bool agent_work(int max, int agent_flush_quota) = 0;
2656 virtual void agent_stop() = 0;
2657 virtual void agent_delay() = 0;
2658 virtual void agent_clear() = 0;
2659 virtual void agent_choose_mode_restart() = 0;
2660};
2661
2662ostream& operator<<(ostream& out, const PG& pg);
2663
2664ostream& operator<<(ostream& out, const PG::BackfillInterval& bi);
2665
2666#endif