#include <boost/scoped_ptr.hpp>
#include <boost/circular_buffer.hpp>
#include <boost/container/flat_set.hpp>
-#include "include/memory.h"
#include "include/mempool.h"
// re-include our assert to clobber boost's
-#include "include/assert.h"
+#include "include/ceph_assert.h"
#include "include/types.h"
#include "include/stringify.h"
#include "messages/MOSDPGLog.h"
#include "include/str_list.h"
#include "PGBackend.h"
+#include "PGPeeringEvent.h"
+
+#include "mgr/OSDPerfMetricTypes.h"
#include <atomic>
#include <list>
#include <stack>
#include <string>
#include <tuple>
-using namespace std;
-
-// #include "include/unordered_map.h"
-// #include "include/unordered_set.h"
//#define DEBUG_RECOVERY_OIDS // track set of recovering oids explicitly, to find counting bugs
+//#define PG_DEBUG_REFS // track provenance of pg refs, helpful for finding leaks
class OSD;
class OSDService;
+class OSDShard;
+class OSDShardPGSlot;
class MOSDOp;
class MOSDPGScan;
class MOSDPGBackfill;
typedef OpRequest::Ref OpRequestRef;
class MOSDPGLog;
class CephContext;
+class DynamicPerfStats;
namespace Scrub {
class Store;
}
-void intrusive_ptr_add_ref(PG *pg);
-void intrusive_ptr_release(PG *pg);
-
using state_history_entry = std::tuple<utime_t, utime_t, const char*>;
using embedded_state = std::pair<utime_t, const char*>;
PGRecoveryStats() : lock("PGRecoverStats::lock") {}
void reset() {
- Mutex::Locker l(lock);
+ std::lock_guard l(lock);
info.clear();
}
void dump(ostream& out) {
- Mutex::Locker l(lock);
+ std::lock_guard l(lock);
for (map<const char *,per_state_info>::iterator p = info.begin(); p != info.end(); ++p) {
per_state_info& i = p->second;
out << i.enter << "\t" << i.exit << "\t"
}
void dump_formatted(Formatter *f) {
- Mutex::Locker l(lock);
+ std::lock_guard l(lock);
f->open_array_section("pg_recovery_stats");
for (map<const char *,per_state_info>::iterator p = info.begin();
p != info.end(); ++p) {
}
void log_enter(const char *s) {
- Mutex::Locker l(lock);
+ std::lock_guard l(lock);
info[s].enter++;
}
void log_exit(const char *s, utime_t dur, uint64_t events, utime_t event_dur) {
- Mutex::Locker l(lock);
+ std::lock_guard l(lock);
per_state_info &i = info[s];
i.exit++;
i.total_time += dur;
epoch_t cached_epoch;
int64_t id;
string name;
- uint64_t auid;
pg_pool_t info;
SnapContext snapc; // the default pool snapc, ready to go.
+ // these two sets are for < mimic only
interval_set<snapid_t> cached_removed_snaps; // current removed_snaps set
interval_set<snapid_t> newly_removed_snaps; // newly removed in the last epoch
- PGPool(CephContext* cct, OSDMapRef map, int64_t i)
+ PGPool(CephContext* cct, OSDMapRef map, int64_t i, const pg_pool_t& info,
+ const string& name)
: cct(cct),
cached_epoch(map->get_epoch()),
id(i),
- name(map->get_pool_name(id)),
- auid(map->get_pg_pool(id)->auid) {
- const pg_pool_t *pi = map->get_pg_pool(id);
- assert(pi);
- info = *pi;
- snapc = pi->get_snap_context();
- pi->build_removed_snaps(cached_removed_snaps);
+ name(name),
+ info(info) {
+ snapc = info.get_snap_context();
+ if (map->require_osd_release < CEPH_RELEASE_MIMIC) {
+ info.build_removed_snaps(cached_removed_snaps);
+ }
}
- void update(OSDMapRef map);
+ void update(CephContext *cct, OSDMapRef map);
};
/** PG - Replica Placement Group
class PG : public DoutPrefixProvider {
public:
+ // -- members --
+ const spg_t pg_id;
+ const coll_t coll;
+
+ ObjectStore::CollectionHandle ch;
+
+ struct RecoveryCtx;
+
+ // -- methods --
+ std::ostream& gen_prefix(std::ostream& out) const override;
+ CephContext *get_cct() const override {
+ return cct;
+ }
+ unsigned get_subsys() const override {
+ return ceph_subsys_osd;
+ }
+
+ const OSDMapRef& get_osdmap() const {
+ ceph_assert(is_locked());
+ ceph_assert(osdmap_ref);
+ return osdmap_ref;
+ }
+ epoch_t get_osdmap_epoch() const {
+ return osdmap_ref->get_epoch();
+ }
+
+ void lock_suspend_timeout(ThreadPool::TPHandle &handle) {
+ handle.suspend_tp_timeout();
+ lock();
+ handle.reset_tp_timeout();
+ }
+ void lock(bool no_lockdep = false) const;
+ void unlock() const {
+ //generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;
+ ceph_assert(!dirty_info);
+ ceph_assert(!dirty_big_info);
+ _lock.Unlock();
+ }
+ bool is_locked() const {
+ return _lock.is_locked();
+ }
+
+ const spg_t& get_pgid() const {
+ return pg_id;
+ }
+
+ const PGPool& get_pool() const {
+ return pool;
+ }
+ uint64_t get_last_user_version() const {
+ return info.last_user_version;
+ }
+ const pg_history_t& get_history() const {
+ return info.history;
+ }
+ bool get_need_up_thru() const {
+ return need_up_thru;
+ }
+ epoch_t get_same_interval_since() const {
+ return info.history.same_interval_since;
+ }
+
+ void set_last_scrub_stamp(utime_t t) {
+ info.stats.last_scrub_stamp = t;
+ info.history.last_scrub_stamp = t;
+ }
+
+ void set_last_deep_scrub_stamp(utime_t t) {
+ info.stats.last_deep_scrub_stamp = t;
+ info.history.last_deep_scrub_stamp = t;
+ }
+
+ bool is_deleting() const {
+ return deleting;
+ }
+ bool is_deleted() const {
+ return deleted;
+ }
+ bool is_replica() const {
+ return role > 0;
+ }
+ bool is_primary() const {
+ return pg_whoami == primary;
+ }
+ bool pg_has_reset_since(epoch_t e) {
+ ceph_assert(is_locked());
+ return deleted || e < get_last_peering_reset();
+ }
+
+ bool is_ec_pg() const {
+ return pool.info.is_erasure();
+ }
+ int get_role() const {
+ return role;
+ }
+ const vector<int> get_acting() const {
+ return acting;
+ }
+ int get_acting_primary() const {
+ return primary.osd;
+ }
+ pg_shard_t get_primary() const {
+ return primary;
+ }
+ const vector<int> get_up() const {
+ return up;
+ }
+ int get_up_primary() const {
+ return up_primary.osd;
+ }
+ const PastIntervals& get_past_intervals() const {
+ return past_intervals;
+ }
+
+ /// initialize created PG
+ void init(
+ int role,
+ const vector<int>& up,
+ int up_primary,
+ const vector<int>& acting,
+ int acting_primary,
+ const pg_history_t& history,
+ const PastIntervals& pim,
+ bool backfill,
+ ObjectStore::Transaction *t);
+
+ /// read existing pg state off disk
+ void read_state(ObjectStore *store);
+ static int peek_map_epoch(ObjectStore *store, spg_t pgid, epoch_t *pepoch);
+
+ static int get_latest_struct_v() {
+ return latest_struct_v;
+ }
+ static int get_compat_struct_v() {
+ return compat_struct_v;
+ }
+ static int read_info(
+ ObjectStore *store, spg_t pgid, const coll_t &coll,
+ pg_info_t &info, PastIntervals &past_intervals,
+ __u8 &);
+ static bool _has_removal_flag(ObjectStore *store, spg_t pgid);
+
+ void rm_backoff(BackoffRef b);
+
+ void update_snap_mapper_bits(uint32_t bits) {
+ snap_mapper.update_bits(bits);
+ }
+ void start_split_stats(const set<spg_t>& childpgs, vector<object_stat_sum_t> *v);
+ virtual void split_colls(
+ spg_t child,
+ int split_bits,
+ int seed,
+ const pg_pool_t *pool,
+ ObjectStore::Transaction *t) = 0;
+ void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
+ void merge_from(map<spg_t,PGRef>& sources, RecoveryCtx *rctx,
+ unsigned split_bits,
+ const pg_merge_meta_t& last_pg_merge_meta);
+ void finish_split_stats(const object_stat_sum_t& stats, ObjectStore::Transaction *t);
+
+ void scrub(epoch_t queued, ThreadPool::TPHandle &handle);
+ void reg_next_scrub();
+ void unreg_next_scrub();
+
+ bool is_forced_recovery_or_backfill() const {
+ return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL);
+ }
bool set_force_recovery(bool b);
bool set_force_backfill(bool b);
+ void queue_peering_event(PGPeeringEventRef evt);
+ void do_peering_event(PGPeeringEventRef evt, RecoveryCtx *rcx);
+ void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
+ void queue_flushed(epoch_t started_at);
+ void handle_advance_map(
+ OSDMapRef osdmap, OSDMapRef lastmap,
+ vector<int>& newup, int up_primary,
+ vector<int>& newacting, int acting_primary,
+ RecoveryCtx *rctx);
+ void handle_activate_map(RecoveryCtx *rctx);
+ void handle_initialize(RecoveryCtx *rctx);
+ void handle_query_state(Formatter *f);
+
+ /**
+ * @param ops_begun returns how many recovery ops the function started
+ * @returns true if any useful work was accomplished; false otherwise
+ */
+ virtual bool start_recovery_ops(
+ uint64_t max,
+ ThreadPool::TPHandle &handle,
+ uint64_t *ops_begun) = 0;
+
+ // more work after the above, but with a RecoveryCtx
+ void find_unfound(epoch_t queued, RecoveryCtx *rctx);
+
+ virtual void get_watchers(std::list<obj_watch_item_t> *ls) = 0;
+
+ void dump_pgstate_history(Formatter *f);
+ void dump_missing(Formatter *f);
+
+ void get_pg_stats(std::function<void(const pg_stat_t&, epoch_t lec)> f);
+ void with_heartbeat_peers(std::function<void(int)> f);
+
+ void shutdown();
+ virtual void on_shutdown() = 0;
+
+ bool get_must_scrub() const {
+ return scrubber.must_scrub;
+ }
+ bool sched_scrub();
+
+ virtual void do_request(
+ OpRequestRef& op,
+ ThreadPool::TPHandle &handle
+ ) = 0;
+ virtual void clear_cache() = 0;
+ virtual int get_cache_obj_count() = 0;
+
+ virtual void snap_trimmer(epoch_t epoch_queued) = 0;
+ virtual int do_command(
+ cmdmap_t cmdmap,
+ ostream& ss,
+ bufferlist& idata,
+ bufferlist& odata,
+ ConnectionRef conn,
+ ceph_tid_t tid) = 0;
+
+ virtual bool agent_work(int max) = 0;
+ virtual bool agent_work(int max, int agent_flush_quota) = 0;
+ virtual void agent_stop() = 0;
+ virtual void agent_delay() = 0;
+ virtual void agent_clear() = 0;
+ virtual void agent_choose_mode_restart() = 0;
+
+ virtual void on_removal(ObjectStore::Transaction *t) = 0;
+
+ void _delete_some(ObjectStore::Transaction *t);
+
+ virtual void set_dynamic_perf_stats_queries(
+ const std::list<OSDPerfMetricQuery> &queries) {
+ }
+ virtual void get_dynamic_perf_stats(DynamicPerfStats *stats) {
+ }
+
+ // reference counting
+#ifdef PG_DEBUG_REFS
+ uint64_t get_with_id();
+ void put_with_id(uint64_t);
+ void dump_live_ids();
+#endif
+ void get(const char* tag);
+ void put(const char* tag);
+ int get_num_ref() {
+ return ref;
+ }
+
+ // ctor
+ PG(OSDService *o, OSDMapRef curmap,
+ const PGPool &pool, spg_t p);
+ ~PG() override;
+
+ // prevent copying
+ explicit PG(const PG& rhs) = delete;
+ PG& operator=(const PG& rhs) = delete;
+
protected:
+ // -------------
+ // protected
OSDService *osd;
+public:
+ OSDShard *osd_shard = nullptr;
+ OSDShardPGSlot *pg_slot = nullptr;
+protected:
CephContext *cct;
+
+ // osdmap
+ OSDMapRef osdmap_ref;
+
+ PGPool pool;
+
+ // locking and reference counting.
+ // I destroy myself when the reference count hits zero.
+ // lock() should be called before doing anything.
+ // get() should be called on pointer copy (to another thread, etc.).
+ // put() should be called on destruction of some previously copied pointer.
+ // unlock() when done with the current pointer (_most common_).
+ mutable Mutex _lock = {"PG::_lock"};
+
+ std::atomic<unsigned int> ref{0};
+
+#ifdef PG_DEBUG_REFS
+ Mutex _ref_id_lock = {"PG::_ref_id_lock"};
+ map<uint64_t, string> _live_ids;
+ map<string, uint64_t> _tag_counts;
+ uint64_t _ref_id = 0;
+
+ friend uint64_t get_with_id(PG *pg) { return pg->get_with_id(); }
+ friend void put_with_id(PG *pg, uint64_t id) { return pg->put_with_id(id); }
+#endif
+
+private:
+ friend void intrusive_ptr_add_ref(PG *pg) {
+ pg->get("intptr");
+ }
+ friend void intrusive_ptr_release(PG *pg) {
+ pg->put("intptr");
+ }
+
+
+ // =====================
+
+protected:
OSDriver osdriver;
SnapMapper snap_mapper;
bool eio_errors_to_process = false;
virtual PGBackend *get_pgbackend() = 0;
-public:
- std::string gen_prefix() const override;
- CephContext *get_cct() const override { return cct; }
- unsigned get_subsys() const override { return ceph_subsys_osd; }
+ virtual const PGBackend* get_pgbackend() const = 0;
+protected:
/*** PG ****/
- void update_snap_mapper_bits(uint32_t bits) {
- snap_mapper.update_bits(bits);
- }
/// get_is_recoverable_predicate: caller owns returned pointer and must delete when done
- IsPGRecoverablePredicate *get_is_recoverable_predicate() {
+ IsPGRecoverablePredicate *get_is_recoverable_predicate() const {
return get_pgbackend()->get_is_recoverable_predicate();
}
protected:
- OSDMapRef osdmap_ref;
- OSDMapRef last_persisted_osdmap_ref;
- PGPool pool;
+ epoch_t last_persisted_osdmap;
void requeue_map_waiters();
void update_osdmap_ref(OSDMapRef newmap) {
- assert(_lock.is_locked_by_me());
+ ceph_assert(_lock.is_locked_by_me());
osdmap_ref = std::move(newmap);
}
-public:
- OSDMapRef get_osdmap() const {
- assert(is_locked());
- assert(osdmap_ref);
- return osdmap_ref;
- }
protected:
- /** locking and reference counting.
- * I destroy myself when the reference count hits zero.
- * lock() should be called before doing anything.
- * get() should be called on pointer copy (to another thread, etc.).
- * put() should be called on destruction of some previously copied pointer.
- * unlock() when done with the current pointer (_most common_).
- */
- mutable Mutex _lock;
- std::atomic_uint ref{0};
-
-#ifdef PG_DEBUG_REFS
- Mutex _ref_id_lock;
- map<uint64_t, string> _live_ids;
- map<string, uint64_t> _tag_counts;
- uint64_t _ref_id;
-#endif
-public:
bool deleting; // true while in removing or OSD is shutting down
+ atomic<bool> deleted = {false};
ZTracer::Endpoint trace_endpoint;
- void lock_suspend_timeout(ThreadPool::TPHandle &handle);
- void lock(bool no_lockdep = false) const;
- void unlock() const {
- //generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;
- assert(!dirty_info);
- assert(!dirty_big_info);
- _lock.Unlock();
- }
-
- bool is_locked() const {
- return _lock.is_locked();
- }
-
-#ifdef PG_DEBUG_REFS
- uint64_t get_with_id();
- void put_with_id(uint64_t);
- void dump_live_ids();
-#endif
- void get(const char* tag);
- void put(const char* tag);
+protected:
bool dirty_info, dirty_big_info;
-public:
- bool is_ec_pg() const {
- return pool.info.ec_pool();
- }
+protected:
// pg state
pg_info_t info; ///< current pg info
pg_info_t last_written_info; ///< last written info
- __u8 info_struct_v;
- static const __u8 cur_struct_v = 10;
+ __u8 info_struct_v = 0;
+ static const __u8 latest_struct_v = 10;
// v10 is the new past_intervals encoding
// v9 was fastinfo_key addition
// v8 was the move to a per-pg pgmeta object
// v7 was SnapMapper addition in 86658392516d5175b2756659ef7ffaaf95b0f8ad
// (first appeared in cuttlefish).
- static const __u8 compat_struct_v = 7;
- bool must_upgrade() {
- return info_struct_v < cur_struct_v;
- }
- bool can_upgrade() {
- return info_struct_v >= compat_struct_v;
- }
+ static const __u8 compat_struct_v = 10;
void upgrade(ObjectStore *store);
- const coll_t coll;
- ObjectStore::CollectionHandle ch;
+protected:
PGLog pg_log;
- static string get_info_key(spg_t pgid) {
- return stringify(pgid) + "_info";
- }
- static string get_biginfo_key(spg_t pgid) {
- return stringify(pgid) + "_biginfo";
- }
- static string get_epoch_key(spg_t pgid) {
- return stringify(pgid) + "_epoch";
- }
ghobject_t pgmeta_oid;
// ------------------
(l.other < r.other)));
}
friend ostream& operator<<(ostream& out, const loc_count_t& l) {
- assert(l.up >= 0);
- assert(l.other >= 0);
+ ceph_assert(l.up >= 0);
+ ceph_assert(l.other >= 0);
return out << "(" << l.up << "+" << l.other << ")";
}
};
pgs_by_shard_id(s, pgsbs);
for (auto shard: pgsbs) {
auto p = missing_by_count[shard.first].find(_get_count(shard.second));
- assert(p != missing_by_count[shard.first].end());
+ ceph_assert(p != missing_by_count[shard.first].end());
if (--p->second == 0) {
missing_by_count[shard.first].erase(p);
}
is_readable.reset(_is_readable);
is_recoverable.reset(_is_recoverable);
}
- string gen_prefix() const { return pg->gen_prefix(); }
+ std::ostream& gen_prefix(std::ostream& out) const {
+ return pg->gen_prefix(out);
+ }
bool needs_recovery(
const hobject_t &hoid,
eversion_t *v = 0) const {
return i->second.is_delete();
}
bool is_unfound(const hobject_t &hoid) const {
- return needs_recovery(hoid) && !is_deleted(hoid) && (
- !missing_loc.count(hoid) ||
- !(*is_recoverable)(missing_loc.find(hoid)->second));
+ auto it = needs_recovery_map.find(hoid);
+ if (it == needs_recovery_map.end()) {
+ return false;
+ }
+ if (it->second.is_delete()) {
+ return false;
+ }
+ auto mit = missing_loc.find(hoid);
+ return mit == missing_loc.end() || !(*is_recoverable)(mit->second);
}
bool readable_with_acting(
const hobject_t &hoid,
needs_recovery_map.begin();
i != needs_recovery_map.end();
++i) {
- if (is_unfound(i->first))
+ if (i->second.is_delete())
+ continue;
+ auto mi = missing_loc.find(i->first);
+ if (mi == missing_loc.end() || !(*is_recoverable)(mi->second))
++ret;
}
return ret;
needs_recovery_map.begin();
i != needs_recovery_map.end();
++i) {
- if (is_unfound(i->first))
+ if (i->second.is_delete())
+ continue;
+ auto mi = missing_loc.find(i->first);
+ if (mi == missing_loc.end() || !(*is_recoverable)(mi->second))
return true;
}
return false;
if (p != missing_loc.end()) {
_dec_count(p->second);
p->second.erase(location);
- _inc_count(p->second);
+ if (p->second.empty()) {
+ missing_loc.erase(p);
+ } else {
+ _inc_count(p->second);
+ }
}
}
+
+ void clear_location(const hobject_t &hoid) {
+ auto p = missing_loc.find(hoid);
+ if (p != missing_loc.end()) {
+ _dec_count(p->second);
+ missing_loc.erase(p);
+ }
+ }
+
void add_active_missing(const pg_missing_t &missing) {
for (map<hobject_t, pg_missing_item>::const_iterator i =
missing.get_items().begin();
lgeneric_dout(pg->cct, 0) << this << " " << pg->info.pgid << " unexpected need for "
<< i->first << " have " << j->second
<< " tried to add " << i->second << dendl;
- assert(i->second.need == j->second.need);
+ ceph_assert(i->second.need == j->second.need);
}
}
}
- void add_missing(const hobject_t &hoid, eversion_t need, eversion_t have) {
- needs_recovery_map[hoid] = pg_missing_item(need, have);
+ void add_missing(const hobject_t &hoid, eversion_t need, eversion_t have, bool is_delete=false) {
+ needs_recovery_map[hoid] = pg_missing_item(need, have, is_delete);
}
void revise_need(const hobject_t &hoid, eversion_t need) {
- assert(needs_recovery(hoid));
- needs_recovery_map[hoid].need = need;
+ auto it = needs_recovery_map.find(hoid);
+ ceph_assert(it != needs_recovery_map.end());
+ it->second.need = need;
}
/// Adds info about a possible recovery source
if (i == self)
continue;
auto pmiter = pmissing.find(i);
- assert(pmiter != pmissing.end());
+ ceph_assert(pmiter != pmissing.end());
miter = pmiter->second.get_items().find(hoid);
if (miter != pmiter->second.get_items().end()) {
item = miter->second;
return;
auto mliter =
missing_loc.insert(make_pair(hoid, set<pg_shard_t>())).first;
- assert(info.last_backfill.is_max());
- assert(info.last_update >= item->need);
+ ceph_assert(info.last_backfill.is_max());
+ ceph_assert(info.last_update >= item->need);
if (!missing.is_missing(hoid))
mliter->second.insert(self);
for (auto &&i: pmissing) {
+ if (i.first == self)
+ continue;
auto pinfoiter = pinfo.find(i.first);
- assert(pinfoiter != pinfo.end());
+ ceph_assert(pinfoiter != pinfo.end());
if (item->need <= pinfoiter->second.last_update &&
hoid <= pinfoiter->second.last_backfill &&
!i.second.is_missing(hoid))
}
const set<pg_shard_t> &get_locations(const hobject_t &hoid) const {
- return missing_loc.count(hoid) ?
- missing_loc.find(hoid)->second : empty_set;
+ auto it = missing_loc.find(hoid);
+ return it == missing_loc.end() ? empty_set : it->second;
}
const map<hobject_t, set<pg_shard_t>> &get_missing_locs() const {
return missing_loc;
int recovery_ops_active;
set<pg_shard_t> waiting_on_backfill;
#ifdef DEBUG_RECOVERY_OIDS
- set<hobject_t> recovering_oids;
+ multiset<hobject_t> recovering_oids;
#endif
protected:
int role; // 0 = primary, 1 = replica, -1=none.
- unsigned state; // PG_STATE_*
+ uint64_t state; // PG_STATE_*
bool send_notify; ///< true if we are non-primary and should notify the primary
-public:
+protected:
eversion_t last_update_ondisk; // last_update that has committed; ONLY DEFINED WHEN is_active()
eversion_t last_complete_ondisk; // last_complete that has committed.
eversion_t last_update_applied;
-
- struct C_UpdateLastRollbackInfoTrimmedToApplied : Context {
- PGRef pg;
- epoch_t e;
- eversion_t v;
- C_UpdateLastRollbackInfoTrimmedToApplied(PG *pg, epoch_t e, eversion_t v)
- : pg(pg), e(e), v(v) {}
- void finish(int) override {
- pg->lock();
- if (!pg->pg_has_reset_since(e)) {
- pg->last_rollback_info_trimmed_to_applied = v;
- }
- pg->unlock();
- }
- };
- // entries <= last_rollback_info_trimmed_to_applied have been trimmed,
- // and the transaction has applied
+ // entries <= last_rollback_info_trimmed_to_applied have been trimmed
eversion_t last_rollback_info_trimmed_to_applied;
// primary state
- public:
+protected:
pg_shard_t primary;
pg_shard_t pg_whoami;
pg_shard_t up_primary;
vector<int> up, acting, want_acting;
- set<pg_shard_t> actingbackfill, actingset, upset;
+ // acting_recovery_backfill contains shards that are acting,
+ // async recovery targets, or backfill targets.
+ set<pg_shard_t> acting_recovery_backfill, actingset, upset;
map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
eversion_t min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk
eversion_t pg_trim_to;
set<int> blocked_by; ///< osds we are blocked by (for pg stats)
+protected:
// [primary only] content recovery state
-
-public:
struct BufferedRecoveryMessages {
map<int, map<spg_t, pg_query_t> > query_map;
map<int, vector<pair<pg_notify_t, PastIntervals> > > info_map;
map<int, map<spg_t, pg_query_t> > *query_map;
map<int, vector<pair<pg_notify_t, PastIntervals> > > *info_map;
map<int, vector<pair<pg_notify_t, PastIntervals> > > *notify_list;
- set<PGRef> created_pgs;
- C_Contexts *on_applied;
- C_Contexts *on_safe;
ObjectStore::Transaction *transaction;
ThreadPool::TPHandle* handle;
RecoveryCtx(map<int, map<spg_t, pg_query_t> > *query_map,
vector<pair<pg_notify_t, PastIntervals> > > *info_map,
map<int,
vector<pair<pg_notify_t, PastIntervals> > > *notify_list,
- C_Contexts *on_applied,
- C_Contexts *on_safe,
ObjectStore::Transaction *transaction)
: query_map(query_map), info_map(info_map),
notify_list(notify_list),
- on_applied(on_applied),
- on_safe(on_safe),
transaction(transaction),
handle(NULL) {}
: query_map(&(buf.query_map)),
info_map(&(buf.info_map)),
notify_list(&(buf.notify_list)),
- on_applied(rctx.on_applied),
- on_safe(rctx.on_safe),
transaction(rctx.transaction),
handle(rctx.handle) {}
void accept_buffered_messages(BufferedRecoveryMessages &m) {
- assert(query_map);
- assert(info_map);
- assert(notify_list);
+ ceph_assert(query_map);
+ ceph_assert(info_map);
+ ceph_assert(notify_list);
for (map<int, map<spg_t, pg_query_t> >::iterator i = m.query_map.begin();
i != m.query_map.end();
++i) {
void send_notify(pg_shard_t to,
const pg_notify_t &info, const PastIntervals &pi) {
- assert(notify_list);
+ ceph_assert(notify_list);
(*notify_list)[to.osd].push_back(make_pair(info, pi));
}
};
-
+protected:
PGStateHistory pgstate_history;
bool need_up_thru;
set<pg_shard_t> stray_set; // non-acting osds that have PG data.
- eversion_t oldest_update; // acting: lowest (valid) last_update in active set
map<pg_shard_t, pg_info_t> peer_info; // info from peers (stray or prior)
+ map<pg_shard_t, int64_t> peer_bytes; // Peer's num_bytes from peer_info
set<pg_shard_t> peer_purged; // peers purged
map<pg_shard_t, pg_missing_t> peer_missing;
set<pg_shard_t> peer_log_requested; // logs i've requested (and start stamps)
// which are unfound on the primary
epoch_t last_peering_reset;
+ epoch_t get_last_peering_reset() const {
+ return last_peering_reset;
+ }
/* heartbeat peers */
void set_probe_targets(const set<pg_shard_t> &probe_set);
void clear_probe_targets();
-public:
+
Mutex heartbeat_peer_lock;
set<int> heartbeat_peers;
set<int> probe_targets;
+public:
/**
* BackfillInterval
*
/// drop first entry, and adjust @begin accordingly
void pop_front() {
- assert(!objects.empty());
+ ceph_assert(!objects.empty());
objects.erase(objects.begin());
trim();
}
bool backfill_reserved;
bool backfill_reserving;
- friend class OSD;
+ set<pg_shard_t> backfill_targets, async_recovery_targets;
-public:
- set<pg_shard_t> backfill_targets;
+ // The primary's num_bytes and local num_bytes for this pg, only valid
+ // during backfill for non-primary shards.
+ // Both of these are adjusted for EC to reflect the on-disk bytes
+ std::atomic<int64_t> primary_num_bytes = 0;
+ std::atomic<int64_t> local_num_bytes = 0;
+public:
bool is_backfill_targets(pg_shard_t osd) {
return backfill_targets.count(osd);
}
+ // Space reserved for backfill is primary_num_bytes - local_num_bytes
+ // Don't care that difference itself isn't atomic
+ uint64_t get_reserved_num_bytes() {
+ int64_t primary = primary_num_bytes.load();
+ int64_t local = local_num_bytes.load();
+ if (primary > local)
+ return primary - local;
+ else
+ return 0;
+ }
+
+ bool is_remote_backfilling() {
+ return primary_num_bytes.load() > 0;
+ }
+
+ void set_reserved_num_bytes(int64_t primary, int64_t local);
+ void clear_reserved_num_bytes();
+
+ // If num_bytes are inconsistent and local_num- goes negative
+ // it's ok, because it would then be ignored.
+
+ // The value of num_bytes could be negative,
+ // but we don't let local_num_bytes go negative.
+ void add_local_num_bytes(int64_t num_bytes) {
+ if (num_bytes) {
+ int64_t prev_bytes = local_num_bytes.load();
+ int64_t new_bytes;
+ do {
+ new_bytes = prev_bytes + num_bytes;
+ if (new_bytes < 0)
+ new_bytes = 0;
+ } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
+ }
+ }
+ void sub_local_num_bytes(int64_t num_bytes) {
+ ceph_assert(num_bytes >= 0);
+ if (num_bytes) {
+ int64_t prev_bytes = local_num_bytes.load();
+ int64_t new_bytes;
+ do {
+ new_bytes = prev_bytes - num_bytes;
+ if (new_bytes < 0)
+ new_bytes = 0;
+ } while(!local_num_bytes.compare_exchange_weak(prev_bytes, new_bytes));
+ }
+ }
+ // The value of num_bytes could be negative,
+ // but we don't let info.stats.stats.sum.num_bytes go negative.
+ void add_num_bytes(int64_t num_bytes) {
+ ceph_assert(_lock.is_locked_by_me());
+ if (num_bytes) {
+ info.stats.stats.sum.num_bytes += num_bytes;
+ if (info.stats.stats.sum.num_bytes < 0) {
+ info.stats.stats.sum.num_bytes = 0;
+ }
+ }
+ }
+ void sub_num_bytes(int64_t num_bytes) {
+ ceph_assert(_lock.is_locked_by_me());
+ ceph_assert(num_bytes >= 0);
+ if (num_bytes) {
+ info.stats.stats.sum.num_bytes -= num_bytes;
+ if (info.stats.stats.sum.num_bytes < 0) {
+ info.stats.stats.sum.num_bytes = 0;
+ }
+ }
+ }
+
+ // Only used in testing so not worried about needing the PG lock here
+ int64_t get_stats_num_bytes() {
+ Mutex::Locker l(_lock);
+ int num_bytes = info.stats.stats.sum.num_bytes;
+ if (pool.info.is_erasure()) {
+ num_bytes /= (int)get_pgbackend()->get_ec_data_chunk_count();
+ // Round up each object by a stripe
+ num_bytes += get_pgbackend()->get_ec_stripe_chunk_size() * info.stats.stats.sum.num_objects;
+ }
+ int64_t lnb = local_num_bytes.load();
+ if (lnb && lnb != num_bytes) {
+ lgeneric_dout(cct, 0) << this << " " << info.pgid << " num_bytes mismatch "
+ << lnb << " vs stats "
+ << info.stats.stats.sum.num_bytes << " / chunk "
+ << get_pgbackend()->get_ec_data_chunk_count()
+ << dendl;
+ }
+ return num_bytes;
+ }
+
protected:
/*
map<hobject_t, list<Context*>> callbacks_for_degraded_object;
map<eversion_t,
- list<pair<OpRequestRef, version_t> > > waiting_for_ondisk;
+ list<tuple<OpRequestRef, version_t, int> > > waiting_for_ondisk;
void requeue_object_waiters(map<hobject_t, list<OpRequestRef>>& m);
void requeue_op(OpRequestRef op);
bool pg_stats_publish_valid;
pg_stat_t pg_stats_publish;
- // for ordering writes
- ceph::shared_ptr<ObjectStore::Sequencer> osr;
-
void _update_calc_stats();
void _update_blocked_by();
friend class TestOpsSocketHook;
void publish_stats_to_osd();
void clear_publish_stats();
-public:
void clear_primary_state();
- bool is_actingbackfill(pg_shard_t osd) const {
- return actingbackfill.count(osd);
+ bool is_acting_recovery_backfill(pg_shard_t osd) const {
+ return acting_recovery_backfill.count(osd);
}
bool is_acting(pg_shard_t osd) const {
- return has_shard(pool.info.ec_pool(), acting, osd);
+ return has_shard(pool.info.is_erasure(), acting, osd);
}
bool is_up(pg_shard_t osd) const {
- return has_shard(pool.info.ec_pool(), up, osd);
+ return has_shard(pool.info.is_erasure(), up, osd);
}
static bool has_shard(bool ec, const vector<int>& v, pg_shard_t osd) {
if (ec) {
unsigned get_recovery_priority();
/// get backfill reservation priority
unsigned get_backfill_priority();
+ /// get priority for pg deletion
+ unsigned get_delete_priority();
- void mark_clean(); ///< mark an active pg clean
+ void try_mark_clean(); ///< mark an active pg clean
/// return [start,end) bounds for required past_intervals
static pair<epoch_t, epoch_t> get_required_past_interval_bounds(
const pg_info_t &info,
epoch_t oldest_map) {
- epoch_t start = MAX(
+ epoch_t start = std::max(
info.history.last_epoch_clean ? info.history.last_epoch_clean :
info.history.epoch_pool_created,
oldest_map);
- epoch_t end = MAX(
+ epoch_t end = std::max(
info.history.same_interval_since,
info.history.epoch_pool_created);
return make_pair(start, end);
bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
virtual void dump_recovery_info(Formatter *f) const = 0;
- bool calc_min_last_complete_ondisk() {
+ void calc_min_last_complete_ondisk() {
eversion_t min = last_complete_ondisk;
- assert(!actingbackfill.empty());
- for (set<pg_shard_t>::iterator i = actingbackfill.begin();
- i != actingbackfill.end();
+ ceph_assert(!acting_recovery_backfill.empty());
+ for (set<pg_shard_t>::iterator i = acting_recovery_backfill.begin();
+ i != acting_recovery_backfill.end();
++i) {
if (*i == get_primary()) continue;
if (peer_last_complete_ondisk.count(*i) == 0)
- return false; // we don't have complete info
+ return; // we don't have complete info
eversion_t a = peer_last_complete_ondisk[*i];
if (a < min)
min = a;
}
if (min == min_last_complete_ondisk)
- return false;
+ return;
min_last_complete_ondisk = min;
- return true;
+ return;
}
virtual void calc_trim_to() = 0;
pg->get_pgbackend()->try_stash(hoid, v, t);
}
void rollback(const pg_log_entry_t &entry) override {
- assert(entry.can_rollback());
+ ceph_assert(entry.can_rollback());
pg->get_pgbackend()->rollback(entry, t);
}
void rollforward(const pg_log_entry_t &entry) override {
pg_shard_t fromosd,
RecoveryCtx*);
- void check_for_lost_objects();
- void forget_lost_objects();
-
void discover_all_missing(std::map<int, map<spg_t,pg_query_t> > &query_map);
- void trim_write_ahead();
-
map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
const map<pg_shard_t, pg_info_t> &infos,
bool restrict_to_up_acting,
map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
unsigned size,
const vector<int> &acting,
- pg_shard_t acting_primary,
const vector<int> &up,
- pg_shard_t up_primary,
const map<pg_shard_t, pg_info_t> &all_info,
bool restrict_to_up_acting,
vector<int> *want,
set<pg_shard_t> *backfill,
set<pg_shard_t> *acting_backfill,
- pg_shard_t *want_primary,
ostream &ss);
static void calc_replicated_acting(
map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
+ uint64_t force_auth_primary_missing_objects,
unsigned size,
const vector<int> &acting,
- pg_shard_t acting_primary,
const vector<int> &up,
pg_shard_t up_primary,
const map<pg_shard_t, pg_info_t> &all_info,
vector<int> *want,
set<pg_shard_t> *backfill,
set<pg_shard_t> *acting_backfill,
- pg_shard_t *want_primary,
+ const OSDMapRef osdmap,
ostream &ss);
+ void choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
+ const pg_info_t &auth_info,
+ vector<int> *want,
+ set<pg_shard_t> *async_recovery) const;
+ void choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_info,
+ const pg_info_t &auth_info,
+ vector<int> *want,
+ set<pg_shard_t> *async_recovery) const;
+
+ bool recoverable_and_ge_min_size(const vector<int> &want) const;
bool choose_acting(pg_shard_t &auth_log_shard,
bool restrict_to_up_acting,
bool *history_les_bound);
void activate(
ObjectStore::Transaction& t,
epoch_t activation_epoch,
- list<Context*>& tfin,
map<int, map<spg_t,pg_query_t> >& query_map,
map<int,
vector<pair<pg_notify_t, PastIntervals> > > *activator_map,
RecoveryCtx *ctx);
+
+ struct C_PG_ActivateCommitted : public Context {
+ PGRef pg;
+ epoch_t epoch;
+ epoch_t activation_epoch;
+ C_PG_ActivateCommitted(PG *p, epoch_t e, epoch_t ae)
+ : pg(p), epoch(e), activation_epoch(ae) {}
+ void finish(int r) override {
+ pg->_activate_committed(epoch, activation_epoch);
+ }
+ };
void _activate_committed(epoch_t epoch, epoch_t activation_epoch);
void all_activated_and_committed();
virtual void check_local() = 0;
- /**
- * @param ops_begun returns how many recovery ops the function started
- * @returns true if any useful work was accomplished; false otherwise
- */
- virtual bool start_recovery_ops(
- uint64_t max,
- ThreadPool::TPHandle &handle,
- uint64_t *ops_begun) = 0;
-
void purge_strays();
void update_heartbeat_peers();
Context *finish_sync_event;
- void finish_recovery(list<Context*>& tfin);
+ Context *finish_recovery();
void _finish_recovery(Context *c);
+ struct C_PG_FinishRecovery : public Context {
+ PGRef pg;
+ explicit C_PG_FinishRecovery(PG *p) : pg(p) {}
+ void finish(int r) override {
+ pg->_finish_recovery(this);
+ }
+ };
void cancel_recovery();
void clear_recovery_state();
virtual void _clear_recovery_state() = 0;
void start_recovery_op(const hobject_t& soid);
void finish_recovery_op(const hobject_t& soid, bool dequeue=false);
- void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) = 0;
friend class C_OSD_RepModify_Commit;
+ friend class C_DeleteMore;
// -- backoff --
Mutex backoff_lock; // orders inside Backoff::lock
release_backoffs(begin, end);
}
- void rm_backoff(BackoffRef b);
-
// -- scrub --
+public:
struct Scrubber {
Scrubber();
~Scrubber();
set<pg_shard_t> waiting_on_whom;
int shallow_errors;
int deep_errors;
- int large_omap_objects = 0;
int fixed;
ScrubMap primary_scrubmap;
ScrubMapBuilder primary_scrubmap_pos;
OpRequestRef active_rep_scrub;
utime_t scrub_reg_stamp; // stamp we registered for
+ omap_stat_t omap_stats = (const struct omap_stat_t){ 0 };
+
// For async sleep
bool sleeping = false;
bool needs_sleep = true;
bool must_scrub, must_deep_scrub, must_repair;
// Priority to use for scrub scheduling
- unsigned priority;
+ unsigned priority = 0;
// this flag indicates whether we would like to do auto-repair of the PG or not
bool auto_repair;
+ // this flag indicates that we are scrubbing post repair to verify everything is fixed
+ bool check_repair;
+ // this flag indicates that if a regular scrub detects errors <= osd_scrub_auto_repair_num_errors,
+ // we should deep scrub in order to auto repair
+ bool deep_scrub_on_error;
// Maps from objects with errors to missing/inconsistent peers
map<hobject_t, set<pg_shard_t>> missing;
must_deep_scrub = false;
must_repair = false;
auto_repair = false;
+ check_repair = false;
+ deep_scrub_on_error = false;
state = PG::Scrubber::INACTIVE;
start = hobject_t();
subset_last_update = eversion_t();
shallow_errors = 0;
deep_errors = 0;
- large_omap_objects = 0;
fixed = 0;
+ omap_stats = (const struct omap_stat_t){ 0 };
deep = false;
run_callbacks();
inconsistent.clear();
void cleanup_store(ObjectStore::Transaction *t);
} scrubber;
+protected:
bool scrub_after_recovery;
int active_pushes;
const hobject_t& soid, list<pair<ScrubMap::object, pg_shard_t> > *ok_peers,
pg_shard_t bad_peer);
- void scrub(epoch_t queued, ThreadPool::TPHandle &handle);
void chunky_scrub(ThreadPool::TPHandle &handle);
void scrub_compare_maps();
/**
bool scrub_process_inconsistent();
bool ops_blocked_by_scrub() const;
void scrub_finish();
- void scrub_clear_state();
+ void scrub_clear_state(bool keep_repair = false);
void _scan_snaps(ScrubMap &map);
void _repair_oinfo_oid(ScrubMap &map);
- void _scan_rollback_obs(
- const vector<ghobject_t> &rollback_obs,
- ThreadPool::TPHandle &handle);
+ void _scan_rollback_obs(const vector<ghobject_t> &rollback_obs);
void _request_scrub_map(pg_shard_t replica, eversion_t version,
hobject_t start, hobject_t end, bool deep,
bool allow_preemption);
boost::optional<uint32_t>>> &missing_digest) { }
virtual void _scrub_clear_state() { }
virtual void _scrub_finish() { }
- virtual void split_colls(
- spg_t child,
- int split_bits,
- int seed,
- const pg_pool_t *pool,
- ObjectStore::Transaction *t) = 0;
void clear_scrub_reserved();
void scrub_reserve_replicas();
void scrub_unreserve_replicas();
bool scrub_all_replicas_reserved() const;
- bool sched_scrub();
- void reg_next_scrub();
- void unreg_next_scrub();
void replica_scrub(
OpRequestRef op,
ThreadPool::TPHandle &handle);
void do_replica_scrub_map(OpRequestRef op);
- void sub_op_scrub_map(OpRequestRef op);
void handle_scrub_reserve_request(OpRequestRef op);
void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from);
pg(pg), epoch(epoch), evt(evt) {}
void finish(int r) override {
pg->lock();
- pg->queue_peering_event(PG::CephPeeringEvtRef(
- new PG::CephPeeringEvt(
+ pg->queue_peering_event(PGPeeringEventRef(
+ new PGPeeringEvent(
epoch,
epoch,
evt)));
}
};
- class CephPeeringEvt {
- epoch_t epoch_sent;
- epoch_t epoch_requested;
- boost::intrusive_ptr< const boost::statechart::event_base > evt;
- string desc;
- public:
- MEMPOOL_CLASS_HELPERS();
- template <class T>
- CephPeeringEvt(epoch_t epoch_sent,
- epoch_t epoch_requested,
- const T &evt_) :
- epoch_sent(epoch_sent), epoch_requested(epoch_requested),
- evt(evt_.intrusive_from_this()) {
- stringstream out;
- out << "epoch_sent: " << epoch_sent
- << " epoch_requested: " << epoch_requested << " ";
- evt_.print(&out);
- desc = out.str();
- }
- epoch_t get_epoch_sent() { return epoch_sent; }
- epoch_t get_epoch_requested() { return epoch_requested; }
- const boost::statechart::event_base &get_event() { return *evt; }
- string get_desc() { return desc; }
- };
- typedef ceph::shared_ptr<CephPeeringEvt> CephPeeringEvtRef;
- list<CephPeeringEvtRef> peering_queue; // op queue
- list<CephPeeringEvtRef> peering_waiters;
struct QueryState : boost::statechart::event< QueryState > {
Formatter *f;
}
};
- struct MInfoRec : boost::statechart::event< MInfoRec > {
- pg_shard_t from;
- pg_info_t info;
- epoch_t msg_epoch;
- MInfoRec(pg_shard_t from, const pg_info_t &info, epoch_t msg_epoch) :
- from(from), info(info), msg_epoch(msg_epoch) {}
- void print(std::ostream *out) const {
- *out << "MInfoRec from " << from << " info: " << info;
- }
- };
-
- struct MLogRec : boost::statechart::event< MLogRec > {
- pg_shard_t from;
- boost::intrusive_ptr<MOSDPGLog> msg;
- MLogRec(pg_shard_t from, MOSDPGLog *msg) :
- from(from), msg(msg) {}
- void print(std::ostream *out) const {
- *out << "MLogRec from " << from;
- }
- };
-
- struct MNotifyRec : boost::statechart::event< MNotifyRec > {
- pg_shard_t from;
- pg_notify_t notify;
- uint64_t features;
- MNotifyRec(pg_shard_t from, const pg_notify_t ¬ify, uint64_t f) :
- from(from), notify(notify), features(f) {}
- void print(std::ostream *out) const {
- *out << "MNotifyRec from " << from << " notify: " << notify
- << " features: 0x" << hex << features << dec;
- }
- };
-
- struct MQuery : boost::statechart::event< MQuery > {
- pg_shard_t from;
- pg_query_t query;
- epoch_t query_epoch;
- MQuery(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch):
- from(from), query(query), query_epoch(query_epoch) {}
- void print(std::ostream *out) const {
- *out << "MQuery from " << from
- << " query_epoch " << query_epoch
- << " query: " << query;
- }
- };
+public:
+ int pg_stat_adjust(osd_stat_t *new_stat);
+protected:
struct AdvMap : boost::statechart::event< AdvMap > {
OSDMapRef osdmap;
*out << "Activate from " << activation_epoch;
}
};
- struct RequestBackfillPrio : boost::statechart::event< RequestBackfillPrio > {
- unsigned priority;
- explicit RequestBackfillPrio(unsigned prio) :
- boost::statechart::event< RequestBackfillPrio >(),
- priority(prio) {}
- void print(std::ostream *out) const {
- *out << "RequestBackfillPrio: priority " << priority;
- }
- };
-#define TrivialEvent(T) struct T : boost::statechart::event< T > { \
- T() : boost::statechart::event< T >() {} \
- void print(std::ostream *out) const { \
- *out << #T; \
- } \
- };
- struct DeferBackfill : boost::statechart::event<DeferBackfill> {
- float delay;
- explicit DeferBackfill(float delay) : delay(delay) {}
- void print(std::ostream *out) const {
- *out << "DeferBackfill: delay " << delay;
- }
- };
- struct DeferRecovery : boost::statechart::event<DeferRecovery> {
- float delay;
- explicit DeferRecovery(float delay) : delay(delay) {}
- void print(std::ostream *out) const {
- *out << "DeferRecovery: delay " << delay;
- }
- };
+public:
struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
explicit UnfoundBackfill() {}
void print(std::ostream *out) const {
*out << "UnfoundRecovery";
}
};
+
+ struct RequestScrub : boost::statechart::event<RequestScrub> {
+ bool deep;
+ bool repair;
+ explicit RequestScrub(bool d, bool r) : deep(d), repair(r) {}
+ void print(std::ostream *out) const {
+ *out << "RequestScrub(" << (deep ? "deep" : "shallow")
+ << (repair ? " repair" : "");
+ }
+ };
+
protected:
TrivialEvent(Initialize)
- TrivialEvent(Load)
TrivialEvent(GotInfo)
TrivialEvent(NeedUpThru)
- TrivialEvent(NullEvt)
- TrivialEvent(FlushedEvt)
TrivialEvent(Backfilled)
TrivialEvent(LocalBackfillReserved)
- TrivialEvent(RemoteBackfillReserved)
TrivialEvent(RejectRemoteReservation)
- TrivialEvent(RemoteReservationRejected)
- TrivialEvent(RemoteReservationCanceled)
+ public:
TrivialEvent(RequestBackfill)
- TrivialEvent(RequestRecovery)
- TrivialEvent(RecoveryDone)
+ protected:
+ TrivialEvent(RemoteRecoveryPreempted)
+ TrivialEvent(RemoteBackfillPreempted)
TrivialEvent(BackfillTooFull)
TrivialEvent(RecoveryTooFull)
TrivialEvent(AllReplicasRecovered)
TrivialEvent(DoRecovery)
TrivialEvent(LocalRecoveryReserved)
- TrivialEvent(RemoteRecoveryReserved)
+ public:
+ protected:
TrivialEvent(AllRemotesReserved)
TrivialEvent(AllBackfillsReserved)
TrivialEvent(GoClean)
TrivialEvent(IntervalFlush)
+ public:
+ TrivialEvent(DeleteStart)
+ TrivialEvent(DeleteSome)
+
+ TrivialEvent(SetForceRecovery)
+ TrivialEvent(UnsetForceRecovery)
+ TrivialEvent(SetForceBackfill)
+ TrivialEvent(UnsetForceBackfill)
+
+ protected:
+ TrivialEvent(DeleteReserved)
+ TrivialEvent(DeleteInterrupted)
+
/* Encapsulates PG recovery process */
class RecoveryState {
void start_handle(RecoveryCtx *new_ctx);
/* Accessor functions for state methods */
ObjectStore::Transaction* get_cur_transaction() {
- assert(state->rctx);
- assert(state->rctx->transaction);
+ ceph_assert(state->rctx);
+ ceph_assert(state->rctx->transaction);
return state->rctx->transaction;
}
void send_query(pg_shard_t to, const pg_query_t &query) {
- assert(state->rctx);
- assert(state->rctx->query_map);
+ ceph_assert(state->rctx);
+ ceph_assert(state->rctx->query_map);
(*state->rctx->query_map)[to.osd][spg_t(pg->info.pgid.pgid, to.shard)] =
query;
}
map<int, map<spg_t, pg_query_t> > *get_query_map() {
- assert(state->rctx);
- assert(state->rctx->query_map);
+ ceph_assert(state->rctx);
+ ceph_assert(state->rctx->query_map);
return state->rctx->query_map;
}
map<int, vector<pair<pg_notify_t, PastIntervals> > > *get_info_map() {
- assert(state->rctx);
- assert(state->rctx->info_map);
+ ceph_assert(state->rctx);
+ ceph_assert(state->rctx->info_map);
return state->rctx->info_map;
}
- list< Context* > *get_on_safe_context_list() {
- assert(state->rctx);
- assert(state->rctx->on_safe);
- return &(state->rctx->on_safe->contexts);
- }
-
- list< Context * > *get_on_applied_context_list() {
- assert(state->rctx);
- assert(state->rctx->on_applied);
- return &(state->rctx->on_applied->contexts);
- }
-
RecoveryCtx *get_recovery_ctx() { return &*(state->rctx); }
void send_notify(pg_shard_t to,
const pg_notify_t &info, const PastIntervals &pi) {
- assert(state->rctx);
+ ceph_assert(state->rctx);
state->rctx->send_notify(to, info, pi);
}
};
// RepWaitBackfillReserved
// RepWaitRecoveryReserved
// Stray
+ // ToDelete
+ // WaitDeleteReserved
+ // Deleting
+ // Crashed
struct Crashed : boost::statechart::state< Crashed, RecoveryMachine >, NamedState {
explicit Crashed(my_context ctx);
typedef boost::mpl::list <
boost::statechart::transition< Initialize, Reset >,
- boost::statechart::custom_reaction< Load >,
boost::statechart::custom_reaction< NullEvt >,
boost::statechart::transition< boost::statechart::event_base, Crashed >
> reactions;
- boost::statechart::result react(const Load&);
boost::statechart::result react(const MNotifyRec&);
boost::statechart::result react(const MInfoRec&);
boost::statechart::result react(const MLogRec&);
boost::statechart::custom_reaction< AdvMap >,
boost::statechart::custom_reaction< ActMap >,
boost::statechart::custom_reaction< NullEvt >,
- boost::statechart::custom_reaction< FlushedEvt >,
boost::statechart::custom_reaction< IntervalFlush >,
boost::statechart::transition< boost::statechart::event_base, Crashed >
> reactions;
boost::statechart::result react(const QueryState& q);
boost::statechart::result react(const AdvMap&);
boost::statechart::result react(const ActMap&);
- boost::statechart::result react(const FlushedEvt&);
boost::statechart::result react(const IntervalFlush&);
boost::statechart::result react(const boost::statechart::event_base&) {
return discard_event();
typedef boost::mpl::list <
boost::statechart::custom_reaction< QueryState >,
boost::statechart::custom_reaction< AdvMap >,
- boost::statechart::custom_reaction< NullEvt >,
- boost::statechart::custom_reaction< FlushedEvt >,
boost::statechart::custom_reaction< IntervalFlush >,
+ // ignored
+ boost::statechart::custom_reaction< NullEvt >,
+ boost::statechart::custom_reaction<SetForceRecovery>,
+ boost::statechart::custom_reaction<UnsetForceRecovery>,
+ boost::statechart::custom_reaction<SetForceBackfill>,
+ boost::statechart::custom_reaction<UnsetForceBackfill>,
+ boost::statechart::custom_reaction<RequestScrub>,
+ // crash
boost::statechart::transition< boost::statechart::event_base, Crashed >
> reactions;
boost::statechart::result react(const QueryState& q);
boost::statechart::result react(const AdvMap&);
- boost::statechart::result react(const FlushedEvt&);
boost::statechart::result react(const IntervalFlush&);
boost::statechart::result react(const boost::statechart::event_base&) {
return discard_event();
typedef boost::mpl::list <
boost::statechart::custom_reaction< ActMap >,
boost::statechart::custom_reaction< MNotifyRec >,
- boost::statechart::transition< NeedActingChange, WaitActingChange >
+ boost::statechart::transition< NeedActingChange, WaitActingChange >,
+ boost::statechart::custom_reaction<SetForceRecovery>,
+ boost::statechart::custom_reaction<UnsetForceRecovery>,
+ boost::statechart::custom_reaction<SetForceBackfill>,
+ boost::statechart::custom_reaction<UnsetForceBackfill>,
+ boost::statechart::custom_reaction<RequestScrub>
> reactions;
boost::statechart::result react(const ActMap&);
boost::statechart::result react(const MNotifyRec&);
+ boost::statechart::result react(const SetForceRecovery&);
+ boost::statechart::result react(const UnsetForceRecovery&);
+ boost::statechart::result react(const SetForceBackfill&);
+ boost::statechart::result react(const UnsetForceBackfill&);
+ boost::statechart::result react(const RequestScrub&);
};
struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
boost::statechart::custom_reaction< MInfoRec >,
boost::statechart::custom_reaction< MNotifyRec >,
boost::statechart::custom_reaction< MLogRec >,
+ boost::statechart::custom_reaction< MTrim >,
boost::statechart::custom_reaction< Backfilled >,
boost::statechart::custom_reaction< AllReplicasActivated >,
boost::statechart::custom_reaction< DeferRecovery >,
boost::statechart::custom_reaction< DeferBackfill >,
- boost::statechart::custom_reaction< UnfoundRecovery >,
+ boost::statechart::custom_reaction< UnfoundRecovery >,
boost::statechart::custom_reaction< UnfoundBackfill >,
+ boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
+ boost::statechart::custom_reaction< RemoteReservationRevoked>,
boost::statechart::custom_reaction< DoRecovery>
> reactions;
boost::statechart::result react(const QueryState& q);
boost::statechart::result react(const MInfoRec& infoevt);
boost::statechart::result react(const MNotifyRec& notevt);
boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const MTrim& trimevt);
boost::statechart::result react(const Backfilled&) {
return discard_event();
}
return discard_event();
}
boost::statechart::result react(const UnfoundRecovery& evt) {
- return discard_event();
+ return discard_event();
}
boost::statechart::result react(const UnfoundBackfill& evt) {
- return discard_event();
+ return discard_event();
+ }
+ boost::statechart::result react(const RemoteReservationRevokedTooFull&) {
+ return discard_event();
+ }
+ boost::statechart::result react(const RemoteReservationRevoked&) {
+ return discard_event();
}
boost::statechart::result react(const DoRecovery&) {
return discard_event();
struct Clean : boost::statechart::state< Clean, Active >, NamedState {
typedef boost::mpl::list<
- boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >
+ boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
+ boost::statechart::custom_reaction<SetForceRecovery>,
+ boost::statechart::custom_reaction<SetForceBackfill>
> reactions;
explicit Clean(my_context ctx);
void exit();
+ boost::statechart::result react(const boost::statechart::event_base&) {
+ return discard_event();
+ }
};
struct Recovered : boost::statechart::state< Recovered, Active >, NamedState {
struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
typedef boost::mpl::list<
- boost::statechart::transition< Backfilled, Recovered >,
+ boost::statechart::custom_reaction< Backfilled >,
boost::statechart::custom_reaction< DeferBackfill >,
boost::statechart::custom_reaction< UnfoundBackfill >,
- boost::statechart::custom_reaction< RemoteReservationRejected >
+ boost::statechart::custom_reaction< RemoteReservationRejected >,
+ boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
+ boost::statechart::custom_reaction< RemoteReservationRevoked>
> reactions;
explicit Backfilling(my_context ctx);
- boost::statechart::result react(const RemoteReservationRejected& evt);
+ boost::statechart::result react(const RemoteReservationRejected& evt) {
+ // for compat with old peers
+ post_event(RemoteReservationRevokedTooFull());
+ return discard_event();
+ }
+ void backfill_release_reservations();
+ boost::statechart::result react(const Backfilled& evt);
+ boost::statechart::result react(const RemoteReservationRevokedTooFull& evt);
+ boost::statechart::result react(const RemoteReservationRevoked& evt);
boost::statechart::result react(const DeferBackfill& evt);
boost::statechart::result react(const UnfoundBackfill& evt);
+ void cancel_backfill();
void exit();
};
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::custom_reaction< RemoteReservationRejected >,
+ boost::statechart::custom_reaction< RemoteReservationRevoked >,
boost::statechart::transition< AllBackfillsReserved, Backfilling >
> reactions;
set<pg_shard_t>::const_iterator backfill_osd_it;
explicit WaitRemoteBackfillReserved(my_context ctx);
+ void retry();
void exit();
boost::statechart::result react(const RemoteBackfillReserved& evt);
boost::statechart::result react(const RemoteReservationRejected& evt);
+ boost::statechart::result react(const RemoteReservationRevoked& evt);
};
struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState {
void exit();
};
+ struct ToDelete;
struct RepNotRecovering;
struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
explicit ReplicaActive(my_context ctx);
boost::statechart::custom_reaction< MQuery >,
boost::statechart::custom_reaction< MInfoRec >,
boost::statechart::custom_reaction< MLogRec >,
+ boost::statechart::custom_reaction< MTrim >,
boost::statechart::custom_reaction< Activate >,
boost::statechart::custom_reaction< DeferRecovery >,
boost::statechart::custom_reaction< DeferBackfill >,
boost::statechart::custom_reaction< UnfoundRecovery >,
- boost::statechart::custom_reaction< UnfoundBackfill >
+ boost::statechart::custom_reaction< UnfoundBackfill >,
+ boost::statechart::custom_reaction< RemoteBackfillPreempted >,
+ boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
+ boost::statechart::custom_reaction< RecoveryDone >,
+ boost::statechart::transition<DeleteStart, ToDelete>
> reactions;
boost::statechart::result react(const QueryState& q);
boost::statechart::result react(const MInfoRec& infoevt);
boost::statechart::result react(const MLogRec& logevt);
+ boost::statechart::result react(const MTrim& trimevt);
boost::statechart::result react(const ActMap&);
boost::statechart::result react(const MQuery&);
boost::statechart::result react(const Activate&);
+ boost::statechart::result react(const RecoveryDone&) {
+ return discard_event();
+ }
boost::statechart::result react(const DeferRecovery& evt) {
return discard_event();
}
boost::statechart::result react(const UnfoundBackfill& evt) {
return discard_event();
}
+ boost::statechart::result react(const RemoteBackfillPreempted& evt) {
+ return discard_event();
+ }
+ boost::statechart::result react(const RemoteRecoveryPreempted& evt) {
+ return discard_event();
+ }
};
struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
// for compat with old peers
boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
- boost::statechart::custom_reaction< BackfillTooFull >
+ boost::statechart::custom_reaction< BackfillTooFull >,
+ boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
+ boost::statechart::custom_reaction< RemoteBackfillPreempted >
> reactions;
explicit RepRecovering(my_context ctx);
+ boost::statechart::result react(const RemoteRecoveryPreempted &evt);
boost::statechart::result react(const BackfillTooFull &evt);
+ boost::statechart::result react(const RemoteBackfillPreempted &evt);
void exit();
};
struct RepNotRecovering : boost::statechart::state< RepNotRecovering, ReplicaActive>, NamedState {
typedef boost::mpl::list<
+ boost::statechart::custom_reaction< RequestRecoveryPrio >,
boost::statechart::custom_reaction< RequestBackfillPrio >,
- boost::statechart::transition< RequestRecovery, RepWaitRecoveryReserved >,
boost::statechart::custom_reaction< RejectRemoteReservation >,
boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
+ boost::statechart::custom_reaction< RemoteRecoveryReserved >,
+ boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::transition< RecoveryDone, RepNotRecovering > // for compat with pre-reservation peers
> reactions;
explicit RepNotRecovering(my_context ctx);
+ boost::statechart::result react(const RequestRecoveryPrio &evt);
boost::statechart::result react(const RequestBackfillPrio &evt);
+ boost::statechart::result react(const RemoteBackfillReserved &evt) {
+ // my reservation completion raced with a RELEASE from primary
+ return discard_event();
+ }
+ boost::statechart::result react(const RemoteRecoveryReserved &evt) {
+ // my reservation completion raced with a RELEASE from primary
+ return discard_event();
+ }
boost::statechart::result react(const RejectRemoteReservation &evt);
void exit();
};
void exit();
};
- struct Stray : boost::statechart::state< Stray, Started >, NamedState {
- map<int, pair<pg_query_t, epoch_t> > pending_queries;
-
+ struct Stray : boost::statechart::state< Stray, Started >,
+ NamedState {
explicit Stray(my_context ctx);
void exit();
boost::statechart::custom_reaction< MLogRec >,
boost::statechart::custom_reaction< MInfoRec >,
boost::statechart::custom_reaction< ActMap >,
- boost::statechart::custom_reaction< RecoveryDone >
+ boost::statechart::custom_reaction< RecoveryDone >,
+ boost::statechart::transition<DeleteStart, ToDelete>
> reactions;
boost::statechart::result react(const MQuery& query);
boost::statechart::result react(const MLogRec& logevt);
}
};
+ struct WaitDeleteReserved;
+ struct ToDelete : boost::statechart::state<ToDelete, Started, WaitDeleteReserved>, NamedState {
+ unsigned priority = 0;
+ typedef boost::mpl::list <
+ boost::statechart::custom_reaction< ActMap >,
+ boost::statechart::custom_reaction< DeleteSome >
+ > reactions;
+ explicit ToDelete(my_context ctx);
+ boost::statechart::result react(const ActMap &evt);
+ boost::statechart::result react(const DeleteSome &evt) {
+ // happens if we drop out of Deleting due to reprioritization etc.
+ return discard_event();
+ }
+ void exit();
+ };
+
+ struct Deleting;
+ struct WaitDeleteReserved : boost::statechart::state<WaitDeleteReserved,
+ ToDelete>, NamedState {
+ typedef boost::mpl::list <
+ boost::statechart::transition<DeleteReserved, Deleting>
+ > reactions;
+ explicit WaitDeleteReserved(my_context ctx);
+ void exit();
+ };
+
+ struct Deleting : boost::statechart::state<Deleting,
+ ToDelete>, NamedState {
+ typedef boost::mpl::list <
+ boost::statechart::custom_reaction< DeleteSome >,
+ boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved>
+ > reactions;
+ explicit Deleting(my_context ctx);
+ boost::statechart::result react(const DeleteSome &evt);
+ void exit();
+ };
+
struct GetLog;
struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState {
struct Down : boost::statechart::state< Down, Peering>, NamedState {
explicit Down(my_context ctx);
typedef boost::mpl::list <
- boost::statechart::custom_reaction< QueryState >
+ boost::statechart::custom_reaction< QueryState >,
+ boost::statechart::custom_reaction< MNotifyRec >
> reactions;
- boost::statechart::result react(const QueryState& infoevt);
+ boost::statechart::result react(const QueryState& q);
+ boost::statechart::result react(const MNotifyRec& infoevt);
void exit();
};
explicit Incomplete(my_context ctx);
boost::statechart::result react(const AdvMap &advmap);
boost::statechart::result react(const MNotifyRec& infoevt);
- boost::statechart::result react(const QueryState& infoevt);
+ boost::statechart::result react(const QueryState& q);
void exit();
};
-
RecoveryMachine machine;
PG *pg;
end_handle();
}
- void handle_event(CephPeeringEvtRef evt,
+ void handle_event(PGPeeringEventRef evt,
RecoveryCtx *rctx) {
start_handle(rctx);
machine.process_event(evt->get_event());
} recovery_state;
- public:
- PG(OSDService *o, OSDMapRef curmap,
- const PGPool &pool, spg_t p);
- ~PG() override;
- const spg_t pg_id;
- private:
- // Prevent copying
- explicit PG(const PG& rhs);
- PG& operator=(const PG& rhs);
uint64_t peer_features;
uint64_t acting_features;
uint64_t upacting_features;
epoch_t last_epoch;
- public:
- const spg_t& get_pgid() const { return pg_id; }
-
- void set_last_scrub_stamp(utime_t t) {
- info.stats.last_scrub_stamp = t;
- info.history.last_scrub_stamp = t;
- }
-
- void set_last_deep_scrub_stamp(utime_t t) {
- info.stats.last_deep_scrub_stamp = t;
- info.history.last_deep_scrub_stamp = t;
- }
+ /// most recently consumed osdmap's require_osd_version
+ unsigned last_require_osd_release = 0;
+ bool delete_needs_sleep = false;
+protected:
void reset_min_peer_features() {
peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
}
actingset.insert(
pg_shard_t(
acting[i],
- pool.info.ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
+ pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
upset.clear();
up = newup;
upset.insert(
pg_shard_t(
up[i],
- pool.info.ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
+ pool.info.is_erasure() ? shard_id_t(i) : shard_id_t::NO_SHARD));
}
- if (!pool.info.ec_pool()) {
+ if (!pool.info.is_erasure()) {
up_primary = pg_shard_t(new_up_primary, shard_id_t::NO_SHARD);
primary = pg_shard_t(new_acting_primary, shard_id_t::NO_SHARD);
return;
break;
}
}
- assert(up_primary.osd == new_up_primary);
- assert(primary.osd == new_acting_primary);
+ ceph_assert(up_primary.osd == new_up_primary);
+ ceph_assert(primary.osd == new_acting_primary);
}
- pg_shard_t get_primary() const { return primary; }
-
- int get_role() const { return role; }
- void set_role(int r) { role = r; }
- bool is_primary() const { return pg_whoami == primary; }
- bool is_replica() const { return role > 0; }
+ void set_role(int r) {
+ role = r;
+ }
- epoch_t get_last_peering_reset() const { return last_peering_reset; }
-
- //int get_state() const { return state; }
- bool state_test(int m) const { return (state & m) != 0; }
- void state_set(int m) { state |= m; }
- void state_clear(int m) { state &= ~m; }
+ bool state_test(uint64_t m) const { return (state & m) != 0; }
+ void state_set(uint64_t m) { state |= m; }
+ void state_clear(uint64_t m) { state &= ~m; }
bool is_complete() const { return info.last_complete == info.last_update; }
bool should_send_notify() const { return send_notify; }
- int get_state() const { return state; }
- bool is_active() const { return state_test(PG_STATE_ACTIVE); }
- bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
- bool is_peering() const { return state_test(PG_STATE_PEERING); }
- bool is_down() const { return state_test(PG_STATE_DOWN); }
- bool is_recovery_unfound() const { return state_test(PG_STATE_RECOVERY_UNFOUND); }
- bool is_backfill_unfound() const { return state_test(PG_STATE_BACKFILL_UNFOUND); }
- bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
- bool is_clean() const { return state_test(PG_STATE_CLEAN); }
- bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
- bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
-
- bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); }
- bool is_remapped() const { return state_test(PG_STATE_REMAPPED); }
- bool is_peered() const {
+ uint64_t get_state() const { return state; }
+ bool is_active() const { return state_test(PG_STATE_ACTIVE); }
+ bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
+ bool is_peering() const { return state_test(PG_STATE_PEERING); }
+ bool is_down() const { return state_test(PG_STATE_DOWN); }
+ bool is_recovery_unfound() const { return state_test(PG_STATE_RECOVERY_UNFOUND); }
+ bool is_backfill_unfound() const { return state_test(PG_STATE_BACKFILL_UNFOUND); }
+ bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
+ bool is_clean() const { return state_test(PG_STATE_CLEAN); }
+ bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
+ bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
+ bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); }
+ bool is_remapped() const { return state_test(PG_STATE_REMAPPED); }
+ bool is_peered() const {
return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
}
bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
+ bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
+ bool is_repair() const { return state_test(PG_STATE_REPAIR); }
- bool is_empty() const { return info.last_update == eversion_t(0,0); }
-
- void init(
- int role,
- const vector<int>& up,
- int up_primary,
- const vector<int>& acting,
- int acting_primary,
- const pg_history_t& history,
- const PastIntervals& pim,
- bool backfill,
- ObjectStore::Transaction *t);
+ bool is_empty() const { return info.last_update == eversion_t(0,0); }
// pg on-disk state
void do_pending_flush();
+public:
static void _create(ObjectStore::Transaction& t, spg_t pgid, int bits);
static void _init(ObjectStore::Transaction& t,
spg_t pgid, const pg_pool_t *pool);
-private:
+protected:
void prepare_write_info(map<string,bufferlist> *km);
void update_store_with_options();
- void update_store_on_load();
public:
static int _prepare_write_info(
bool dirty_epoch,
bool try_fast_info,
PerfCounters *logger = nullptr);
+
+ void write_if_dirty(RecoveryCtx *rctx) {
+ write_if_dirty(*rctx->transaction);
+ }
+protected:
void write_if_dirty(ObjectStore::Transaction& t);
PGLog::IndexedLog projected_log;
eversion_t projected_last_update;
eversion_t get_next_version() const {
eversion_t at_version(
- get_osdmap()->get_epoch(),
+ get_osdmap_epoch(),
projected_last_update.version+1);
- assert(at_version > info.last_update);
- assert(at_version > pg_log.get_head());
- assert(at_version > projected_last_update);
+ ceph_assert(at_version > info.last_update);
+ ceph_assert(at_version > pg_log.get_head());
+ ceph_assert(at_version > projected_last_update);
return at_version;
}
eversion_t trim_to,
eversion_t roll_forward_to,
ObjectStore::Transaction &t,
- bool transaction_applied = true);
+ bool transaction_applied = true,
+ bool async = false);
bool check_log_for_corruption(ObjectStore *store);
std::string get_corrupt_pg_log_name() const;
- static int read_info(
- ObjectStore *store, spg_t pgid, const coll_t &coll,
- bufferlist &bl, pg_info_t &info, PastIntervals &past_intervals,
- __u8 &);
- void read_state(ObjectStore *store, bufferlist &bl);
- static bool _has_removal_flag(ObjectStore *store, spg_t pgid);
- static int peek_map_epoch(ObjectStore *store, spg_t pgid,
- epoch_t *pepoch, bufferlist *bl);
+
void update_snap_map(
const vector<pg_log_entry_t> &log_entries,
ObjectStore::Transaction& t);
/**
* Merge entries updating missing as necessary on all
- * actingbackfill logs and missings (also missing_loc)
+ * acting_recovery_backfill logs and missings (also missing_loc)
*/
void merge_new_log_entries(
const mempool::osd_pglog::list<pg_log_entry_t> &entries,
ObjectStore::Transaction *t);
void on_new_interval();
virtual void _on_new_interval() = 0;
- void start_flush(ObjectStore::Transaction *t,
- list<Context *> *on_applied,
- list<Context *> *on_safe);
+ void start_flush(ObjectStore::Transaction *t);
void set_last_peering_reset();
- bool pg_has_reset_since(epoch_t e) {
- assert(is_locked());
- return deleting || e < get_last_peering_reset();
- }
void update_history(const pg_history_t& history);
void fulfill_info(pg_shard_t from, const pg_query_t &query,
bool can_discard_replica_op(OpRequestRef& op);
bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch);
- bool old_peering_evt(CephPeeringEvtRef evt) {
+ bool old_peering_evt(PGPeeringEventRef evt) {
return old_peering_msg(evt->get_epoch_sent(), evt->get_epoch_requested());
}
static bool have_same_or_newer_map(epoch_t cur_epoch, epoch_t e) {
return e <= cur_epoch;
}
bool have_same_or_newer_map(epoch_t e) {
- return e <= get_osdmap()->get_epoch();
+ return e <= get_osdmap_epoch();
}
bool op_has_sufficient_caps(OpRequestRef& op);
// recovery bits
void take_waiters();
- void queue_peering_event(CephPeeringEvtRef evt);
- void handle_peering_event(CephPeeringEvtRef evt, RecoveryCtx *rctx);
- void queue_query(epoch_t msg_epoch, epoch_t query_epoch,
- pg_shard_t from, const pg_query_t& q);
- void queue_null(epoch_t msg_epoch, epoch_t query_epoch);
- void queue_flushed(epoch_t started_at);
- void handle_advance_map(
- OSDMapRef osdmap, OSDMapRef lastmap,
- vector<int>& newup, int up_primary,
- vector<int>& newacting, int acting_primary,
- RecoveryCtx *rctx);
- void handle_activate_map(RecoveryCtx *rctx);
- void handle_create(RecoveryCtx *rctx);
- void handle_loaded(RecoveryCtx *rctx);
- void handle_query_state(Formatter *f);
-
- virtual void on_removal(ObjectStore::Transaction *t) = 0;
// abstract bits
- virtual void do_request(
- OpRequestRef& op,
- ThreadPool::TPHandle &handle
- ) = 0;
-
- virtual void do_op(OpRequestRef& op) = 0;
- virtual void do_sub_op(OpRequestRef op) = 0;
- virtual void do_sub_op_reply(OpRequestRef op) = 0;
- virtual void do_scan(
- OpRequestRef op,
- ThreadPool::TPHandle &handle
- ) = 0;
- virtual void do_backfill(OpRequestRef op) = 0;
- virtual void snap_trimmer(epoch_t epoch_queued) = 0;
-
- virtual int do_command(
- cmdmap_t cmdmap,
- ostream& ss,
- bufferlist& idata,
- bufferlist& odata,
- ConnectionRef conn,
- ceph_tid_t tid) = 0;
+ friend class FlushState;
virtual void on_role_change() = 0;
virtual void on_pool_change() = 0;
virtual void on_change(ObjectStore::Transaction *t) = 0;
virtual void on_activate() = 0;
virtual void on_flushed() = 0;
- virtual void on_shutdown() = 0;
virtual void check_blacklisted_watchers() = 0;
- virtual void get_watchers(std::list<obj_watch_item_t>&) = 0;
- virtual bool agent_work(int max) = 0;
- virtual bool agent_work(int max, int agent_flush_quota) = 0;
- virtual void agent_stop() = 0;
- virtual void agent_delay() = 0;
- virtual void agent_clear() = 0;
- virtual void agent_choose_mode_restart() = 0;
+ friend ostream& operator<<(ostream& out, const PG& pg);
};
-ostream& operator<<(ostream& out, const PG& pg);
ostream& operator<<(ostream& out, const PG::BackfillInterval& bi);