#ifndef CEPH_CINODE_H
#define CEPH_CINODE_H
+#include <list>
+#include <map>
+#include <set>
+#include <boost/utility/string_view.hpp>
+
#include "common/config.h"
#include "include/counter.h"
#include "include/elist.h"
#include "SnapRealm.h"
#include "Mutation.h"
-#include <list>
-#include <set>
-#include <map>
-
#define dout_context g_ceph_context
class Context;
*/
class InodeStoreBase {
public:
- inode_t inode; // the inode itself
- std::string symlink; // symlink dest, if symlink
- std::map<std::string, bufferptr> xattrs;
+ typedef inode_t<mempool::mds_co::pool_allocator> mempool_inode;
+ typedef old_inode_t<mempool::mds_co::pool_allocator> mempool_old_inode;
+ typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map;
+ typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool
+
+ mempool_inode inode; // the inode itself
+ mempool::mds_co::string symlink; // symlink dest, if symlink
+ mempool_xattr_map xattrs;
fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
- compact_map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first
- snapid_t oldest_snap;
- damage_flags_t damage_flags;
+ mempool_old_inode_map old_inodes; // key = last, value.first = first
+ snapid_t oldest_snap = CEPH_NOSNAP;
+ damage_flags_t damage_flags = 0;
- InodeStoreBase() : oldest_snap(CEPH_NOSNAP), damage_flags(0) { }
+ InodeStoreBase() {}
/* Helpers */
bool is_file() const { return inode.is_file(); }
void dump(Formatter *f) const;
/* For use by offline tools */
- __u32 hash_dentry_name(const std::string &dn);
- frag_t pick_dirfrag(const std::string &dn);
+ __u32 hash_dentry_name(boost::string_view dn);
+ frag_t pick_dirfrag(boost::string_view dn);
};
class InodeStore : public InodeStoreBase {
public:
+ // FIXME bufferlist not part of mempool
bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't
// rehydrate it without full MDCache
void encode(bufferlist &bl, uint64_t features) const {
// cached inode wrapper
class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> {
public:
+ MEMPOOL_CLASS_HELPERS();
// -- pins --
static const int PIN_DIRFRAG = -1;
static const int PIN_CAPS = 2; // client caps
static const int MASK_STATE_EXPORTED =
(STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL);
static const int MASK_STATE_EXPORT_KEPT =
- (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS);
+ (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS|STATE_QUEUEDEXPORTPIN);
// -- waiters --
static const uint64_t WAIT_DIR = (1<<0);
public:
MDCache *mdcache;
- SnapRealm *snaprealm;
- SnapRealm *containing_realm;
+ SnapRealm *snaprealm = nullptr;
+ SnapRealm *containing_realm = nullptr;
snapid_t first, last;
- compact_set<snapid_t> dirty_old_rstats;
+ mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
class scrub_stamp_info_t {
public:
/// version we started our latest scrub (whether in-progress or finished)
- version_t scrub_start_version;
+ version_t scrub_start_version = 0;
/// time we started our latest scrub (whether in-progress or finished)
utime_t scrub_start_stamp;
/// version we started our most recent finished scrub
- version_t last_scrub_version;
+ version_t last_scrub_version = 0;
/// time we started our most recent finished scrub
utime_t last_scrub_stamp;
- scrub_stamp_info_t() : scrub_start_version(0), last_scrub_version(0) {}
+ scrub_stamp_info_t() {}
void reset() {
- scrub_start_version = 0;
- scrub_start_stamp = utime_t();
+ scrub_start_version = last_scrub_version = 0;
+ scrub_start_stamp = last_scrub_stamp = utime_t();
}
};
class scrub_info_t : public scrub_stamp_info_t {
public:
- CDentry *scrub_parent;
- MDSInternalContextBase *on_finish;
+ CDentry *scrub_parent = nullptr;
+ MDSInternalContextBase *on_finish = nullptr;
- bool last_scrub_dirty; /// are our stamps dirty with respect to disk state?
- bool scrub_in_progress; /// are we currently scrubbing?
- bool children_scrubbed;
+ bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
+ bool scrub_in_progress = false; /// are we currently scrubbing?
+ bool children_scrubbed = false;
/// my own (temporary) stamps and versions for each dirfrag we have
- std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps;
+ std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool
- ScrubHeaderRefConst header;
+ ScrubHeaderRef header;
- scrub_info_t() : scrub_stamp_info_t(),
- scrub_parent(NULL), on_finish(NULL),
- last_scrub_dirty(false), scrub_in_progress(false),
- children_scrubbed(false) {}
+ scrub_info_t() {}
};
const scrub_info_t *scrub_info() const{
return scrub_infop;
}
+ ScrubHeaderRef get_scrub_header() {
+ if (scrub_infop == nullptr) {
+ return nullptr;
+ } else {
+ return scrub_infop->header;
+ }
+ }
+
bool scrub_is_in_progress() const {
return (scrub_infop && scrub_infop->scrub_in_progress);
}
* directory's get_projected_version())
*/
void scrub_initialize(CDentry *scrub_parent,
- const ScrubHeaderRefConst& header,
+ ScrubHeaderRef& header,
MDSInternalContextBase *f);
/**
* Get the next dirfrag to scrub. Gives you a frag_t in output param which
}
snapid_t get_oldest_snap();
- uint64_t last_journaled; // log offset for the last time i was journaled
+ uint64_t last_journaled = 0; // log offset for the last time i was journaled
//loff_t last_open_journaled; // log offset for the last journaled EOpen
utime_t last_dirstat_prop;
void mark_dirty_rstat();
void clear_dirty_rstat();
- //bool hack_accessed;
+ //bool hack_accessed = false;
//utime_t hack_load_stamp;
/**
* Projection methods, used to store inode changes until they have been journaled,
* at which point they are popped.
* Usage:
- * project_inode as needed. If you're also projecting xattrs, pass
- * in an xattr map (by pointer), then edit the map.
- * If you're also projecting the snaprealm, call project_snaprealm after
- * calling project_inode, and modify the snaprealm as necessary.
+ * project_inode as needed. If you're changing xattrs or sr_t, then pass true
+ * as needed then change the xattrs/snapnode member as needed. (Dirty
+ * exception: project_past_snaprealm_parent allows you to project the
+ * snapnode after doing project_inode (i.e. you don't need to pass
+ * snap=true).
*
* Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
* This function will take care of the inode itself, the xattrs, and the snaprealm.
*/
- struct projected_inode_t {
- inode_t *inode;
- std::map<std::string,bufferptr> *xattrs;
- sr_t *snapnode;
-
- projected_inode_t()
- : inode(NULL), xattrs(NULL), snapnode(NULL) {}
- projected_inode_t(inode_t *in, sr_t *sn)
- : inode(in), xattrs(NULL), snapnode(sn) {}
- projected_inode_t(inode_t *in, std::map<std::string, bufferptr> *xp = NULL, sr_t *sn = NULL)
- : inode(in), xattrs(xp), snapnode(sn) {}
+ class projected_inode {
+ public:
+ mempool_inode inode;
+ std::unique_ptr<mempool_xattr_map> xattrs;
+ std::unique_ptr<sr_t> snapnode;
+
+ projected_inode() = delete;
+ projected_inode(const mempool_inode &in) : inode(in) {}
};
- std::list<projected_inode_t*> projected_nodes; // projected values (only defined while dirty)
- int num_projected_xattrs;
- int num_projected_srnodes;
-
- inode_t *project_inode(std::map<std::string,bufferptr> *px=0);
+
+private:
+ mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty)
+ size_t num_projected_xattrs = 0;
+ size_t num_projected_srnodes = 0;
+
+ sr_t &project_snaprealm(projected_inode &pi);
+public:
+ CInode::projected_inode &project_inode(bool xattr = false, bool snap = false);
void pop_and_dirty_projected_inode(LogSegment *ls);
- projected_inode_t *get_projected_node() {
+ projected_inode *get_projected_node() {
if (projected_nodes.empty())
return NULL;
else
- return projected_nodes.back();
+ return &projected_nodes.back();
}
version_t get_projected_version() const {
if (projected_nodes.empty())
return inode.version;
else
- return projected_nodes.back()->inode->version;
+ return projected_nodes.back().inode.version;
}
bool is_projected() const {
return !projected_nodes.empty();
}
- const inode_t *get_projected_inode() const {
+ const mempool_inode *get_projected_inode() const {
if (projected_nodes.empty())
return &inode;
else
- return projected_nodes.back()->inode;
+ return &projected_nodes.back().inode;
}
- inode_t *get_projected_inode() {
+ mempool_inode *get_projected_inode() {
if (projected_nodes.empty())
return &inode;
else
- return projected_nodes.back()->inode;
+ return &projected_nodes.back().inode;
}
- inode_t *get_previous_projected_inode() {
+ mempool_inode *get_previous_projected_inode() {
assert(!projected_nodes.empty());
- std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin();
- ++p;
- if (p != projected_nodes.rend())
- return (*p)->inode;
+ auto it = projected_nodes.rbegin();
+ ++it;
+ if (it != projected_nodes.rend())
+ return &it->inode;
else
return &inode;
}
- std::map<std::string,bufferptr> *get_projected_xattrs() {
+ mempool_xattr_map *get_projected_xattrs() {
if (num_projected_xattrs > 0) {
- for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin();
- p != projected_nodes.rend();
- ++p)
- if ((*p)->xattrs)
- return (*p)->xattrs;
+ for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
+ if (it->xattrs)
+ return it->xattrs.get();
}
return &xattrs;
}
- std::map<std::string,bufferptr> *get_previous_projected_xattrs() {
- std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin();
- for (++p; // skip the most recent projected value
- p != projected_nodes.rend();
- ++p)
- if ((*p)->xattrs)
- return (*p)->xattrs;
+ mempool_xattr_map *get_previous_projected_xattrs() {
+ if (num_projected_xattrs > 0) {
+ for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
+ if (it->xattrs)
+ return it->xattrs.get();
+ }
return &xattrs;
}
- sr_t *project_snaprealm(snapid_t snapid=0);
const sr_t *get_projected_srnode() const {
if (num_projected_srnodes > 0) {
- for (std::list<projected_inode_t*>::const_reverse_iterator p = projected_nodes.rbegin();
- p != projected_nodes.rend();
- ++p)
- if ((*p)->snapnode)
- return (*p)->snapnode;
+ for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
+ if (it->snapnode)
+ return it->snapnode.get();
}
if (snaprealm)
return &snaprealm->srnode;
}
sr_t *get_projected_srnode() {
if (num_projected_srnodes > 0) {
- for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin();
- p != projected_nodes.rend();
- ++p)
- if ((*p)->snapnode)
- return (*p)->snapnode;
+ for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
+ if (it->snapnode)
+ return it->snapnode.get();
}
if (snaprealm)
return &snaprealm->srnode;
void pop_projected_snaprealm(sr_t *next_snaprealm);
public:
- old_inode_t& cow_old_inode(snapid_t follows, bool cow_head);
+ mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
void split_old_inode(snapid_t snap);
- old_inode_t *pick_old_inode(snapid_t last);
+ mempool_old_inode *pick_old_inode(snapid_t last);
void pre_cow_old_inode();
void purge_stale_snap_data(const std::set<snapid_t>& snaps);
// -- cache infrastructure --
private:
- compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
- int stickydir_ref;
- scrub_info_t *scrub_infop;
+ mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
+ int stickydir_ref = 0;
+ scrub_info_t *scrub_infop = nullptr;
public:
bool has_dirfrags() { return !dirfrags.empty(); }
protected:
// parent dentries in cache
- CDentry *parent; // primary link
- compact_set<CDentry*> remote_parents; // if hard linked
+ CDentry *parent = nullptr; // primary link
+ mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
- std::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
+ mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
- mds_authority_t inode_auth;
+ mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
// -- distributed state --
protected:
// file capabilities
- std::map<client_t, Capability*> client_caps; // client -> caps
- compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
- int replica_caps_wanted; // [replica] what i've requested from auth
+ using cap_map = mempool::mds_co::map<client_t, Capability*>;
+ cap_map client_caps; // client -> caps
+ mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
+ int replica_caps_wanted = 0; // [replica] what i've requested from auth
public:
- compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
- compact_map<snapid_t, std::set<client_t> > client_need_snapflush;
+ mempool::mds_co::compact_map<int, mempool::mds_co::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
+ mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
protected:
- ceph_lock_state_t *fcntl_locks;
- ceph_lock_state_t *flock_locks;
+ ceph_lock_state_t *fcntl_locks = nullptr;
+ ceph_lock_state_t *flock_locks = nullptr;
ceph_lock_state_t *get_fcntl_lock_state() {
if (!fcntl_locks)
elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
elist<CInode*>::item item_scrub;
+ // also update RecoveryQueue::RecoveryQueue() if you change this
+ elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
+ elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
+
public:
- int auth_pin_freeze_allowance;
+ int auth_pin_freeze_allowance = 0;
inode_load_vec_t pop;
friend class CInodeExport;
// ---------------------------
+ CInode() = delete;
CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) :
mdcache(c),
- snaprealm(0), containing_realm(0),
first(f), last(l),
- last_journaled(0), //last_open_journaled(0),
- //hack_accessed(true),
- num_projected_xattrs(0),
- num_projected_srnodes(0),
- stickydir_ref(0),
- scrub_infop(NULL),
- parent(0),
- inode_auth(CDIR_AUTH_DEFAULT),
- replica_caps_wanted(0),
- fcntl_locks(0), flock_locks(0),
- item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this),
+ item_dirty(this),
+ item_caps(this),
+ item_open_file(this),
+ item_dirty_parent(this),
item_dirty_dirfrag_dir(this),
item_dirty_dirfrag_nest(this),
item_dirty_dirfrag_dirfragtree(this),
- auth_pin_freeze_allowance(0),
pop(ceph_clock_now()),
versionlock(this, &versionlock_type),
authlock(this, &authlock_type),
snaplock(this, &snaplock_type),
nestlock(this, &nestlock_type),
flocklock(this, &flocklock_type),
- policylock(this, &policylock_type),
- loner_cap(-1), want_loner_cap(-1)
+ policylock(this, &policylock_type)
{
- state = 0;
if (auth) state_set(STATE_AUTH);
}
~CInode() override {
vinodeno_t vino() const { return vinodeno_t(inode.ino, last); }
int d_type() const { return IFTODT(inode.mode); }
- inode_t& get_inode() { return inode; }
+ mempool_inode& get_inode() { return inode; }
CDentry* get_parent_dn() { return parent; }
const CDentry* get_parent_dn() const { return parent; }
const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; }
void encode_store(bufferlist& bl, uint64_t features);
void decode_store(bufferlist::iterator& bl);
- void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features) {
+ void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features, bool need_recover) {
assert(is_auth());
// relax locks?
::encode(nonce, bl);
_encode_base(bl, features);
- _encode_locks_state_for_replica(bl);
+ _encode_locks_state_for_replica(bl, need_recover);
}
void decode_replica(bufferlist::iterator& p, bool is_new) {
__u32 nonce;
// -- waiting --
protected:
- compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir;
+ mempool::mds_co::compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir;
public:
void add_dir_waiter(frag_t fg, MDSInternalContextBase *c);
void take_dir_waiting(frag_t fg, std::list<MDSInternalContextBase*>& ls);
void _decode_base(bufferlist::iterator& p);
void _encode_locks_full(bufferlist& bl);
void _decode_locks_full(bufferlist::iterator& p);
- void _encode_locks_state_for_replica(bufferlist& bl);
+ void _encode_locks_state_for_replica(bufferlist& bl, bool need_recover);
void _encode_locks_state_for_rejoin(bufferlist& bl, int rep);
void _decode_locks_state(bufferlist::iterator& p, bool is_new);
void _decode_locks_rejoin(bufferlist::iterator& p, std::list<MDSInternalContextBase*>& waiters,
- std::list<SimpleLock*>& eval_locks);
+ std::list<SimpleLock*>& eval_locks, bool survivor);
// -- import/export --
void encode_export(bufferlist& bl);
static LockType flocklock_type;
static LockType policylock_type;
+ // FIXME not part of mempool
LocalLock versionlock;
SimpleLock authlock;
SimpleLock linklock;
// -- caps -- (new)
// client caps
- client_t loner_cap, want_loner_cap;
+ client_t loner_cap = -1, want_loner_cap = -1;
client_t get_loner() const { return loner_cap; }
client_t get_wanted_loner() const { return want_loner_cap; }
}
client_t calc_ideal_loner();
- client_t choose_ideal_loner();
- bool try_set_loner();
void set_loner_cap(client_t l);
+ bool choose_ideal_loner();
+ bool try_set_loner();
bool try_drop_loner();
// choose new lock state during recovery, based on issued caps
int count_nonstale_caps() {
int n = 0;
- for (std::map<client_t,Capability*>::iterator it = client_caps.begin();
- it != client_caps.end();
- ++it)
- if (!it->second->is_stale())
+ for (const auto &p : client_caps) {
+ if (!p.second->is_stale())
n++;
+ }
return n;
}
bool multiple_nonstale_caps() {
int n = 0;
- for (std::map<client_t,Capability*>::iterator it = client_caps.begin();
- it != client_caps.end();
- ++it)
- if (!it->second->is_stale()) {
+ for (const auto &p : client_caps) {
+ if (!p.second->is_stale()) {
if (n)
return true;
n++;
}
+ }
return false;
}
bool is_any_caps() { return !client_caps.empty(); }
bool is_any_nonstale_caps() { return count_nonstale_caps(); }
- const compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; }
- compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; }
+ const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; }
+ mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; }
- const std::map<client_t,Capability*>& get_client_caps() const { return client_caps; }
+ const cap_map& get_client_caps() const { return client_caps; }
Capability *get_client_cap(client_t client) {
auto client_caps_entry = client_caps.find(client);
if (client_caps_entry != client_caps.end())
int get_caps_allowed_by_type(int type) const;
int get_caps_careful() const;
int get_xlocker_mask(client_t client) const;
- int get_caps_allowed_for_client(Session *s, inode_t *file_i) const;
+ int get_caps_allowed_for_client(Session *s, mempool_inode *file_i) const;
// caps issued, wanted
int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0,
// -- hierarchy stuff --
public:
void set_primary_parent(CDentry *p) {
- assert(parent == 0);
+ assert(parent == 0 ||
+ g_conf->get_val<bool>("mds_hack_allow_loading_invalid_metadata"));
parent = p;
}
void remove_primary_parent(CDentry *dn) {
*/
struct validated_data {
template<typename T>struct member_status {
- bool checked;
- bool passed;
- int ondisk_read_retval;
+ bool checked = false;
+ bool passed = false;
+ bool repaired = false;
+ int ondisk_read_retval = 0;
T ondisk_value;
T memory_value;
std::stringstream error_str;
- member_status() : checked(false), passed(false),
- ondisk_read_retval(0) {}
};
- bool performed_validation;
- bool passed_validation;
+ bool performed_validation = false;
+ bool passed_validation = false;
struct raw_stats_t {
frag_info_t dirstat;
};
member_status<inode_backtrace_t> backtrace;
- member_status<inode_t> inode;
+ member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
member_status<raw_stats_t> raw_stats;
- validated_data() : performed_validation(false),
- passed_validation(false) {}
+ validated_data() {}
void dump(Formatter *f) const;
+
+ bool all_damage_repaired() const;
};
/**