};
struct OnodeSpace;
- struct OnodeCacheShard;
/// an in-memory object
struct Onode {
MEMPOOL_CLASS_HELPERS();
- // Not persisted and updated on cache insertion/removal
- OnodeCacheShard *s;
- bool pinned = false; // Only to be used by the onode cache shard
std::atomic_int nref; ///< reference count
Collection *c;
/// key under PREFIX_OBJ where we are stored
mempool::bluestore_cache_other::string key;
- boost::intrusive::list_member_hook<> lru_item, pin_item;
+ boost::intrusive::list_member_hook<> lru_item;
bluestore_onode_t onode; ///< metadata stored as value in kv store
bool exists; ///< true if object logically exists
-
+ bool cached; ///< Onode is logically in the cache
+ /// (it can be pinned and hence physically out
+ /// of it at the moment though)
+ bool pinned; ///< Onode is pinned
+ /// (or should be pinned when cached)
ExtentMap extent_map;
// track txc's that have not been committed to kv store (and whose
Onode(Collection *c, const ghobject_t& o,
const mempool::bluestore_cache_other::string& k)
- : s(nullptr),
- nref(0),
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
Onode(Collection* c, const ghobject_t& o,
- const string& k)
- : s(nullptr),
- nref(0),
+ const std::string& k)
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
Onode(Collection* c, const ghobject_t& o,
const char* k)
- : s(nullptr),
- nref(0),
+ : nref(0),
c(c),
oid(o),
key(k),
exists(false),
+ cached(false),
+ pinned(false),
extent_map(this) {
}
void dump(Formatter* f) const;
void flush();
- void get() {
- if (++nref == 2 && s != nullptr) {
- s->pin(*this);
- }
+ void get();
+ void put();
+
+ inline bool put_cache() {
+ ceph_assert(!cached);
+ cached = true;
+ return !pinned;
}
- void put() {
- int n = --nref;
- if (n == 1 && s != nullptr) {
- s->unpin(*this);
- }
- if (n == 0) {
- delete this;
- }
+ inline bool pop_cache() {
+ ceph_assert(cached);
+ cached = false;
+ return !pinned;
}
const string& get_omap_prefix();
std::atomic<uint64_t> num_pinned = {0};
std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes;
+
+ virtual void _pin(Onode* o) = 0;
+ virtual void _unpin(Onode* o) = 0;
+
public:
OnodeCacheShard(CephContext* cct) : CacheShard(cct) {}
static OnodeCacheShard *create(CephContext* cct, string type,
PerfCounters *logger);
- virtual void _add(OnodeRef& o, int level) = 0;
- virtual void _rm(OnodeRef& o) = 0;
- virtual void _touch(OnodeRef& o) = 0;
- virtual void _pin(Onode& o) = 0;
- virtual void _unpin(Onode& o) = 0;
+ virtual void _add(Onode* o, int level) = 0;
+ virtual void _rm(Onode* o) = 0;
- void pin(Onode& o) {
+ void pin(Onode* o, std::function<bool ()> validator) {
std::lock_guard l(lock);
- _pin(o);
+ if (validator()) {
+ _pin(o);
+ }
}
- void unpin(Onode& o) {
+ void unpin(Onode* o, std::function<bool()> validator) {
std::lock_guard l(lock);
- _unpin(o);
+ if (validator()) {
+ _unpin(o);
+ }
}
+ virtual void move_pinned(OnodeCacheShard *to, Onode *o) = 0;
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
bool empty() {
return _get_num() == 0;
friend class Collection; // for split_cache()
+ friend struct LruOnodeCacheShard;
+ void _remove(const ghobject_t& oid);
public:
OnodeSpace(OnodeCacheShard *c) : cache(c) {}
~OnodeSpace() {
clear();
}
- OnodeRef add(const ghobject_t& oid, OnodeRef o);
+ OnodeRef add(const ghobject_t& oid, OnodeRef& o);
OnodeRef lookup(const ghobject_t& o);
- void remove(const ghobject_t& oid) {
- onode_map.erase(oid);
- }
void rename(OnodeRef& o, const ghobject_t& old_oid,
const ghobject_t& new_oid,
const mempool::bluestore_cache_other::string& new_okey);
pool_opts_t pool_opts;
ContextQueue *commit_queue;
+ OnodeCacheShard* get_onode_cache() const {
+ return onode_map.cache;
+ }
OnodeRef get_onode(const ghobject_t& oid, bool create, bool is_createop=false);
// the terminology is confusing here, sorry!
void get_db_statistics(Formatter *f) override;
void generate_db_histogram(Formatter *f) override;
- void _flush_cache();
+ void _shutdown_cache();
int flush_cache(ostream *os = NULL) override;
void dump_perf_counters(Formatter *f) override {
f->open_object_section("perf_counters");
enum {
// use 0/nullptr as unset indication
LEVEL_FIRST = 1,
- LEVEL_WAL = LEVEL_FIRST,
+ LEVEL_LOG = LEVEL_FIRST, // BlueFS log
+ LEVEL_WAL,
LEVEL_DB,
LEVEL_SLOW,
LEVEL_MAX
typedef matrix_2d<uint64_t, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t;
per_level_per_dev_usage_t per_level_per_dev_usage;
+ // file count per level, add +1 to keep total file count
+ uint64_t per_level_files[LEVEL_MAX - LEVEL_FIRST + 1] = { 0 };
// Note: maximum per-device totals below might be smaller than corresponding
// perf counters by up to a single alloc unit (1M) due to superblock extent.
uint64_t reserved,
bool new_pol)
{
+ l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
}
}
- void* get_hint_by_device(uint8_t dev) const override {
- ceph_assert(dev == BlueFS::BDEV_WAL); // others aren't used atm
- return reinterpret_cast<void*>(LEVEL_WAL);
+ void* get_hint_for_log() const override {
+ return reinterpret_cast<void*>(LEVEL_LOG);
}
void* get_hint_by_dir(const string& dirname) const override;
max = cur;
}
}
+ ++per_level_files[pos];
+ ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
}
void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
if (hint == nullptr)
auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
ceph_assert(cur >= fnode.size);
cur -= fnode.size;
+ ceph_assert(per_level_files[pos] > 0);
+ --per_level_files[pos];
+ ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+ --per_level_files[LEVEL_MAX - LEVEL_FIRST];
}
void add_usage(void* hint, uint64_t fsize) override {
if (hint == nullptr)