[ceph.git] / ceph / src / rocksdb / cache / lru_cache.h

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once

#include <string>

#include "cache/sharded_cache.h"

#include "port/port.h"
#include "util/autovector.h"

namespace rocksdb {

// LRU cache implementation

// An entry is a variable length heap-allocated structure.
// Entries are referenced by cache and/or by any external entity.
// The cache keeps all its entries in table. Some elements
// are also stored on LRU list.
//
// LRUHandle can be in these states:
// 1. Referenced externally AND in hash table.
//  In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true)
// 2. Not referenced externally and in hash table. In that case the entry is
// in the LRU and can be freed. (refs == 1 && in_cache == true)
// 3. Referenced externally and not in hash table. In that case the entry is
// in not on LRU and not in table. (refs >= 1 && in_cache == false)
//
// All newly created LRUHandles are in state 1. If you call
// LRUCacheShard::Release
// on entry in state 1, it will go into state 2. To move from state 1 to
// state 3, either call LRUCacheShard::Erase or LRUCacheShard::Insert with the
// same key.
// To move from state 2 to state 1, use LRUCacheShard::Lookup.
// Before destruction, make sure that no handles are in state 1. This means
// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a
// matching
// RUCache::Release (to move into state 2) or LRUCacheShard::Erase (for state 3)

struct LRUHandle {
  void* value;
  void (*deleter)(const Slice&, void* value);
  LRUHandle* next_hash;
  LRUHandle* next;
  LRUHandle* prev;
  size_t charge;  // TODO(opt): Only allow uint32_t?
  size_t key_length;
  uint32_t refs;     // a number of refs to this entry
                     // cache itself is counted as 1

  // Include the following flags:
  //   in_cache:    whether this entry is referenced by the hash table.
  //   is_high_pri: whether this entry is high priority entry.
  //   in_high_pri_pool: whether this entry is in high-pri pool.
  char flags;

  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons

  char key_data[1];  // Beginning of key

  Slice key() const {
    // For cheaper lookups, we allow a temporary Handle object
    // to store a pointer to a key in "value".
    if (next == this) {
      return *(reinterpret_cast<Slice*>(value));
    } else {
      return Slice(key_data, key_length);
    }
  }

  bool InCache() { return flags & 1; }
  bool IsHighPri() { return flags & 2; }
  bool InHighPriPool() { return flags & 4; }
  bool HasHit() { return flags & 8; }

  void SetInCache(bool in_cache) {
    if (in_cache) {
      flags |= 1;
    } else {
      flags &= ~1;
    }
  }

  void SetPriority(Cache::Priority priority) {
    if (priority == Cache::Priority::HIGH) {
      flags |= 2;
    } else {
      flags &= ~2;
    }
  }

  void SetInHighPriPool(bool in_high_pri_pool) {
    if (in_high_pri_pool) {
      flags |= 4;
    } else {
      flags &= ~4;
    }
  }

  void SetHit() { flags |= 8; }

  void Free() {
    assert((refs == 1 && InCache()) || (refs == 0 && !InCache()));
    if (deleter) {
      (*deleter)(key(), value);
    }
    delete[] reinterpret_cast<char*>(this);
  }
};

// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class LRUHandleTable {
 public:
  LRUHandleTable();
  ~LRUHandleTable();

  LRUHandle* Lookup(const Slice& key, uint32_t hash);
  LRUHandle* Insert(LRUHandle* h);
  LRUHandle* Remove(const Slice& key, uint32_t hash);

  template <typename T>
  void ApplyToAllCacheEntries(T func) {
    for (uint32_t i = 0; i < length_; i++) {
      LRUHandle* h = list_[i];
      while (h != nullptr) {
        auto n = h->next_hash;
        assert(h->InCache());
        func(h);
        h = n;
      }
    }
  }

 private:
  // Return a pointer to slot that points to a cache entry that
  // matches key/hash.  If there is no such cache entry, return a
  // pointer to the trailing slot in the corresponding linked list.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash);

  void Resize();

  // The table consists of an array of buckets where each bucket is
  // a linked list of cache entries that hash into the bucket.
  LRUHandle** list_;
  uint32_t length_;
  uint32_t elems_;
};

// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
 public:
  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
                double high_pri_pool_ratio);
  virtual ~LRUCacheShard();

  // Separate from constructor so caller can easily make an array of LRUCache
  // if current usage is more than new capacity, the function will attempt to
  // free the needed space
  virtual void SetCapacity(size_t capacity) override;

  // Set the flag to reject insertion if cache if full.
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;

  // Set percentage of capacity reserved for high-pri cache entries.
  void SetHighPriorityPoolRatio(double high_pri_pool_ratio);

  // Like Cache methods, but with an extra "hash" parameter.
  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Cache::Handle** handle,
                        Cache::Priority priority) override;
  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
  virtual bool Ref(Cache::Handle* handle) override;
  virtual bool Release(Cache::Handle* handle,
                       bool force_erase = false) override;
  virtual void Erase(const Slice& key, uint32_t hash) override;

  // Although in some platforms the update of size_t is atomic, to make sure
  // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
  // protect them with mutex_.

  virtual size_t GetUsage() const override;
  virtual size_t GetPinnedUsage() const override;

  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                      bool thread_safe) override;

  virtual void EraseUnRefEntries() override;

  virtual std::string GetPrintableOptions() const override;

  void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);

  //  Retrieves number of elements in LRU, for unit test purpose only
  //  not threadsafe
  size_t TEST_GetLRUSize();

  //  Retrives high pri pool ratio
  double GetHighPriPoolRatio();

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Insert(LRUHandle* e);

  // Overflow the last entry in high-pri pool to low-pri pool until size of
  // high-pri pool is no larger than the size specify by high_pri_pool_pct.
  void MaintainPoolSize();

  // Just reduce the reference count by 1.
  // Return true if last reference
  bool Unref(LRUHandle* e);

  // Free some space following strict LRU policy until enough space
  // to hold (usage_ + charge) is freed or the lru list is empty
  // This function is not thread safe - it needs to be executed while
  // holding the mutex_
  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);

  // Initialized before use.
  size_t capacity_;

  // Memory size for entries in high-pri pool.
  size_t high_pri_pool_usage_;

  // Whether to reject insertion if cache reaches its full capacity.
  bool strict_capacity_limit_;

  // Ratio of capacity reserved for high priority cache entries.
  double high_pri_pool_ratio_;

  // High-pri pool size, equals to capacity * high_pri_pool_ratio.
  // Remember the value to avoid recomputing each time.
  double high_pri_pool_capacity_;

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // LRU contains items which can be evicted, ie reference only by cache
  LRUHandle lru_;

  // Pointer to head of low-pri pool in LRU list.
  LRUHandle* lru_low_pri_;

  // ------------^^^^^^^^^^^^^-----------
  // Not frequently modified data members
  // ------------------------------------
  //
  // We separate data members that are updated frequently from the ones that
  // are not frequently updated so that they don't share the same cache line
  // which will lead into false cache sharing
  //
  // ------------------------------------
  // Frequently modified data members
  // ------------vvvvvvvvvvvvv-----------
  LRUHandleTable table_;

  // Memory size for entries residing in the cache
  size_t usage_;

  // Memory size for entries residing only in the LRU list
  size_t lru_usage_;

  // mutex_ protects the following state.
  // We don't count mutex_ as the cache's internal state so semantically we
  // don't mind mutex_ invoking the non-const actions.
  mutable port::Mutex mutex_;
};

class LRUCache : public ShardedCache {
 public:
  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
           double high_pri_pool_ratio);
  virtual ~LRUCache();
  virtual const char* Name() const override { return "LRUCache"; }
  virtual CacheShard* GetShard(int shard) override;
  virtual const CacheShard* GetShard(int shard) const override;
  virtual void* Value(Handle* handle) override;
  virtual size_t GetCharge(Handle* handle) const override;
  virtual uint32_t GetHash(Handle* handle) const override;
  virtual void DisownData() override;

  //  Retrieves number of elements in LRU, for unit test purpose only
  size_t TEST_GetLRUSize();
  //  Retrives high pri pool ratio
  double GetHighPriPoolRatio();

 private:
  LRUCacheShard* shards_ = nullptr;
  int num_shards_ = 0;
};

}  // namespace rocksdb
Commit	Line	Data
7c673cae	1	// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2 TL	2	// This source code is licensed under both the GPLv2 (found in the
	3	// COPYING file in the root directory) and Apache 2.0 License
	4	// (found in the LICENSE.Apache file in the root directory).
7c673cae FG	5	//
	6	// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
	7	// Use of this source code is governed by a BSD-style license that can be
	8	// found in the LICENSE file. See the AUTHORS file for names of contributors.
	9	#pragma once
	10
	11	#include <string>
	12
	13	#include "cache/sharded_cache.h"
	14
	15	#include "port/port.h"
	16	#include "util/autovector.h"
	17
	18	namespace rocksdb {
	19
	20	// LRU cache implementation
	21
	22	// An entry is a variable length heap-allocated structure.
	23	// Entries are referenced by cache and/or by any external entity.
	24	// The cache keeps all its entries in table. Some elements
	25	// are also stored on LRU list.
	26	//
	27	// LRUHandle can be in these states:
	28	// 1. Referenced externally AND in hash table.
	29	// In that case the entry is not in the LRU. (refs > 1 && in_cache == true)
	30	// 2. Not referenced externally and in hash table. In that case the entry is
	31	// in the LRU and can be freed. (refs == 1 && in_cache == true)
	32	// 3. Referenced externally and not in hash table. In that case the entry is
	33	// in not on LRU and not in table. (refs >= 1 && in_cache == false)
	34	//
	35	// All newly created LRUHandles are in state 1. If you call
	36	// LRUCacheShard::Release
	37	// on entry in state 1, it will go into state 2. To move from state 1 to
	38	// state 3, either call LRUCacheShard::Erase or LRUCacheShard::Insert with the
	39	// same key.
	40	// To move from state 2 to state 1, use LRUCacheShard::Lookup.
	41	// Before destruction, make sure that no handles are in state 1. This means
	42	// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a
	43	// matching
	44	// RUCache::Release (to move into state 2) or LRUCacheShard::Erase (for state 3)
	45
	46	struct LRUHandle {
	47	void* value;
	48	void (deleter)(const Slice&, void value);
	49	LRUHandle* next_hash;
	50	LRUHandle* next;
	51	LRUHandle* prev;
	52	size_t charge; // TODO(opt): Only allow uint32_t?
	53	size_t key_length;
	54	uint32_t refs; // a number of refs to this entry
	55	// cache itself is counted as 1
	56
	57	// Include the following flags:
	58	// in_cache: whether this entry is referenced by the hash table.
	59	// is_high_pri: whether this entry is high priority entry.
11fdf7f2	60	// in_high_pri_pool: whether this entry is in high-pri pool.
7c673cae FG	61	char flags;
	62
	63	uint32_t hash; // Hash of key(); used for fast sharding and comparisons
	64
	65	char key_data[1]; // Beginning of key
	66
	67	Slice key() const {
	68	// For cheaper lookups, we allow a temporary Handle object
	69	// to store a pointer to a key in "value".
	70	if (next == this) {
	71	return (reinterpret_cast<Slice>(value));
	72	} else {
	73	return Slice(key_data, key_length);
	74	}
	75	}
	76
	77	bool InCache() { return flags & 1; }
	78	bool IsHighPri() { return flags & 2; }
	79	bool InHighPriPool() { return flags & 4; }
11fdf7f2	80	bool HasHit() { return flags & 8; }
7c673cae FG	81
	82	void SetInCache(bool in_cache) {
	83	if (in_cache) {
	84	flags \|= 1;
	85	} else {
	86	flags &= ~1;
	87	}
	88	}
	89
	90	void SetPriority(Cache::Priority priority) {
	91	if (priority == Cache::Priority::HIGH) {
	92	flags \|= 2;
	93	} else {
	94	flags &= ~2;
	95	}
	96	}
	97
	98	void SetInHighPriPool(bool in_high_pri_pool) {
	99	if (in_high_pri_pool) {
	100	flags \|= 4;
	101	} else {
	102	flags &= ~4;
	103	}
	104	}
	105
11fdf7f2 TL	106	void SetHit() { flags \|= 8; }
11fdf7f2 TL	107
7c673cae FG	108	void Free() {
	109	assert((refs == 1 && InCache()) \|\| (refs == 0 && !InCache()));
	110	if (deleter) {
	111	(*deleter)(key(), value);
	112	}
	113	delete[] reinterpret_cast<char*>(this);
	114	}
	115	};
	116
	117	// We provide our own simple hash table since it removes a whole bunch
	118	// of porting hacks and is also faster than some of the built-in hash
	119	// table implementations in some of the compiler/runtime combinations
	120	// we have tested. E.g., readrandom speeds up by ~5% over the g++
	121	// 4.4.3's builtin hashtable.
	122	class LRUHandleTable {
	123	public:
	124	LRUHandleTable();
	125	~LRUHandleTable();
	126
	127	LRUHandle* Lookup(const Slice& key, uint32_t hash);
	128	LRUHandle* Insert(LRUHandle* h);
	129	LRUHandle* Remove(const Slice& key, uint32_t hash);
	130
	131	template <typename T>
	132	void ApplyToAllCacheEntries(T func) {
	133	for (uint32_t i = 0; i < length_; i++) {
	134	LRUHandle* h = list_[i];
	135	while (h != nullptr) {
	136	auto n = h->next_hash;
	137	assert(h->InCache());
	138	func(h);
	139	h = n;
	140	}
	141	}
	142	}
	143
	144	private:
	145	// Return a pointer to slot that points to a cache entry that
	146	// matches key/hash. If there is no such cache entry, return a
	147	// pointer to the trailing slot in the corresponding linked list.
	148	LRUHandle** FindPointer(const Slice& key, uint32_t hash);
	149
	150	void Resize();
	151
	152	// The table consists of an array of buckets where each bucket is
	153	// a linked list of cache entries that hash into the bucket.
11fdf7f2	154	LRUHandle** list_;
7c673cae FG	155	uint32_t length_;
7c673cae FG	156	uint32_t elems_;
7c673cae FG	157	};
	158
	159	// A single shard of sharded cache.
11fdf7f2	160	class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
7c673cae	161	public:
11fdf7f2 TL	162	LRUCacheShard(size_t capacity, bool strict_capacity_limit,
11fdf7f2 TL	163	double high_pri_pool_ratio);
7c673cae FG	164	virtual ~LRUCacheShard();
	165
	166	// Separate from constructor so caller can easily make an array of LRUCache
	167	// if current usage is more than new capacity, the function will attempt to
	168	// free the needed space
	169	virtual void SetCapacity(size_t capacity) override;
	170
	171	// Set the flag to reject insertion if cache if full.
	172	virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
	173
	174	// Set percentage of capacity reserved for high-pri cache entries.
	175	void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
	176
	177	// Like Cache methods, but with an extra "hash" parameter.
	178	virtual Status Insert(const Slice& key, uint32_t hash, void* value,
	179	size_t charge,
	180	void (deleter)(const Slice& key, void value),
	181	Cache::Handle** handle,
	182	Cache::Priority priority) override;
	183	virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
	184	virtual bool Ref(Cache::Handle* handle) override;
	185	virtual bool Release(Cache::Handle* handle,
	186	bool force_erase = false) override;
	187	virtual void Erase(const Slice& key, uint32_t hash) override;
	188
	189	// Although in some platforms the update of size_t is atomic, to make sure
	190	// GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
	191	// protect them with mutex_.
	192
	193	virtual size_t GetUsage() const override;
	194	virtual size_t GetPinnedUsage() const override;
	195
	196	virtual void ApplyToAllCacheEntries(void (callback)(void, size_t),
	197	bool thread_safe) override;
	198
	199	virtual void EraseUnRefEntries() override;
	200
	201	virtual std::string GetPrintableOptions() const override;
	202
	203	void TEST_GetLRUList(LRUHandle lru, LRUHandle lru_low_pri);
	204
11fdf7f2 TL	205	// Retrieves number of elements in LRU, for unit test purpose only
	206	// not threadsafe
	207	size_t TEST_GetLRUSize();
	208
	209	// Retrives high pri pool ratio
	210	double GetHighPriPoolRatio();
	211
7c673cae FG	212	private:
	213	void LRU_Remove(LRUHandle* e);
	214	void LRU_Insert(LRUHandle* e);
	215
	216	// Overflow the last entry in high-pri pool to low-pri pool until size of
	217	// high-pri pool is no larger than the size specify by high_pri_pool_pct.
	218	void MaintainPoolSize();
	219
	220	// Just reduce the reference count by 1.
	221	// Return true if last reference
	222	bool Unref(LRUHandle* e);
	223
	224	// Free some space following strict LRU policy until enough space
	225	// to hold (usage_ + charge) is freed or the lru list is empty
	226	// This function is not thread safe - it needs to be executed while
	227	// holding the mutex_
	228	void EvictFromLRU(size_t charge, autovector<LRUHandle> deleted);
	229
	230	// Initialized before use.
	231	size_t capacity_;
	232
7c673cae FG	233	// Memory size for entries in high-pri pool.
	234	size_t high_pri_pool_usage_;
	235
	236	// Whether to reject insertion if cache reaches its full capacity.
	237	bool strict_capacity_limit_;
	238
	239	// Ratio of capacity reserved for high priority cache entries.
	240	double high_pri_pool_ratio_;
	241
	242	// High-pri pool size, equals to capacity * high_pri_pool_ratio.
	243	// Remember the value to avoid recomputing each time.
	244	double high_pri_pool_capacity_;
	245
7c673cae FG	246	// Dummy head of LRU list.
	247	// lru.prev is newest entry, lru.next is oldest entry.
	248	// LRU contains items which can be evicted, ie reference only by cache
	249	LRUHandle lru_;
	250
	251	// Pointer to head of low-pri pool in LRU list.
	252	LRUHandle* lru_low_pri_;
	253
11fdf7f2 TL	254	// ------------^^^^^^^^^^^^^-----------
	255	// Not frequently modified data members
	256	// ------------------------------------
	257	//
	258	// We separate data members that are updated frequently from the ones that
	259	// are not frequently updated so that they don't share the same cache line
	260	// which will lead into false cache sharing
	261	//
	262	// ------------------------------------
	263	// Frequently modified data members
	264	// ------------vvvvvvvvvvvvv-----------
7c673cae	265	LRUHandleTable table_;
11fdf7f2 TL	266
	267	// Memory size for entries residing in the cache
	268	size_t usage_;
	269
	270	// Memory size for entries residing only in the LRU list
	271	size_t lru_usage_;
	272
	273	// mutex_ protects the following state.
	274	// We don't count mutex_ as the cache's internal state so semantically we
	275	// don't mind mutex_ invoking the non-const actions.
	276	mutable port::Mutex mutex_;
7c673cae FG	277	};
	278
	279	class LRUCache : public ShardedCache {
	280	public:
	281	LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
	282	double high_pri_pool_ratio);
	283	virtual ~LRUCache();
	284	virtual const char* Name() const override { return "LRUCache"; }
	285	virtual CacheShard* GetShard(int shard) override;
	286	virtual const CacheShard* GetShard(int shard) const override;
	287	virtual void* Value(Handle* handle) override;
	288	virtual size_t GetCharge(Handle* handle) const override;
	289	virtual uint32_t GetHash(Handle* handle) const override;
	290	virtual void DisownData() override;
	291
11fdf7f2 TL	292	// Retrieves number of elements in LRU, for unit test purpose only
	293	size_t TEST_GetLRUSize();
	294	// Retrives high pri pool ratio
	295	double GetHighPriPoolRatio();
	296
7c673cae	297	private:
11fdf7f2 TL	298	LRUCacheShard* shards_ = nullptr;
11fdf7f2 TL	299	int num_shards_ = 0;
7c673cae FG	300	};
	301
	302	} // namespace rocksdb