ceph/src/rocksdb/memtable/write_buffer_manager.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "rocksdb/write_buffer_manager.h"
#include <mutex>
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
namespace {
const size_t kSizeDummyEntry = 256 * 1024;
// The key will be longer than keys for blocks in SST files so they won't
// conflict.
const size_t kCacheKeyPrefix = kMaxVarint64Length * 4 + 1;
}  // namespace

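// Bookkeeping used to charge memtable memory to a block cache: for every
// kSizeDummyEntry (256KB) of memtable memory, a zero-byte dummy entry with
// that charge is inserted into the cache, so memtable usage competes with
// data blocks for cache capacity (see ReserveMemWithCache/FreeMemWithCache).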
struct WriteBufferManager::CacheRep {
  std::shared_ptr<Cache> cache_;
  std::mutex cache_mutex_;
  std::atomic<size_t> cache_allocated_size_;
  // The non-prefix part will be overwritten with the next ID to use.
  char cache_key_[kCacheKeyPrefix + kMaxVarint64Length];
  uint64_t next_cache_key_id_ = 0;
  std::vector<Cache::Handle*> dummy_handles_;

  explicit CacheRep(std::shared_ptr<Cache> cache)
      : cache_(cache), cache_allocated_size_(0) {
    memset(cache_key_, 0, kCacheKeyPrefix);
    size_t pointer_size = sizeof(const void*);
    assert(pointer_size <= kCacheKeyPrefix);
    memcpy(cache_key_, static_cast<const void*>(this), pointer_size);
  }

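  // Builds the key for the next dummy entry: the fixed prefix (zero-padded
  // pointer to this CacheRep) followed by a varint encoding of an increasing
  // counter. Keys are therefore unique per manager and, being longer than SST
  // block cache keys, cannot collide with them.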
  Slice GetNextCacheKey() {
    memset(cache_key_ + kCacheKeyPrefix, 0, kMaxVarint64Length);
    char* end =
        EncodeVarint64(cache_key_ + kCacheKeyPrefix, next_cache_key_id_++);
    return Slice(cache_key_, static_cast<size_t>(end - cache_key_));
  }
};
#else
struct WriteBufferManager::CacheRep {};
#endif  // ROCKSDB_LITE

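// buffer_size_ is the total memtable budget; mutable_limit_ is set to 7/8 of
// it, presumably so that flushes can be triggered (via the checks declared in
// rocksdb/write_buffer_manager.h) before the full budget is exhausted.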
WriteBufferManager::WriteBufferManager(size_t _buffer_size,
                                       std::shared_ptr<Cache> cache)
    : buffer_size_(_buffer_size),
      mutable_limit_(buffer_size_ * 7 / 8),
      memory_used_(0),
      memory_active_(0),
      cache_rep_(nullptr) {
#ifndef ROCKSDB_LITE
  if (cache) {
    // Construct the cache key using the pointer to this.
    cache_rep_.reset(new CacheRep(cache));
  }
#else
  (void)cache;
#endif  // ROCKSDB_LITE
}

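// Force-release every dummy handle so the memory charged by this manager is
// returned to the block cache when the manager goes away.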
WriteBufferManager::~WriteBufferManager() {
#ifndef ROCKSDB_LITE
  if (cache_rep_) {
    for (auto* handle : cache_rep_->dummy_handles_) {
      cache_rep_->cache_->Release(handle, true);
    }
  }
#endif  // ROCKSDB_LITE
}

// Should only be called from the write thread.
void WriteBufferManager::ReserveMemWithCache(size_t mem) {
#ifndef ROCKSDB_LITE
  assert(cache_rep_ != nullptr);
  // Use a mutex to protect the data structures below. This can be optimized
  // to a lock-free solution if it turns out to be a performance bottleneck.
  std::lock_guard<std::mutex> lock(cache_rep_->cache_mutex_);

  size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) + mem;
  memory_used_.store(new_mem_used, std::memory_order_relaxed);
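  // Keep adding kSizeDummyEntry-sized dummy records until the amount charged
  // to the block cache covers the total memtable memory tracked by this
  // manager; inserting the dummies makes the cache evict real blocks if it is
  // already at capacity.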
  while (new_mem_used > cache_rep_->cache_allocated_size_) {
    // Expand the charged size by at least 256KB by adding a dummy record to
    // the cache.
    Cache::Handle* handle;
    cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr,
                               kSizeDummyEntry, nullptr, &handle);
    cache_rep_->dummy_handles_.push_back(handle);
    cache_rep_->cache_allocated_size_ += kSizeDummyEntry;
  }
#else
  (void)mem;
#endif  // ROCKSDB_LITE
}

void WriteBufferManager::FreeMemWithCache(size_t mem) {
#ifndef ROCKSDB_LITE
  assert(cache_rep_ != nullptr);
  // Use a mutex to protect the data structures below. This can be optimized
  // to a lock-free solution if it turns out to be a performance bottleneck.
  std::lock_guard<std::mutex> lock(cache_rep_->cache_mutex_);
  size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) - mem;
  memory_used_.store(new_mem_used, std::memory_order_relaxed);
  // Gradually shrink the memory charged to the block cache if the actual
  // usage drops below 3/4 of what has been reserved from the block cache.
  // We do this because:
  // 1. we don't pay the cost of updating the block cache immediately when a
  //    memtable is freed, as block cache inserts are expensive;
  // 2. eventually, if we walk away from a temporary memtable size increase,
  //    we make sure to shrink the memory charged to the block cache over
  //    time.
  // In this way, we only shrink the charged memory slowly, even when there
  // is enough margin.
  if (new_mem_used < cache_rep_->cache_allocated_size_ / 4 * 3 &&
      cache_rep_->cache_allocated_size_ - kSizeDummyEntry > new_mem_used) {
    assert(!cache_rep_->dummy_handles_.empty());
    cache_rep_->cache_->Release(cache_rep_->dummy_handles_.back(), true);
    cache_rep_->dummy_handles_.pop_back();
    cache_rep_->cache_allocated_size_ -= kSizeDummyEntry;
  }
#else
  (void)mem;
#endif  // ROCKSDB_LITE
}
}  // namespace ROCKSDB_NAMESPACE
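
// Usage sketch: a single WriteBufferManager can be shared by several DBs so
// that they all draw from one memtable budget, and, when constructed with a
// cache, that budget is also charged against the block cache as shown above.
// A minimal setup might look like the following (option names such as
// write_buffer_manager on Options/DBOptions are assumed from the public
// RocksDB API rather than defined in this file):
//
//   std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1 << 30);
//   auto wbm = std::make_shared<rocksdb::WriteBufferManager>(
//       size_t{512} << 20 /* 512MB across all memtables */, cache);
//   rocksdb::Options options;
//   options.write_buffer_manager = wbm;
//   // Open one or more DBs with these options; their memtable memory is
//   // then tracked by wbm and charged to `cache`.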