// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG
#define CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG

#include "common/RWLock.h"
#include "common/WorkQueue.h"
#include "common/AsyncOpTracker.h"
#include "librbd/cache/ImageCache.h"
#include "librbd/cache/ImageWriteback.h"
#include "librbd/Utils.h"
#include "librbd/BlockGuard.h"
#include "librbd/cache/Types.h"
#include "librbd/cache/rwl/LogOperation.h"
#include "librbd/cache/rwl/Request.h"
#include <functional>
#include <list>

class Context;
class SafeTimer;

namespace librbd {

struct ImageCtx;

namespace cache {

namespace rwl {

class SyncPointLogEntry;
class GenericWriteLogEntry;
class WriteLogEntry;
class GenericLogEntry;

typedef std::list<std::shared_ptr<WriteLogEntry>> WriteLogEntries;
typedef std::list<std::shared_ptr<GenericLogEntry>> GenericLogEntries;

/**** Write log entries end ****/

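/* WriteLogGuard detains a request whose image extents overlap an
 * in-flight request until the earlier request releases its guard cell
 * (see detain_guarded_request()/release_guarded_request() below). */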
typedef librbd::BlockGuard<GuardedRequest> WriteLogGuard;

class DeferredContexts;
template <typename> class ImageCacheState;

template <typename T>
struct C_BlockIORequest;

template <typename T>
struct C_WriteRequest;

using GenericLogOperations = std::list<GenericLogOperationSharedPtr>;

} // namespace rwl


template <typename ImageCtxT>
class ReplicatedWriteLog : public ImageCache<ImageCtxT> {
public:
  using typename ImageCache<ImageCtxT>::Extent;
  using typename ImageCache<ImageCtxT>::Extents;

  ReplicatedWriteLog(ImageCtxT &image_ctx,
                     librbd::cache::rwl::ImageCacheState<ImageCtxT>* cache_state);
  ~ReplicatedWriteLog();
  ReplicatedWriteLog(const ReplicatedWriteLog&) = delete;
  ReplicatedWriteLog &operator=(const ReplicatedWriteLog&) = delete;

  /// client AIO methods
  void aio_read(Extents&& image_extents, ceph::bufferlist *bl,
                int fadvise_flags, Context *on_finish) override;
  void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
                 int fadvise_flags, Context *on_finish) override;
  void aio_discard(uint64_t offset, uint64_t length,
                   uint32_t discard_granularity_bytes,
                   Context *on_finish) override;
  void aio_flush(Context *on_finish) override;
  void aio_writesame(uint64_t offset, uint64_t length,
                     ceph::bufferlist&& bl,
                     int fadvise_flags, Context *on_finish) override;
  void aio_compare_and_write(Extents&& image_extents,
                             ceph::bufferlist&& cmp_bl, ceph::bufferlist&& bl,
                             uint64_t *mismatch_offset, int fadvise_flags,
                             Context *on_finish) override;
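  /* All AIO methods are asynchronous: they may return before the I/O is
   * complete, and signal completion (or error) by invoking on_finish. */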

  /// internal state methods
  void init(Context *on_finish) override;
  void shut_down(Context *on_finish) override;
  void invalidate(Context *on_finish);
  void flush(Context *on_finish) override;

  using This = ReplicatedWriteLog<ImageCtxT>;
  using C_WriteRequestT = rwl::C_WriteRequest<This>;
  using C_BlockIORequestT = rwl::C_BlockIORequest<This>;
  CephContext * get_context();
  void release_guarded_request(BlockGuardCell *cell);
  void release_write_lanes(C_BlockIORequestT *req);
  bool alloc_resources(C_BlockIORequestT *req);
  template <typename V>
  void flush_pmem_buffer(V& ops);
  void schedule_append(rwl::GenericLogOperationsVector &ops);
  void schedule_flush_and_append(rwl::GenericLogOperationsVector &ops);
  std::shared_ptr<rwl::SyncPoint> get_current_sync_point() {
    return m_current_sync_point;
  }
  bool get_persist_on_flush() {
    return m_persist_on_flush;
  }
  void inc_last_op_sequence_num() {
    m_perfcounter->inc(l_librbd_rwl_log_ops, 1);
    ++m_last_op_sequence_num;
  }
  uint64_t get_last_op_sequence_num() {
    return m_last_op_sequence_num;
  }
  uint64_t get_current_sync_gen() {
    return m_current_sync_gen;
  }
  unsigned int get_free_lanes() {
    return m_free_lanes;
  }
  uint32_t get_free_log_entries() {
    return m_free_log_entries;
  }

private:
  typedef std::list<rwl::C_WriteRequest<This> *> C_WriteRequests;
  typedef std::list<rwl::C_BlockIORequest<This> *> C_BlockIORequests;

  BlockGuardCell* detain_guarded_request_helper(rwl::GuardedRequest &req);
  BlockGuardCell* detain_guarded_request_barrier_helper(rwl::GuardedRequest &req);
  void detain_guarded_request(C_BlockIORequestT *request,
                              rwl::GuardedRequestFunctionContext *guarded_ctx);

  librbd::cache::rwl::ImageCacheState<ImageCtxT>* m_cache_state = nullptr;

  std::atomic<bool> m_initialized = {false};
  PMEMobjpool *m_log_pool = nullptr;
  const char* m_rwl_pool_layout_name;

  ImageCtxT &m_image_ctx;

  std::string m_log_pool_name;
  bool m_log_is_poolset = false;
  uint64_t m_log_pool_config_size; /* Configured size of RWL */
  uint64_t m_log_pool_actual_size = 0; /* Actual size of RWL pool */

  uint32_t m_total_log_entries = 0;
  uint32_t m_free_log_entries = 0;

  std::atomic<uint64_t> m_bytes_allocated = {0}; /* Total bytes allocated in write buffers */
  uint64_t m_bytes_cached = 0; /* Total bytes used in write buffers */
  uint64_t m_bytes_dirty = 0; /* Total bytes yet to flush to RBD */
  uint64_t m_bytes_allocated_cap = 0;

  utime_t m_last_alloc_fail; /* Time of the last entry or buffer allocation failure */
  std::atomic<bool> m_alloc_failed_since_retire = {false};

  ImageWriteback<ImageCtxT> m_image_writeback;
  rwl::WriteLogGuard m_write_log_guard;
  /*
   * When m_first_free_entry == m_first_valid_entry, the log is
   * empty. There is always at least one free entry, which can't be
   * used.
   */
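  /*
   * Illustrative example (numbers are hypothetical): with 8 entries
   * total, m_first_valid_entry == 2 and m_first_free_entry == 6,
   * entries 2..5 are valid and entries 6, 7, 0, 1 are free. Allocation
   * must stop before m_first_free_entry would wrap around to equal
   * m_first_valid_entry, so a full log remains distinguishable from an
   * empty one.
   */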
  uint64_t m_first_free_entry = 0;  /* Entries from here to m_first_valid_entry-1 are free */
  uint64_t m_first_valid_entry = 0; /* Entries from here to m_first_free_entry-1 are valid */

  /* Starts at 0 for a new write log. Incremented on every flush. */
  uint64_t m_current_sync_gen = 0;
  /* Starts at 0 on each sync gen increase. Incremented before being
     applied to an operation */
  uint64_t m_last_op_sequence_num = 0;
  /* All writes bearing this and all prior sync gen numbers are flushed */
  uint64_t m_flushed_sync_gen = 0;
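  /* For example (hypothetical values): a write accepted under sync gen 3
   * as sequence number 7 is logged as (3,7). Once every write bearing
   * sync gen 3 or earlier has been written back, m_flushed_sync_gen
   * advances to 3 and all such writes are known to be flushed. */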

  bool m_persist_on_write_until_flush = true;
  bool m_flush_seen = false;

  AsyncOpTracker m_async_op_tracker;
  /* Debug counters for the places m_async_op_tracker is used */
  std::atomic<int> m_async_flush_ops = {0};
  std::atomic<int> m_async_append_ops = {0};
  std::atomic<int> m_async_complete_ops = {0};

  /* Acquire locks in order declared here */

  /* Hold m_deferred_dispatch_lock while consuming from m_deferred_ios. */
  mutable ceph::mutex m_deferred_dispatch_lock;
  /* Hold m_log_append_lock while appending or retiring log entries. */
  mutable ceph::mutex m_log_append_lock;
  /* Used for most synchronization */
  mutable ceph::mutex m_lock;

  /* Used in release/detain to make BlockGuard preserve submission order */
  mutable ceph::mutex m_blockguard_lock;
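  /* e.g. a thread that already holds m_lock may still take
   * m_blockguard_lock (declared later), but must never then acquire
   * m_log_append_lock or m_deferred_dispatch_lock, which precede it in
   * the declared order. */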

  /* Use m_blockguard_lock for the following 3 things */
  rwl::WriteLogGuard::BlockOperations m_awaiting_barrier;
  bool m_barrier_in_progress = false;
  BlockGuardCell *m_barrier_cell = nullptr;

  bool m_appending = false;
  bool m_dispatching_deferred_ops = false;

  rwl::GenericLogOperations m_ops_to_flush; /* Write ops needing flush in local log */
  rwl::GenericLogOperations m_ops_to_append; /* Write ops needing event append in local log */

  /* New entries are at the back. Oldest at the front */
  rwl::GenericLogEntries m_log_entries;
  rwl::GenericLogEntries m_dirty_log_entries;

  PerfCounters *m_perfcounter = nullptr;

  std::shared_ptr<rwl::SyncPoint> m_current_sync_point = nullptr;
  bool m_persist_on_flush = false; /* If false, persist each write before completion */

  /* Writes that have left the block guard, but are waiting for resources */
  C_BlockIORequests m_deferred_ios;
  /* Throttle writes concurrently allocating & replicating */
  unsigned int m_free_lanes = rwl::MAX_CONCURRENT_WRITES;
  unsigned int m_unpublished_reserves = 0;
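  /* A dispatched write holds one lane from alloc_resources() until
   * release_write_lanes(); when no lane is free, the request waits on
   * m_deferred_ios instead. */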

  /* Initialized from config, then set false during shutdown */
  std::atomic<bool> m_periodic_stats_enabled = {false};
  SafeTimer *m_timer = nullptr; /* Used with m_timer_lock */
  mutable ceph::mutex *m_timer_lock = nullptr; /* Used with and by m_timer */
  Context *m_timer_ctx = nullptr;

  ThreadPool m_thread_pool;
  ContextWQ m_work_queue;

  void perf_start(const std::string name);
  void perf_stop();
  void log_perf();
  void periodic_stats();
  void arm_periodic_stats();

  void rwl_init(Context *on_finish, rwl::DeferredContexts &later);
  void update_image_cache_state(Context *on_finish);
  void wake_up();

  void dispatch_deferred_writes();
  void alloc_and_dispatch_io_req(C_BlockIORequestT *write_req);
  void append_scheduled_ops();
  void enlist_op_appender();
  void schedule_append(rwl::GenericLogOperations &ops);
  void flush_then_append_scheduled_ops();
  void enlist_op_flusher();
  void alloc_op_log_entries(rwl::GenericLogOperations &ops);
  void flush_op_log_entries(rwl::GenericLogOperationsVector &ops);
  int append_op_log_entries(rwl::GenericLogOperations &ops);
  void complete_op_log_entries(rwl::GenericLogOperations &&ops, const int r);
  void schedule_complete_op_log_entries(rwl::GenericLogOperations &&ops, const int r);
};

} // namespace cache
} // namespace librbd

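/* Explicit instantiation for the production ImageCtx type; the matching
 * 'template class' definition lives in the corresponding .cc file. */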
extern template class librbd::cache::ReplicatedWriteLog<librbd::ImageCtx>;

#endif // CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG