// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_LIBRBD_CACHE_PARENT_WRITE_LOG
#define CEPH_LIBRBD_CACHE_PARENT_WRITE_LOG

#include "common/Timer.h"
#include "common/RWLock.h"
#include "common/WorkQueue.h"
#include "common/AsyncOpTracker.h"
#include "librbd/cache/ImageWriteback.h"
#include "librbd/Utils.h"
#include "librbd/BlockGuard.h"
#include "librbd/cache/Types.h"
#include "librbd/cache/pwl/LogOperation.h"
#include "librbd/cache/pwl/ReadRequest.h"
#include "librbd/cache/pwl/Request.h"
#include "librbd/cache/pwl/LogMap.h"
#include "librbd/cache/pwl/Builder.h"
#include <functional>
#include <list>

class Context;

namespace librbd {

struct ImageCtx;

namespace plugin { template <typename> struct Api; }

namespace cache {
namespace pwl {

class GenericLogEntry;
class GenericWriteLogEntry;
class SyncPointLogEntry;
class WriteLogEntry;
struct WriteLogCacheEntry;

typedef std::list<std::shared_ptr<WriteLogEntry>> WriteLogEntries;
typedef std::list<std::shared_ptr<GenericLogEntry>> GenericLogEntries;
typedef std::list<std::shared_ptr<GenericWriteLogEntry>> GenericWriteLogEntries;
typedef std::vector<std::shared_ptr<GenericLogEntry>> GenericLogEntriesVector;

typedef LogMapEntries<GenericWriteLogEntry> WriteLogMapEntries;
typedef LogMap<GenericWriteLogEntry> WriteLogMap;

/**** Write log entries end ****/

typedef librbd::BlockGuard<GuardedRequest> WriteLogGuard;

class DeferredContexts;
template <typename>
class ImageCacheState;

template<typename T>
class Builder;

template <typename T>
struct C_BlockIORequest;

template <typename T>
struct C_WriteRequest;

using GenericLogOperations = std::list<GenericLogOperationSharedPtr>;


template <typename ImageCtxT>
class AbstractWriteLog {
public:
  typedef io::Extent Extent;
  typedef io::Extents Extents;
  using This = AbstractWriteLog<ImageCtxT>;
  Builder<This> *m_builder;

  AbstractWriteLog(ImageCtxT &image_ctx,
                   librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state,
                   Builder<This> *builder,
                   cache::ImageWritebackInterface& image_writeback,
                   plugin::Api<ImageCtxT>& plugin_api);
  virtual ~AbstractWriteLog();
  AbstractWriteLog(const AbstractWriteLog&) = delete;
  AbstractWriteLog &operator=(const AbstractWriteLog&) = delete;

  /// IO methods
  void read(
      Extents&& image_extents, ceph::bufferlist *bl,
      int fadvise_flags, Context *on_finish);
  void write(
      Extents&& image_extents, ceph::bufferlist&& bl,
      int fadvise_flags,
      Context *on_finish);
  void discard(
      uint64_t offset, uint64_t length,
      uint32_t discard_granularity_bytes,
      Context *on_finish);
  void flush(
      io::FlushSource flush_source, Context *on_finish);
  void writesame(
      uint64_t offset, uint64_t length,
      ceph::bufferlist&& bl,
      int fadvise_flags, Context *on_finish);
  void compare_and_write(
      Extents&& image_extents,
      ceph::bufferlist&& cmp_bl, ceph::bufferlist&& bl,
      uint64_t *mismatch_offset, int fadvise_flags,
      Context *on_finish);

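  /* A minimal usage sketch (illustration only, not part of this class): all
   * IO methods are asynchronous and report completion through the supplied
   * Context. The `cache` pointer and `on_write_done` callback below are
   * hypothetical names.
   *
   *   ceph::bufferlist bl;
   *   bl.append(std::string(4096, 'a'));   // 4 KiB payload
   *   Context *on_write_done = new LambdaContext([](int r) {
   *     // r == 0 on success, negative errno on failure
   *   });
   *   // Write 4 KiB at image offset 0; bl's ownership moves to the cache.
   *   cache->write({{0, 4096}}, std::move(bl), 0, on_write_done);
   */
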
  /// internal state methods
  void init(Context *on_finish);
  void shut_down(Context *on_finish);
  void invalidate(Context *on_finish);
  void flush(Context *on_finish);

  using C_WriteRequestT = pwl::C_WriteRequest<This>;
  using C_BlockIORequestT = pwl::C_BlockIORequest<This>;
  using C_FlushRequestT = pwl::C_FlushRequest<This>;
  using C_DiscardRequestT = pwl::C_DiscardRequest<This>;
  using C_WriteSameRequestT = pwl::C_WriteSameRequest<This>;

  CephContext *get_context();
  void release_guarded_request(BlockGuardCell *cell);
  void release_write_lanes(C_BlockIORequestT *req);
  virtual bool alloc_resources(C_BlockIORequestT *req) = 0;
  virtual void setup_schedule_append(
      pwl::GenericLogOperationsVector &ops, bool do_early_flush,
      C_BlockIORequestT *req) = 0;
  void schedule_append(pwl::GenericLogOperationsVector &ops, C_BlockIORequestT *req = nullptr);
  void schedule_append(pwl::GenericLogOperationSharedPtr op, C_BlockIORequestT *req = nullptr);
  void flush_new_sync_point(C_FlushRequestT *flush_req,
                            pwl::DeferredContexts &later);

  std::shared_ptr<pwl::SyncPoint> get_current_sync_point() {
    return m_current_sync_point;
  }
  bool get_persist_on_flush() {
    return m_persist_on_flush;
  }
  void inc_last_op_sequence_num() {
    m_perfcounter->inc(l_librbd_pwl_log_ops, 1);
    ++m_last_op_sequence_num;
  }
  uint64_t get_last_op_sequence_num() {
    return m_last_op_sequence_num;
  }
  uint64_t get_current_sync_gen() {
    return m_current_sync_gen;
  }
  unsigned int get_free_lanes() {
    return m_free_lanes;
  }
  uint32_t get_free_log_entries() {
    return m_free_log_entries;
  }
  void add_into_log_map(pwl::GenericWriteLogEntries &log_entries,
                        C_BlockIORequestT *req);
  virtual void complete_user_request(Context *&user_req, int r) = 0;
  virtual void copy_bl_to_buffer(
      WriteRequestResources *resources,
      std::unique_ptr<WriteLogOperationSet> &op_set) {}

private:
  typedef std::list<pwl::C_WriteRequest<This> *> C_WriteRequests;
  typedef std::list<pwl::C_BlockIORequest<This> *> C_BlockIORequests;

  std::atomic<bool> m_initialized = {false};

  uint64_t m_bytes_dirty = 0; /* Total bytes yet to flush to RBD */
  utime_t m_last_alloc_fail;  /* Entry or buffer allocation fail seen */

  pwl::WriteLogGuard m_write_log_guard;

  /* Starts at 0 for a new write log. Incremented on every flush. */
  uint64_t m_current_sync_gen = 0;
  /* Starts at 0 on each sync gen increase. Incremented before being
     applied to an operation */
  uint64_t m_last_op_sequence_num = 0;
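
  /* Worked example of the numbering above: while m_current_sync_gen == 3,
   * successive ops are numbered m_last_op_sequence_num = 1, 2, 3, ... A
   * flush creates a new sync point, bumps the generation to 4, and the
   * per-generation sequence restarts from 0 (so the next op gets 1). Writes
   * whose sync gen is <= m_flushed_sync_gen (below) are known flushed. */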

  bool m_persist_on_write_until_flush = true;

  pwl::WriteLogGuard m_flush_guard;
  mutable ceph::mutex m_flush_guard_lock;

  /* Debug counters for the places m_async_op_tracker is used */
  std::atomic<int> m_async_complete_ops = {0};
  std::atomic<int> m_async_null_flush_finish = {0};
  std::atomic<int> m_async_process_work = {0};

  /* Hold m_deferred_dispatch_lock while consuming from m_deferred_ios. */
  mutable ceph::mutex m_deferred_dispatch_lock;

  /* Used in release/detain to make BlockGuard preserve submission order */
  mutable ceph::mutex m_blockguard_lock;

  /* Use m_blockguard_lock for the following 3 things */
  bool m_barrier_in_progress = false;
  BlockGuardCell *m_barrier_cell = nullptr;

  bool m_wake_up_enabled = true;

  Contexts m_flush_complete_contexts;

  std::shared_ptr<pwl::SyncPoint> m_current_sync_point = nullptr;
  bool m_persist_on_flush = false; /* If false, persist each write before completion */

  int m_flush_ops_in_flight = 0;
  int m_flush_bytes_in_flight = 0;
  uint64_t m_lowest_flushing_sync_gen = 0;

  /* Writes that have left the block guard, but are waiting for resources */
  C_BlockIORequests m_deferred_ios;
  /* Throttle writes concurrently allocating & replicating */
  unsigned int m_free_lanes = pwl::MAX_CONCURRENT_WRITES;
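
  /* Sketch of the lane accounting above (simplified; the real checks live in
   * check_allocation() and the derived classes' alloc_resources()): a request
   * asks for some number of lanes while it allocates buffers and replicates;
   * if too few are free it is parked on m_deferred_ios instead, e.g.
   *
   *   if (num_lanes <= m_free_lanes) {
   *     m_free_lanes -= num_lanes;      // dispatch now
   *   } else {
   *     m_deferred_ios.push_back(req);  // retry via dispatch_deferred_writes()
   *   }
   */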

  SafeTimer *m_timer = nullptr; /* Used with m_timer_lock */
  mutable ceph::mutex *m_timer_lock = nullptr; /* Used with and by m_timer */
  Context *m_timer_ctx = nullptr;

  ThreadPool m_thread_pool;

  uint32_t m_discard_granularity_bytes;

  BlockGuardCell* detain_guarded_request_helper(pwl::GuardedRequest &req);
  BlockGuardCell* detain_guarded_request_barrier_helper(
      pwl::GuardedRequest &req);
  void detain_guarded_request(C_BlockIORequestT *request,
                              pwl::GuardedRequestFunctionContext *guarded_ctx,
                              bool is_barrier);
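
  /* Intended detain/release flow, as suggested by the declarations above (a
   * sketch, not a quote from the implementation): detain_guarded_request()
   * assigns the request a BlockGuardCell covering its image extent; requests
   * that overlap a detained extent, or that arrive while a barrier is in
   * progress, queue on that cell, and their GuardedRequestFunctionContext
   * fires only after release_guarded_request(cell), preserving submission
   * order. */
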
  void perf_start(const std::string name);
  void perf_stop();
  void log_perf();
  void periodic_stats();
  void arm_periodic_stats();

  void pwl_init(Context *on_finish, pwl::DeferredContexts &later);
  void update_image_cache_state(Context *on_finish);
  void handle_update_image_cache_state(int r);
  void check_image_cache_state_clean();

  void flush_dirty_entries(Context *on_finish);
  bool can_flush_entry(const std::shared_ptr<pwl::GenericLogEntry> log_entry);
  bool handle_flushed_sync_point(
      std::shared_ptr<pwl::SyncPointLogEntry> log_entry);
  void sync_point_writer_flushed(
      std::shared_ptr<pwl::SyncPointLogEntry> log_entry);

  void init_flush_new_sync_point(pwl::DeferredContexts &later);
  void new_sync_point(pwl::DeferredContexts &later);
  pwl::C_FlushRequest<AbstractWriteLog<ImageCtxT>>* make_flush_req(
      Context *on_finish);
  void flush_new_sync_point_if_needed(C_FlushRequestT *flush_req,
                                      pwl::DeferredContexts &later);

  void alloc_and_dispatch_io_req(C_BlockIORequestT *write_req);
  void schedule_complete_op_log_entries(pwl::GenericLogOperations &&ops,
                                        const int r);
  void internal_flush(bool invalidate, Context *on_finish);

protected:
  librbd::cache::pwl::ImageCacheState<ImageCtxT>* m_cache_state = nullptr;

  std::atomic<bool> m_shutting_down = {false};
  std::atomic<bool> m_invalidating = {false};

  ImageCtxT &m_image_ctx;

  std::string m_log_pool_name;
  uint64_t m_log_pool_size;

  uint32_t m_total_log_entries = 0;
  uint32_t m_free_log_entries = 0;

  std::atomic<uint64_t> m_bytes_allocated = {0}; /* Total bytes allocated in write buffers */
  uint64_t m_bytes_cached = 0; /* Total bytes used in write buffers */
  uint64_t m_bytes_allocated_cap = 0;

  std::atomic<bool> m_alloc_failed_since_retire = {false};

  cache::ImageWritebackInterface& m_image_writeback;
  plugin::Api<ImageCtxT>& m_plugin_api;

  /*
   * When m_first_free_entry == m_first_valid_entry, the log is
   * empty. There is always at least one free entry, which can't be
   * used.
   */
  uint64_t m_first_free_entry = 0;  /* Entries from here to m_first_valid_entry-1 are free */
  uint64_t m_first_valid_entry = 0; /* Entries from here to m_first_free_entry-1 are valid */
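
  /* Worked example of the ring-buffer convention above (a sketch, assuming
   * the one-slot-reserved scheme the comment describes): with
   * m_total_log_entries == 8, m_first_valid_entry == 2 and
   * m_first_free_entry == 6, entries 2..5 are valid and slots 6, 7, 0 are
   * usable; slot 1 stays reserved so a full log is distinguishable from an
   * empty one. In general the number of usable free entries is
   *
   *   (m_first_valid_entry + m_total_log_entries
   *    - m_first_free_entry - 1) % m_total_log_entries
   */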

  /* All writes bearing this and all prior sync gen numbers are flushed */
  uint64_t m_flushed_sync_gen = 0;

  AsyncOpTracker m_async_op_tracker;
  /* Debug counters for the places m_async_op_tracker is used */
  std::atomic<int> m_async_flush_ops = {0};
  std::atomic<int> m_async_append_ops = {0};

  /* Acquire locks in order declared here */

  mutable ceph::mutex m_log_retire_lock;
  /* Hold a read lock on m_entry_reader_lock to add readers to log entry
   * bufs. Hold a write lock to prevent readers from being added (e.g. when
   * removing log entries from the map). No lock required to remove readers. */
  mutable RWLock m_entry_reader_lock;
  /* Hold m_log_append_lock while appending or retiring log entries. */
  mutable ceph::mutex m_log_append_lock;
  /* Used for most synchronization */
  mutable ceph::mutex m_lock;
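
  /* Illustrative nesting for the ordering rule above (a sketch, not code from
   * the implementation): a thread needing several of these locks must take
   * them in declaration order, e.g.
   *
   *   std::lock_guard retire_locker(m_log_retire_lock);  // declared first
   *   std::lock_guard locker(m_lock);                    // declared last
   *
   * and must never acquire m_log_retire_lock while already holding m_lock. */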

  /* Use m_blockguard_lock for the following 3 things */
  pwl::WriteLogGuard::BlockOperations m_awaiting_barrier;

  bool m_wake_up_requested = false;
  bool m_wake_up_scheduled = false;
  bool m_appending = false;
  bool m_dispatching_deferred_ops = false;

  pwl::GenericLogOperations m_ops_to_flush; /* Write ops needing flush in local log */
  pwl::GenericLogOperations m_ops_to_append; /* Write ops needing event append in local log */

  pwl::WriteLogMap m_blocks_to_log_entries;

  /* New entries are at the back. Oldest at the front */
  pwl::GenericLogEntries m_log_entries;
  pwl::GenericLogEntries m_dirty_log_entries;

  PerfCounters *m_perfcounter = nullptr;

  unsigned int m_unpublished_reserves = 0;

  ContextWQ m_work_queue;

  void wake_up();

  void update_entries(
      std::shared_ptr<pwl::GenericLogEntry> *log_entry,
      pwl::WriteLogCacheEntry *cache_entry,
      std::map<uint64_t, bool> &missing_sync_points,
      std::map<uint64_t,
               std::shared_ptr<pwl::SyncPointLogEntry>> &sync_point_entries,
      uint64_t entry_index);
  void update_sync_points(
      std::map<uint64_t, bool> &missing_sync_points,
      std::map<uint64_t,
               std::shared_ptr<pwl::SyncPointLogEntry>> &sync_point_entries,
      pwl::DeferredContexts &later);
  virtual void inc_allocated_cached_bytes(
      std::shared_ptr<pwl::GenericLogEntry> log_entry) = 0;
  Context *construct_flush_entry(
      const std::shared_ptr<pwl::GenericLogEntry> log_entry, bool invalidating);
  void detain_flush_guard_request(std::shared_ptr<GenericLogEntry> log_entry,
                                  GuardedRequestFunctionContext *guarded_ctx);
  void process_writeback_dirty_entries();
  bool can_retire_entry(const std::shared_ptr<pwl::GenericLogEntry> log_entry);

  void dispatch_deferred_writes(void);
  void complete_op_log_entries(pwl::GenericLogOperations &&ops, const int r);

  bool check_allocation(
      C_BlockIORequestT *req, uint64_t bytes_cached, uint64_t bytes_dirtied,
      uint64_t bytes_allocated, uint32_t num_lanes, uint32_t num_log_entries,
      uint32_t num_unpublished_reserves);
  void append_scheduled(
      pwl::GenericLogOperations &ops, bool &ops_remain, bool &appending,
      bool isRWL = false);

  virtual void process_work() = 0;
  virtual void append_scheduled_ops(void) = 0;
  virtual void schedule_append_ops(pwl::GenericLogOperations &ops,
                                   C_BlockIORequestT *req) = 0;
  virtual void remove_pool_file() = 0;
  virtual bool initialize_pool(Context *on_finish,
                               pwl::DeferredContexts &later) = 0;
  virtual void collect_read_extents(
      uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
      std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
      Extent hit_extent, pwl::C_ReadRequest *read_ctx) = 0;
  virtual void complete_read(
      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
      std::vector<bufferlist*> &bls_to_read, Context *ctx) = 0;
  virtual void write_data_to_buffer(
      std::shared_ptr<pwl::WriteLogEntry> ws_entry,
      pwl::WriteLogCacheEntry *cache_entry) {}
  virtual void release_ram(
      const std::shared_ptr<pwl::GenericLogEntry> log_entry) {}
  virtual void alloc_op_log_entries(pwl::GenericLogOperations &ops) {}
  virtual bool retire_entries(const unsigned long int frees_per_tx) {
    return false;
  }
  virtual void schedule_flush_and_append(
      pwl::GenericLogOperationsVector &ops) {}
  virtual void persist_last_flushed_sync_gen() {}
  virtual void reserve_cache(C_BlockIORequestT *req, bool &alloc_succeeds,
                             bool &no_space) {}
  virtual void construct_flush_entries(pwl::GenericLogEntries entries_to_flush,
                                       DeferredContexts &post_unlock,
                                       bool has_write_entry) = 0;
  virtual uint64_t get_max_extent() {
    return 0;
  }
  void update_image_cache_state(void);
};

} // namespace pwl
} // namespace cache
} // namespace librbd

extern template class librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx>;

#endif // CEPH_LIBRBD_CACHE_PARENT_WRITE_LOG