]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG | |
5 | #define CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG | |
6 | ||
7 | #include "common/RWLock.h" | |
8 | #include "common/WorkQueue.h" | |
9 | #include "common/AsyncOpTracker.h" | |
10 | #include "librbd/cache/ImageCache.h" | |
11 | #include "librbd/cache/ImageWriteback.h" | |
12 | #include "librbd/Utils.h" | |
13 | #include "librbd/BlockGuard.h" | |
14 | #include "librbd/cache/Types.h" | |
15 | #include "librbd/cache/rwl/LogOperation.h" | |
16 | #include "librbd/cache/rwl/Request.h" | |
17 | #include <functional> | |
18 | #include <list> | |
19 | ||
// Forward declarations to keep this header light; full definitions live in
// their own headers (common/Context.h, common/Timer.h).
class Context;
class SafeTimer;

namespace librbd {

struct ImageCtx;

namespace cache {

namespace rwl {

// Log-entry class hierarchy (defined in librbd/cache/rwl/LogEntry.h —
// not included here; forward declarations suffice for the shared_ptr
// typedefs and member declarations below).
class SyncPointLogEntry;
class GenericWriteLogEntry;
class WriteLogEntry;
class GenericLogEntry;

// Ordered collections of log entries; std::list keeps iterators/references
// stable while entries are moved between the dirty/clean bookkeeping lists.
typedef std::list<std::shared_ptr<WriteLogEntry>> WriteLogEntries;
typedef std::list<std::shared_ptr<GenericLogEntry>> GenericLogEntries;

/**** Write log entries end ****/

// Block guard specialized for RWL guarded requests; used to serialize
// overlapping IO and to implement write barriers.
typedef librbd::BlockGuard<GuardedRequest> WriteLogGuard;

class DeferredContexts;
template <typename> class ImageCacheState;

// Request types are templated on the owning cache type (see Request.h).
template <typename T>
struct C_BlockIORequest;

template <typename T>
struct C_WriteRequest;

using GenericLogOperations = std::list<GenericLogOperationSharedPtr>;

} // namespace rwl

/**
 * ReplicatedWriteLog: an ImageCache implementation that persists writes to a
 * local log (backed by a PMEMobjpool, i.e. PMDK persistent memory) before
 * acknowledging them, and flushes them to the backing image via
 * ImageWriteback.  Declaration only — all non-trivial methods are defined in
 * the corresponding .cc file.
 */
template <typename ImageCtxT>
class ReplicatedWriteLog : public ImageCache<ImageCtxT> {
public:
  using typename ImageCache<ImageCtxT>::Extent;
  using typename ImageCache<ImageCtxT>::Extents;

  ReplicatedWriteLog(ImageCtxT &image_ctx, librbd::cache::rwl::ImageCacheState<ImageCtxT>* cache_state);
  ~ReplicatedWriteLog();
  // Non-copyable: owns the pmem pool handle, locks, and op trackers.
  ReplicatedWriteLog(const ReplicatedWriteLog&) = delete;
  ReplicatedWriteLog &operator=(const ReplicatedWriteLog&) = delete;

  /// client AIO methods
  void aio_read(Extents&& image_extents, ceph::bufferlist *bl,
                int fadvise_flags, Context *on_finish) override;
  void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
                 int fadvise_flags, Context *on_finish) override;
  void aio_discard(uint64_t offset, uint64_t length,
                   uint32_t discard_granularity_bytes,
                   Context *on_finish) override;
  void aio_flush(Context *on_finish) override;
  void aio_writesame(uint64_t offset, uint64_t length,
                     ceph::bufferlist&& bl,
                     int fadvise_flags, Context *on_finish) override;
  void aio_compare_and_write(Extents&& image_extents,
                             ceph::bufferlist&& cmp_bl, ceph::bufferlist&& bl,
                             uint64_t *mismatch_offset,int fadvise_flags,
                             Context *on_finish) override;

  /// internal state methods
  void init(Context *on_finish) override;
  void shut_down(Context *on_finish) override;
  // NOTE: not an override — invalidate() is specific to this cache type.
  void invalidate(Context *on_finish);
  void flush(Context *on_finish) override;

  using This = ReplicatedWriteLog<ImageCtxT>;
  using C_WriteRequestT = rwl::C_WriteRequest<This>;
  using C_BlockIORequestT = rwl::C_BlockIORequest<This>;

  // Accessors used by the rwl request/operation helper classes, which hold a
  // pointer to this cache (hence public despite being internal machinery).
  CephContext * get_context();
  void release_guarded_request(BlockGuardCell *cell);
  void release_write_lanes(C_BlockIORequestT *req);
  bool alloc_resources(C_BlockIORequestT *req);
  template <typename V>
  void flush_pmem_buffer(V& ops);
  void schedule_append(rwl::GenericLogOperationsVector &ops);
  void schedule_flush_and_append(rwl::GenericLogOperationsVector &ops);
  std::shared_ptr<rwl::SyncPoint> get_current_sync_point() {
    return m_current_sync_point;
  }
  bool get_persist_on_flush() {
    return m_persist_on_flush;
  }
  // Bumps the per-sync-gen op sequence and the log-ops perf counter together.
  void inc_last_op_sequence_num() {
    m_perfcounter->inc(l_librbd_rwl_log_ops, 1);
    ++m_last_op_sequence_num;
  }
  uint64_t get_last_op_sequence_num() {
    return m_last_op_sequence_num;
  }
  uint64_t get_current_sync_gen() {
    return m_current_sync_gen;
  }
  unsigned int get_free_lanes() {
    return m_free_lanes;
  }
  uint32_t get_free_log_entries() {
    return m_free_log_entries;
  }
private:
  typedef std::list<rwl::C_WriteRequest<This> *> C_WriteRequests;
  typedef std::list<rwl::C_BlockIORequest<This> *> C_BlockIORequests;

  // Block-guard admission: detain an incoming request until overlapping /
  // barrier-ordered requests ahead of it have released their cells.
  BlockGuardCell* detain_guarded_request_helper(rwl::GuardedRequest &req);
  BlockGuardCell* detain_guarded_request_barrier_helper(rwl::GuardedRequest &req);
  void detain_guarded_request(C_BlockIORequestT *request, rwl::GuardedRequestFunctionContext *guarded_ctx);

  librbd::cache::rwl::ImageCacheState<ImageCtxT>* m_cache_state = nullptr;

  std::atomic<bool> m_initialized = {false};
  // Handle to the persistent-memory pool holding the write log (PMDK
  // libpmemobj); opened/created during init().
  PMEMobjpool *m_log_pool = nullptr;
  const char* m_rwl_pool_layout_name;

  ImageCtxT &m_image_ctx;

  std::string m_log_pool_name;
  bool m_log_is_poolset = false;
  uint64_t m_log_pool_config_size; /* Configured size of RWL */
  uint64_t m_log_pool_actual_size = 0; /* Actual size of RWL pool */

  uint32_t m_total_log_entries = 0;
  uint32_t m_free_log_entries = 0;

  std::atomic<uint64_t> m_bytes_allocated = {0}; /* Total bytes allocated in write buffers */
  uint64_t m_bytes_cached = 0;    /* Total bytes used in write buffers */
  uint64_t m_bytes_dirty = 0;     /* Total bytes yet to flush to RBD */
  uint64_t m_bytes_allocated_cap = 0;

  utime_t m_last_alloc_fail;      /* Entry or buffer allocation fail seen */
  std::atomic<bool> m_alloc_failed_since_retire = {false};

  ImageWriteback<ImageCtxT> m_image_writeback;
  rwl::WriteLogGuard m_write_log_guard;
  /*
   * When m_first_free_entry == m_first_valid_entry, the log is
   * empty. There is always at least one free entry, which can't be
   * used.
   */
  uint64_t m_first_free_entry = 0;  /* Entries from here to m_first_valid_entry-1 are free */
  uint64_t m_first_valid_entry = 0; /* Entries from here to m_first_free_entry-1 are valid */

  /* Starts at 0 for a new write log. Incremented on every flush. */
  uint64_t m_current_sync_gen = 0;
  /* Starts at 0 on each sync gen increase. Incremented before applied
     to an operation */
  uint64_t m_last_op_sequence_num = 0;
  /* All writes bearing this and all prior sync gen numbers are flushed */
  uint64_t m_flushed_sync_gen = 0;

  bool m_persist_on_write_until_flush = true;
  bool m_flush_seen = false;

  // Tracks outstanding async work so shutdown can wait for it; the three
  // counters below break the tracked ops down by origin for debugging.
  AsyncOpTracker m_async_op_tracker;
  /* Debug counters for the places m_async_op_tracker is used */
  std::atomic<int> m_async_flush_ops = {0};
  std::atomic<int> m_async_append_ops = {0};
  std::atomic<int> m_async_complete_ops = {0};

  /* Acquire locks in order declared here */

  /* Hold m_deferred_dispatch_lock while consuming from m_deferred_ios. */
  mutable ceph::mutex m_deferred_dispatch_lock;
  /* Hold m_log_append_lock while appending or retiring log entries. */
  mutable ceph::mutex m_log_append_lock;
  /* Used for most synchronization */
  mutable ceph::mutex m_lock;

  /* Used in release/detain to make BlockGuard preserve submission order */
  mutable ceph::mutex m_blockguard_lock;

  /* Use m_blockguard_lock for the following 3 things */
  rwl::WriteLogGuard::BlockOperations m_awaiting_barrier;
  bool m_barrier_in_progress = false;
  BlockGuardCell *m_barrier_cell = nullptr;

  bool m_appending = false;
  bool m_dispatching_deferred_ops = false;

  rwl::GenericLogOperations m_ops_to_flush; /* Write ops needing flush in local log */
  rwl::GenericLogOperations m_ops_to_append; /* Write ops needing event append in local log */

  /* New entries are at the back. Oldest at the front */
  rwl::GenericLogEntries m_log_entries;
  rwl::GenericLogEntries m_dirty_log_entries;

  PerfCounters *m_perfcounter = nullptr;

  std::shared_ptr<rwl::SyncPoint> m_current_sync_point = nullptr;
  bool m_persist_on_flush = false; /* If false, persist each write before completion */

  /* Writes that have left the block guard, but are waiting for resources */
  C_BlockIORequests m_deferred_ios;
  /* Throttle writes concurrently allocating & replicating */
  unsigned int m_free_lanes = rwl::MAX_CONCURRENT_WRITES;
  unsigned int m_unpublished_reserves = 0;

  /* Initialized from config, then set false during shutdown */
  std::atomic<bool> m_periodic_stats_enabled = {false};
  SafeTimer *m_timer = nullptr; /* Used with m_timer_lock */
  mutable ceph::mutex *m_timer_lock = nullptr; /* Used with and by m_timer */
  Context *m_timer_ctx = nullptr;

  // Dedicated thread pool / work queue for deferred-IO dispatch and log
  // append work (see wake_up()/append_scheduled_ops() in the .cc).
  ThreadPool m_thread_pool;
  ContextWQ m_work_queue;

  // Perf-counter lifecycle and periodic stats logging.
  void perf_start(const std::string name);
  void perf_stop();
  void log_perf();
  void periodic_stats();
  void arm_periodic_stats();

  void rwl_init(Context *on_finish, rwl::DeferredContexts &later);
  void update_image_cache_state(Context *on_finish);
  void wake_up();

  // IO dispatch / log-append pipeline (implemented in the .cc):
  // dispatch deferred writes, allocate resources, schedule pmem flush of
  // write buffers, then append + complete log entries.
  void dispatch_deferred_writes(void);
  void alloc_and_dispatch_io_req(C_BlockIORequestT *write_req);
  void append_scheduled_ops(void);
  void enlist_op_appender();
  void schedule_append(rwl::GenericLogOperations &ops);
  void flush_then_append_scheduled_ops(void);
  void enlist_op_flusher();
  void alloc_op_log_entries(rwl::GenericLogOperations &ops);
  void flush_op_log_entries(rwl::GenericLogOperationsVector &ops);
  int append_op_log_entries(rwl::GenericLogOperations &ops);
  void complete_op_log_entries(rwl::GenericLogOperations &&ops, const int r);
  void schedule_complete_op_log_entries(rwl::GenericLogOperations &&ops, const int r);
};
253 | ||
254 | } // namespace cache | |
255 | } // namespace librbd | |
256 | ||
257 | extern template class librbd::cache::ReplicatedWriteLog<librbd::ImageCtx>; | |
258 | ||
259 | #endif // CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG |