]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/cache/ObjectCacherObjectDispatch.h" | |
5 | #include "common/errno.h" | |
6 | #include "common/WorkQueue.h" | |
7 | #include "librbd/ImageCtx.h" | |
8 | #include "librbd/Journal.h" | |
9 | #include "librbd/Utils.h" | |
9f95a23c | 10 | #include "librbd/cache/ObjectCacherWriteback.h" |
11fdf7f2 TL |
11 | #include "librbd/io/ObjectDispatchSpec.h" |
12 | #include "librbd/io/ObjectDispatcher.h" | |
9f95a23c | 13 | #include "librbd/io/Types.h" |
11fdf7f2 TL |
14 | #include "librbd/io/Utils.h" |
15 | #include "osd/osd_types.h" | |
16 | #include "osdc/WritebackHandler.h" | |
17 | #include <vector> | |
18 | ||
19 | #define dout_subsys ceph_subsys_rbd | |
20 | #undef dout_prefix | |
21 | #define dout_prefix *_dout << "librbd::cache::ObjectCacherObjectDispatch: " \ | |
22 | << this << " " << __func__ << ": " | |
23 | ||
24 | namespace librbd { | |
25 | namespace cache { | |
26 | ||
9f95a23c TL |
27 | using librbd::util::data_object_name; |
28 | ||
11fdf7f2 TL |
namespace {

// Convenience alias for the per-request list of object extents handed to
// the ObjectCacher (discard/flush take a vector of extents).
typedef std::vector<ObjectExtent> ObjectExtents;

} // anonymous namespace
34 | ||
// Completion context invoked after a cache flush: purges and/or releases
// the in-memory cache state and propagates the final status to the next
// context in the chain. Must run with the dispatcher's cache lock held.
template <typename I>
struct ObjectCacherObjectDispatch<I>::C_InvalidateCache : public Context {
  ObjectCacherObjectDispatch* dispatcher;  // owning dispatch layer
  bool purge_on_error;                     // discard dirty data if flush failed
  Context *on_finish;                      // chained completion

  C_InvalidateCache(ObjectCacherObjectDispatch* dispatcher,
                    bool purge_on_error, Context *on_finish)
    : dispatcher(dispatcher), purge_on_error(purge_on_error),
      on_finish(on_finish) {
  }

  void finish(int r) override {
    // purge_set/release_set require the cache lock to be held by the caller
    ceph_assert(ceph_mutex_is_locked(dispatcher->m_cache_lock));
    auto cct = dispatcher->m_image_ctx->cct;

    if (r == -EBLACKLISTED) {
      // client was blacklisted by the OSDs: writeback can never succeed,
      // so drop all cached data
      lderr(cct) << "blacklisted during flush (purging)" << dendl;
      dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
    } else if (r < 0 && purge_on_error) {
      // flush failed but the caller requested a purge on error (shutdown path)
      lderr(cct) << "failed to invalidate cache (purging): "
                 << cpp_strerror(r) << dendl;
      dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
    } else if (r != 0) {
      // non-fatal for this path: log and fall through to the release below
      lderr(cct) << "failed to invalidate cache: " << cpp_strerror(r) << dendl;
    }

    // release clean buffers; a non-zero return means some bytes are still
    // dirty/pinned and could not be dropped
    auto unclean = dispatcher->m_object_cacher->release_set(
      dispatcher->m_object_set);
    if (unclean == 0) {
      // cache fully drained -- treat the invalidation as successful even if
      // an earlier error was purged away
      r = 0;
    } else {
      lderr(cct) << "could not release all objects from cache: "
                 << unclean << " bytes remain" << dendl;
      if (r == 0) {
        r = -EBUSY;
      }
    }

    on_finish->complete(r);
  }
};
77 | ||
// Constructs the cache dispatch layer. The cache itself is not created
// until init(); this only records the configuration and builds the lock.
// @param image_ctx image this cache layer serves (not owned)
// @param max_dirty writeback high-water mark in bytes (0 => writethrough)
// @param writethrough_until_flush stay in writethrough mode until the first
//        user-initiated flush is seen
template <typename I>
ObjectCacherObjectDispatch<I>::ObjectCacherObjectDispatch(
    I* image_ctx, size_t max_dirty, bool writethrough_until_flush)
  : m_image_ctx(image_ctx), m_max_dirty(max_dirty),
    m_writethrough_until_flush(writethrough_until_flush),
    m_cache_lock(ceph::make_mutex(util::unique_lock_name(
      "librbd::cache::ObjectCacherObjectDispatch::cache_lock", this))) {
  // the cache reads/writes directly against the data pool io context
  ceph_assert(m_image_ctx->data_ctx.is_valid());
}
87 | ||
// Tears down the cache objects created by init().
// NOTE(review): the cacher is deleted before the writeback handler it was
// constructed with -- presumably no writeback is in flight at this point
// because shut_down() stops the cacher first; confirm before reordering.
template <typename I>
ObjectCacherObjectDispatch<I>::~ObjectCacherObjectDispatch() {
  delete m_object_cacher;
  delete m_object_set;

  delete m_writeback_handler;
}
95 | ||
96 | template <typename I> | |
97 | void ObjectCacherObjectDispatch<I>::init() { | |
98 | auto cct = m_image_ctx->cct; | |
99 | ldout(cct, 5) << dendl; | |
100 | ||
9f95a23c | 101 | m_cache_lock.lock(); |
11fdf7f2 | 102 | ldout(cct, 5) << "enabling caching..." << dendl; |
9f95a23c | 103 | m_writeback_handler = new ObjectCacherWriteback(m_image_ctx, m_cache_lock); |
11fdf7f2 | 104 | |
9f95a23c TL |
105 | auto init_max_dirty = m_max_dirty; |
106 | if (m_writethrough_until_flush) { | |
11fdf7f2 TL |
107 | init_max_dirty = 0; |
108 | } | |
109 | ||
110 | auto cache_size = | |
111 | m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_size"); | |
112 | auto target_dirty = | |
113 | m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_target_dirty"); | |
114 | auto max_dirty_age = | |
115 | m_image_ctx->config.template get_val<double>("rbd_cache_max_dirty_age"); | |
116 | auto block_writes_upfront = | |
117 | m_image_ctx->config.template get_val<bool>("rbd_cache_block_writes_upfront"); | |
118 | auto max_dirty_object = | |
119 | m_image_ctx->config.template get_val<uint64_t>("rbd_cache_max_dirty_object"); | |
120 | ||
121 | ldout(cct, 5) << "Initial cache settings:" | |
122 | << " size=" << cache_size | |
123 | << " num_objects=" << 10 | |
124 | << " max_dirty=" << init_max_dirty | |
125 | << " target_dirty=" << target_dirty | |
126 | << " max_dirty_age=" << max_dirty_age << dendl; | |
127 | ||
128 | m_object_cacher = new ObjectCacher(cct, m_image_ctx->perfcounter->get_name(), | |
129 | *m_writeback_handler, m_cache_lock, | |
130 | nullptr, nullptr, cache_size, | |
131 | 10, /* reset this in init */ | |
132 | init_max_dirty, target_dirty, | |
133 | max_dirty_age, block_writes_upfront); | |
134 | ||
135 | // size object cache appropriately | |
136 | if (max_dirty_object == 0) { | |
137 | max_dirty_object = std::min<uint64_t>( | |
138 | 2000, std::max<uint64_t>(10, cache_size / 100 / | |
139 | sizeof(ObjectCacher::Object))); | |
140 | } | |
141 | ldout(cct, 5) << " cache bytes " << cache_size | |
142 | << " -> about " << max_dirty_object << " objects" << dendl; | |
143 | m_object_cacher->set_max_objects(max_dirty_object); | |
144 | ||
145 | m_object_set = new ObjectCacher::ObjectSet(nullptr, | |
146 | m_image_ctx->data_ctx.get_id(), 0); | |
147 | m_object_cacher->start(); | |
9f95a23c | 148 | m_cache_lock.unlock(); |
11fdf7f2 TL |
149 | |
150 | // add ourself to the IO object dispatcher chain | |
9f95a23c TL |
151 | if (m_max_dirty > 0) { |
152 | m_image_ctx->disable_zero_copy = true; | |
153 | } | |
11fdf7f2 TL |
154 | m_image_ctx->io_object_dispatcher->register_object_dispatch(this); |
155 | } | |
156 | ||
// Flushes and tears down the cache. The shutdown sequence is expressed as a
// chain of contexts built in reverse execution order:
//   flush_set -> C_InvalidateCache -> async hop -> stop cacher -> on_finish
template <typename I>
void ObjectCacherObjectDispatch<I>::shut_down(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 5) << dendl;

  // chain shut down in reverse order

  // shut down the cache (runs last, after invalidation has completed)
  on_finish = new LambdaContext([this, on_finish](int r) {
      m_object_cacher->stop();
      on_finish->complete(r);
    });

  // ensure we aren't holding the cache lock post-flush
  on_finish = util::create_async_context_callback(*m_image_ctx, on_finish);

  // invalidate any remaining cache entries; purge_on_error=true means dirty
  // data is discarded if the flush fails (we are shutting down regardless)
  on_finish = new C_InvalidateCache(this, true, on_finish);

  // flush all pending writeback state
  std::lock_guard locker{m_cache_lock};
  m_object_cacher->release_set(m_object_set);
  m_object_cacher->flush_set(m_object_set, on_finish);
}
181 | ||
182 | template <typename I> | |
183 | bool ObjectCacherObjectDispatch<I>::read( | |
9f95a23c TL |
184 | uint64_t object_no, uint64_t object_off, uint64_t object_len, |
185 | librados::snap_t snap_id, int op_flags, const ZTracer::Trace &parent_trace, | |
186 | ceph::bufferlist* read_data, io::ExtentMap* extent_map, | |
187 | int* object_dispatch_flags, io::DispatchResult* dispatch_result, | |
188 | Context** on_finish, Context* on_dispatched) { | |
11fdf7f2 TL |
189 | // IO chained in reverse order |
190 | auto cct = m_image_ctx->cct; | |
191 | ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~" | |
192 | << object_len << dendl; | |
193 | ||
194 | // ensure we aren't holding the cache lock post-read | |
195 | on_dispatched = util::create_async_context_callback(*m_image_ctx, | |
196 | on_dispatched); | |
197 | ||
9f95a23c | 198 | m_image_ctx->image_lock.lock_shared(); |
11fdf7f2 | 199 | auto rd = m_object_cacher->prepare_read(snap_id, read_data, op_flags); |
9f95a23c | 200 | m_image_ctx->image_lock.unlock_shared(); |
11fdf7f2 | 201 | |
9f95a23c TL |
202 | ObjectExtent extent(data_object_name(m_image_ctx, object_no), object_no, |
203 | object_off, object_len, 0); | |
11fdf7f2 TL |
204 | extent.oloc.pool = m_image_ctx->data_ctx.get_id(); |
205 | extent.buffer_extents.push_back({0, object_len}); | |
206 | rd->extents.push_back(extent); | |
207 | ||
208 | ZTracer::Trace trace(parent_trace); | |
209 | *dispatch_result = io::DISPATCH_RESULT_COMPLETE; | |
210 | ||
9f95a23c | 211 | m_cache_lock.lock(); |
11fdf7f2 | 212 | int r = m_object_cacher->readx(rd, m_object_set, on_dispatched, &trace); |
9f95a23c | 213 | m_cache_lock.unlock(); |
11fdf7f2 TL |
214 | if (r != 0) { |
215 | on_dispatched->complete(r); | |
216 | } | |
217 | return true; | |
218 | } | |
219 | ||
// Invalidates cached extents for a discarded object range. The discard
// itself continues down the dispatch chain (DISPATCH_RESULT_CONTINUE);
// cached state is only dropped once the discard commits via *on_finish.
template <typename I>
bool ObjectCacherObjectDispatch<I>::discard(
    uint64_t object_no, uint64_t object_off, uint64_t object_len,
    const ::SnapContext &snapc, int discard_flags,
    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
    Context** on_finish, Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << object_len << dendl;

  ObjectExtents object_extents;
  object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
                              object_no, object_off, object_len, 0);

  // discard the cache state after changes are committed to disk (and to
  // prevent races w/ readahead)
  auto ctx = *on_finish;
  // object_extents is captured by value: the lambda may run long after this
  // method has returned
  *on_finish = new LambdaContext(
    [this, object_extents, ctx](int r) {
      m_cache_lock.lock();
      m_object_cacher->discard_set(m_object_set, object_extents);
      m_cache_lock.unlock();

      ctx->complete(r);
    });

  // ensure we aren't holding the cache lock post-write
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;

  // ensure any in-flight writeback is complete before advancing
  // the discard request
  std::lock_guard locker{m_cache_lock};
  m_object_cacher->discard_writeback(m_object_set, object_extents,
                                     on_dispatched);
  return true;
}
260 | ||
261 | template <typename I> | |
262 | bool ObjectCacherObjectDispatch<I>::write( | |
9f95a23c TL |
263 | uint64_t object_no, uint64_t object_off, ceph::bufferlist&& data, |
264 | const ::SnapContext &snapc, int op_flags, | |
11fdf7f2 TL |
265 | const ZTracer::Trace &parent_trace, int* object_dispatch_flags, |
266 | uint64_t* journal_tid, io::DispatchResult* dispatch_result, | |
267 | Context** on_finish, Context* on_dispatched) { | |
268 | auto cct = m_image_ctx->cct; | |
269 | ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~" | |
270 | << data.length() << dendl; | |
271 | ||
272 | // ensure we aren't holding the cache lock post-write | |
273 | on_dispatched = util::create_async_context_callback(*m_image_ctx, | |
274 | on_dispatched); | |
275 | ||
9f95a23c | 276 | m_image_ctx->image_lock.lock_shared(); |
11fdf7f2 TL |
277 | ObjectCacher::OSDWrite *wr = m_object_cacher->prepare_write( |
278 | snapc, data, ceph::real_time::min(), op_flags, *journal_tid); | |
9f95a23c | 279 | m_image_ctx->image_lock.unlock_shared(); |
11fdf7f2 | 280 | |
9f95a23c TL |
281 | ObjectExtent extent(data_object_name(m_image_ctx, object_no), |
282 | object_no, object_off, data.length(), 0); | |
11fdf7f2 TL |
283 | extent.oloc.pool = m_image_ctx->data_ctx.get_id(); |
284 | extent.buffer_extents.push_back({0, data.length()}); | |
285 | wr->extents.push_back(extent); | |
286 | ||
287 | ZTracer::Trace trace(parent_trace); | |
288 | *dispatch_result = io::DISPATCH_RESULT_COMPLETE; | |
289 | ||
9f95a23c | 290 | std::lock_guard locker{m_cache_lock}; |
11fdf7f2 | 291 | m_object_cacher->writex(wr, m_object_set, on_dispatched, &trace); |
11fdf7f2 TL |
292 | return true; |
293 | } | |
294 | ||
295 | template <typename I> | |
296 | bool ObjectCacherObjectDispatch<I>::write_same( | |
9f95a23c TL |
297 | uint64_t object_no, uint64_t object_off, uint64_t object_len, |
298 | io::LightweightBufferExtents&& buffer_extents, ceph::bufferlist&& data, | |
11fdf7f2 TL |
299 | const ::SnapContext &snapc, int op_flags, |
300 | const ZTracer::Trace &parent_trace, int* object_dispatch_flags, | |
301 | uint64_t* journal_tid, io::DispatchResult* dispatch_result, | |
302 | Context** on_finish, Context* on_dispatched) { | |
303 | auto cct = m_image_ctx->cct; | |
304 | ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~" | |
305 | << object_len << dendl; | |
306 | ||
307 | // ObjectCacher doesn't support write-same so convert to regular write | |
9f95a23c | 308 | io::LightweightObjectExtent extent(object_no, object_off, object_len, 0); |
11fdf7f2 TL |
309 | extent.buffer_extents = std::move(buffer_extents); |
310 | ||
311 | bufferlist ws_data; | |
312 | io::util::assemble_write_same_extent(extent, data, &ws_data, true); | |
313 | ||
9f95a23c TL |
314 | return write(object_no, object_off, std::move(ws_data), snapc, op_flags, |
315 | parent_trace, object_dispatch_flags, journal_tid, | |
11fdf7f2 TL |
316 | dispatch_result, on_finish, on_dispatched); |
317 | } | |
318 | ||
// Handles compare-and-write by flushing the affected extents and passing
// the request down the chain (DISPATCH_RESULT_CONTINUE) -- the ObjectCacher
// cannot service the operation itself.
template <typename I>
bool ObjectCacherObjectDispatch<I>::compare_and_write(
    uint64_t object_no, uint64_t object_off, ceph::bufferlist&& cmp_data,
    ceph::bufferlist&& write_data, const ::SnapContext &snapc, int op_flags,
    const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
    int* object_dispatch_flags, uint64_t* journal_tid,
    io::DispatchResult* dispatch_result, Context** on_finish,
    Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << cmp_data.length() << dendl;

  // pass-through the compare-and-write request since it's not a supported
  // operation of the ObjectCacher

  // ensure we aren't holding the cache lock post-flush
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  // flush any pending writes from the cache
  ZTracer::Trace trace(parent_trace);
  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;

  // range to flush is defined by the compare buffer length
  ObjectExtents object_extents;
  object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
                              object_no, object_off, cmp_data.length(), 0);

  std::lock_guard cache_locker{m_cache_lock};
  m_object_cacher->flush_set(m_object_set, object_extents, &trace,
                             on_dispatched);
  return true;
}
351 | ||
352 | template <typename I> | |
353 | bool ObjectCacherObjectDispatch<I>::flush( | |
354 | io::FlushSource flush_source, const ZTracer::Trace &parent_trace, | |
9f95a23c TL |
355 | uint64_t* journal_tid, io::DispatchResult* dispatch_result, |
356 | Context** on_finish, Context* on_dispatched) { | |
11fdf7f2 TL |
357 | auto cct = m_image_ctx->cct; |
358 | ldout(cct, 20) << dendl; | |
359 | ||
360 | // ensure we aren't holding the cache lock post-flush | |
361 | on_dispatched = util::create_async_context_callback(*m_image_ctx, | |
362 | on_dispatched); | |
363 | ||
9f95a23c TL |
364 | std::lock_guard locker{m_cache_lock}; |
365 | if (flush_source == io::FLUSH_SOURCE_USER && !m_user_flushed) { | |
11fdf7f2 | 366 | m_user_flushed = true; |
9f95a23c TL |
367 | if (m_writethrough_until_flush && m_max_dirty > 0) { |
368 | m_object_cacher->set_max_dirty(m_max_dirty); | |
369 | ldout(cct, 5) << "saw first user flush, enabling writeback" << dendl; | |
370 | } | |
11fdf7f2 TL |
371 | } |
372 | ||
373 | *dispatch_result = io::DISPATCH_RESULT_CONTINUE; | |
374 | m_object_cacher->flush_set(m_object_set, on_dispatched); | |
11fdf7f2 TL |
375 | return true; |
376 | } | |
377 | ||
378 | template <typename I> | |
379 | bool ObjectCacherObjectDispatch<I>::invalidate_cache(Context* on_finish) { | |
380 | auto cct = m_image_ctx->cct; | |
381 | ldout(cct, 5) << dendl; | |
382 | ||
383 | // ensure we aren't holding the cache lock post-flush | |
384 | on_finish = util::create_async_context_callback(*m_image_ctx, on_finish); | |
385 | ||
386 | // invalidate any remaining cache entries | |
387 | on_finish = new C_InvalidateCache(this, false, on_finish); | |
388 | ||
9f95a23c | 389 | std::lock_guard locker{m_cache_lock}; |
11fdf7f2 TL |
390 | m_object_cacher->release_set(m_object_set); |
391 | m_object_cacher->flush_set(m_object_set, on_finish); | |
11fdf7f2 TL |
392 | return true; |
393 | } | |
394 | ||
395 | template <typename I> | |
396 | bool ObjectCacherObjectDispatch<I>::reset_existence_cache( | |
397 | Context* on_finish) { | |
398 | auto cct = m_image_ctx->cct; | |
399 | ldout(cct, 5) << dendl; | |
400 | ||
9f95a23c | 401 | std::lock_guard locker{m_cache_lock}; |
11fdf7f2 | 402 | m_object_cacher->clear_nonexistence(m_object_set); |
11fdf7f2 TL |
403 | return false; |
404 | } | |
405 | ||
406 | } // namespace cache | |
407 | } // namespace librbd | |
408 | ||
409 | template class librbd::cache::ObjectCacherObjectDispatch<librbd::ImageCtx>; |