// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "librbd/cache/ObjectCacherObjectDispatch.h"
#include "include/neorados/RADOS.hpp"
#include "common/errno.h"
#include "librbd/ImageCtx.h"
#include "librbd/Journal.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
#include "librbd/cache/ObjectCacherWriteback.h"
#include "librbd/io/ObjectDispatchSpec.h"
#include "librbd/io/ObjectDispatcherInterface.h"
#include "librbd/io/ReadResult.h"
#include "librbd/io/Types.h"
#include "librbd/io/Utils.h"
#include "osd/osd_types.h"
#include "osdc/WritebackHandler.h"
#include <vector>

#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "librbd::cache::ObjectCacherObjectDispatch: " \
                           << this << " " << __func__ << ": "

namespace librbd {
namespace cache {

using librbd::util::data_object_name;

namespace {

typedef std::vector<ObjectExtent> ObjectExtents;

} // anonymous namespace

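// Completion for cache invalidation: on a blocklist error (or any error
// when purge_on_error is set) the cache contents are purged outright;
// afterwards all clean objects are released, and any bytes that could not
// be released turn an otherwise successful invalidation into -EBUSY.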
template <typename I>
struct ObjectCacherObjectDispatch<I>::C_InvalidateCache : public Context {
  ObjectCacherObjectDispatch* dispatcher;
  bool purge_on_error;
  Context *on_finish;

  C_InvalidateCache(ObjectCacherObjectDispatch* dispatcher,
                    bool purge_on_error, Context *on_finish)
    : dispatcher(dispatcher), purge_on_error(purge_on_error),
      on_finish(on_finish) {
  }

  void finish(int r) override {
    ceph_assert(ceph_mutex_is_locked(dispatcher->m_cache_lock));
    auto cct = dispatcher->m_image_ctx->cct;

    if (r == -EBLOCKLISTED) {
      lderr(cct) << "blocklisted during flush (purging)" << dendl;
      dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
    } else if (r < 0 && purge_on_error) {
      lderr(cct) << "failed to invalidate cache (purging): "
                 << cpp_strerror(r) << dendl;
      dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
    } else if (r != 0) {
      lderr(cct) << "failed to invalidate cache: " << cpp_strerror(r) << dendl;
    }

    auto unclean = dispatcher->m_object_cacher->release_set(
      dispatcher->m_object_set);
    if (unclean == 0) {
      r = 0;
    } else {
      lderr(cct) << "could not release all objects from cache: "
                 << unclean << " bytes remain" << dendl;
      if (r == 0) {
        r = -EBUSY;
      }
    }

    on_finish->complete(r);
  }
};

template <typename I>
ObjectCacherObjectDispatch<I>::ObjectCacherObjectDispatch(
    I* image_ctx, size_t max_dirty, bool writethrough_until_flush)
  : m_image_ctx(image_ctx), m_max_dirty(max_dirty),
    m_writethrough_until_flush(writethrough_until_flush),
    m_cache_lock(ceph::make_mutex(util::unique_lock_name(
      "librbd::cache::ObjectCacherObjectDispatch::cache_lock", this))) {
  ceph_assert(m_image_ctx->data_ctx.is_valid());
}

template <typename I>
ObjectCacherObjectDispatch<I>::~ObjectCacherObjectDispatch() {
  delete m_object_cacher;
  delete m_object_set;

  delete m_writeback_handler;
}

template <typename I>
void ObjectCacherObjectDispatch<I>::init() {
  auto cct = m_image_ctx->cct;
  ldout(cct, 5) << dendl;

  m_cache_lock.lock();
  ldout(cct, 5) << "enabling caching..." << dendl;
  m_writeback_handler = new ObjectCacherWriteback(m_image_ctx, m_cache_lock);

  auto init_max_dirty = m_max_dirty;
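  // start out in writethrough mode (no dirty data allowed) until the
  // first user-initiated flush; flush() below restores m_max_dirty at
  // that point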
  if (m_writethrough_until_flush) {
    init_max_dirty = 0;
  }

  auto cache_size =
    m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_size");
  auto target_dirty =
    m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_target_dirty");
  auto max_dirty_age =
    m_image_ctx->config.template get_val<double>("rbd_cache_max_dirty_age");
  auto block_writes_upfront =
    m_image_ctx->config.template get_val<bool>("rbd_cache_block_writes_upfront");
  auto max_dirty_object =
    m_image_ctx->config.template get_val<uint64_t>("rbd_cache_max_dirty_object");

  ldout(cct, 5) << "Initial cache settings:"
                << " size=" << cache_size
                << " num_objects=" << 10
                << " max_dirty=" << init_max_dirty
                << " target_dirty=" << target_dirty
                << " max_dirty_age=" << max_dirty_age << dendl;

  m_object_cacher = new ObjectCacher(cct, m_image_ctx->perfcounter->get_name(),
                                     *m_writeback_handler, m_cache_lock,
                                     nullptr, nullptr, cache_size,
                                     10, /* reset this in init */
                                     init_max_dirty, target_dirty,
                                     max_dirty_age, block_writes_upfront);

  // size object cache appropriately
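  // heuristic: when no explicit limit is configured, budget roughly 1% of
  // the cache size for per-object bookkeeping structures, clamped to the
  // range [10, 2000] objects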
  if (max_dirty_object == 0) {
    max_dirty_object = std::min<uint64_t>(
      2000, std::max<uint64_t>(10, cache_size / 100 /
                               sizeof(ObjectCacher::Object)));
  }
  ldout(cct, 5) << " cache bytes " << cache_size
                << " -> about " << max_dirty_object << " objects" << dendl;
  m_object_cacher->set_max_objects(max_dirty_object);

  m_object_set = new ObjectCacher::ObjectSet(nullptr,
                                             m_image_ctx->data_ctx.get_id(), 0);
  m_object_cacher->start();
  m_cache_lock.unlock();

  // add ourself to the IO object dispatcher chain
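  // when writeback can be enabled the cache may keep referencing the
  // caller's buffers after dispatch completes, so (presumably for that
  // reason) zero-copy transfers are disabled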
  if (m_max_dirty > 0) {
    m_image_ctx->disable_zero_copy = true;
  }
  m_image_ctx->io_object_dispatcher->register_dispatch(this);
}

template <typename I>
void ObjectCacherObjectDispatch<I>::shut_down(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 5) << dendl;

  // chain shut down in reverse order
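  // built bottom-up: at runtime the flush below completes first, then the
  // remaining cache entries are invalidated and released, and finally
  // (off the cache lock) the ObjectCacher is stopped before on_finish runs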

  // shut down the cache
  on_finish = new LambdaContext([this, on_finish](int r) {
      m_object_cacher->stop();
      on_finish->complete(r);
    });

  // ensure we aren't holding the cache lock post-flush
  on_finish = util::create_async_context_callback(*m_image_ctx, on_finish);

  // invalidate any remaining cache entries
  on_finish = new C_InvalidateCache(this, true, on_finish);

  // flush all pending writeback state
  std::lock_guard locker{m_cache_lock};
  m_object_cacher->release_set(m_object_set);
  m_object_cacher->flush_set(m_object_set, on_finish);
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::read(
    uint64_t object_no, io::ReadExtents* extents, IOContext io_context,
    int op_flags, int read_flags, const ZTracer::Trace &parent_trace,
    uint64_t* version, int* object_dispatch_flags,
    io::DispatchResult* dispatch_result, Context** on_finish,
    Context* on_dispatched) {
  // IO chained in reverse order
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << *extents << dendl;

  if (extents->size() == 0) {
    ldout(cct, 20) << "no extents to read" << dendl;
    return false;
  }

  if (version != nullptr) {
    // we currently don't cache read versions
    // and don't support reading more than one extent
    return false;
  }

  // ensure we aren't holding the cache lock post-read
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  // embed the RBD-internal read flags in the generic RADOS op_flags
  op_flags = ((op_flags & ~ObjectCacherWriteback::READ_FLAGS_MASK) |
              ((read_flags << ObjectCacherWriteback::READ_FLAGS_SHIFT) &
               ObjectCacherWriteback::READ_FLAGS_MASK));

  ceph::bufferlist* bl;
  if (extents->size() > 1) {
    auto req = new io::ReadResult::C_ObjectReadMergedExtents(
      cct, extents, on_dispatched);
    on_dispatched = req;
    bl = &req->bl;
  } else {
    bl = &extents->front().bl;
  }
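  // for multi-extent reads the cache fills a single merged bufferlist;
  // C_ObjectReadMergedExtents splits it back into the per-extent buffers
  // when the read completes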

  m_image_ctx->image_lock.lock_shared();
  auto rd = m_object_cacher->prepare_read(
    io_context->read_snap().value_or(CEPH_NOSNAP), bl, op_flags);
  m_image_ctx->image_lock.unlock_shared();

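  // build one ObjectExtent per requested range; buffer_extents records
  // where each range lands within the (possibly merged) destination buffer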
  uint64_t off = 0;
  for (auto& read_extent: *extents) {
    ObjectExtent extent(data_object_name(m_image_ctx, object_no), object_no,
                        read_extent.offset, read_extent.length, 0);
    extent.oloc.pool = m_image_ctx->data_ctx.get_id();
    extent.buffer_extents.push_back({off, read_extent.length});
    rd->extents.push_back(extent);
    off += read_extent.length;
  }

  ZTracer::Trace trace(parent_trace);
  *dispatch_result = io::DISPATCH_RESULT_COMPLETE;

  m_cache_lock.lock();
  int r = m_object_cacher->readx(rd, m_object_set, on_dispatched, &trace);
  m_cache_lock.unlock();
  if (r != 0) {
    on_dispatched->complete(r);
  }
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::discard(
    uint64_t object_no, uint64_t object_off, uint64_t object_len,
    IOContext io_context, int discard_flags,
    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
    Context** on_finish, Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << object_len << dendl;

  ObjectExtents object_extents;
  object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
                              object_no, object_off, object_len, 0);

  // discard the cache state after changes are committed to disk (and to
  // prevent races w/ readahead)
  auto ctx = *on_finish;
  *on_finish = new LambdaContext(
    [this, object_extents, ctx](int r) {
      m_cache_lock.lock();
      m_object_cacher->discard_set(m_object_set, object_extents);
      m_cache_lock.unlock();

      ctx->complete(r);
    });

  // ensure we aren't holding the cache lock post-write
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;

  // ensure any in-flight writeback is complete before advancing
  // the discard request
  std::lock_guard locker{m_cache_lock};
  m_object_cacher->discard_writeback(m_object_set, object_extents,
                                     on_dispatched);
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::write(
    uint64_t object_no, uint64_t object_off, ceph::bufferlist&& data,
    IOContext io_context, int op_flags, int write_flags,
    std::optional<uint64_t> assert_version,
    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
    Context** on_finish, Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << data.length() << dendl;

  // ensure we aren't holding the cache lock post-write
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  // cache layer does not handle version checking
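  // or exclusive-create semantics: wait for any in-flight writeback on the
  // extent and then let the request continue down the dispatch chain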
  if (assert_version.has_value() ||
      (write_flags & io::OBJECT_WRITE_FLAG_CREATE_EXCLUSIVE) != 0) {
    ObjectExtents object_extents;
    object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
                                object_no, object_off, data.length(), 0);

    *dispatch_result = io::DISPATCH_RESULT_CONTINUE;

    // ensure any in-flight writeback is complete before advancing
    // the write request
    std::lock_guard locker{m_cache_lock};
    m_object_cacher->discard_writeback(m_object_set, object_extents,
                                       on_dispatched);
    return true;
  }

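  // translate the IOContext's write snap context (snap seq plus snap id
  // list) into the OSD SnapContext the ObjectCacher expects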
  SnapContext snapc;
  if (io_context->write_snap_context()) {
    auto write_snap_context = *io_context->write_snap_context();
    snapc = SnapContext(write_snap_context.first,
                        {write_snap_context.second.begin(),
                         write_snap_context.second.end()});
  }

  m_image_ctx->image_lock.lock_shared();
  ObjectCacher::OSDWrite *wr = m_object_cacher->prepare_write(
    snapc, data, ceph::real_time::min(), op_flags, *journal_tid);
  m_image_ctx->image_lock.unlock_shared();

  ObjectExtent extent(data_object_name(m_image_ctx, object_no),
                      object_no, object_off, data.length(), 0);
  extent.oloc.pool = m_image_ctx->data_ctx.get_id();
  extent.buffer_extents.push_back({0, data.length()});
  wr->extents.push_back(extent);

  ZTracer::Trace trace(parent_trace);
  *dispatch_result = io::DISPATCH_RESULT_COMPLETE;

  std::lock_guard locker{m_cache_lock};
  m_object_cacher->writex(wr, m_object_set, on_dispatched, &trace);
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::write_same(
    uint64_t object_no, uint64_t object_off, uint64_t object_len,
    io::LightweightBufferExtents&& buffer_extents, ceph::bufferlist&& data,
    IOContext io_context, int op_flags,
    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
    Context** on_finish, Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << object_len << dendl;

  // ObjectCacher doesn't support write-same so convert to regular write
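  // by tiling the data pattern across the full object_len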
  io::LightweightObjectExtent extent(object_no, object_off, object_len, 0);
  extent.buffer_extents = std::move(buffer_extents);

  bufferlist ws_data;
  io::util::assemble_write_same_extent(extent, data, &ws_data, true);

  return write(object_no, object_off, std::move(ws_data), io_context, op_flags,
               0, std::nullopt, parent_trace, object_dispatch_flags,
               journal_tid, dispatch_result, on_finish, on_dispatched);
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::compare_and_write(
    uint64_t object_no, uint64_t object_off, ceph::bufferlist&& cmp_data,
    ceph::bufferlist&& write_data, IOContext io_context, int op_flags,
    const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
    int* object_dispatch_flags, uint64_t* journal_tid,
    io::DispatchResult* dispatch_result, Context** on_finish,
    Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
                 << cmp_data.length() << dendl;

  // pass-through the compare-and-write request since it's not a supported
  // operation of the ObjectCacher

  // ensure we aren't holding the cache lock post-flush
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  // flush any pending writes from the cache
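  // so the OSD-side comparison sees data that was still sitting in cache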
  ZTracer::Trace trace(parent_trace);
  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;

  ObjectExtents object_extents;
  object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
                              object_no, object_off, cmp_data.length(), 0);

  std::lock_guard cache_locker{m_cache_lock};
  m_object_cacher->flush_set(m_object_set, object_extents, &trace,
                             on_dispatched);
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::flush(
    io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
    Context** on_finish, Context* on_dispatched) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << dendl;

  // ensure we aren't holding the cache lock post-flush
  on_dispatched = util::create_async_context_callback(*m_image_ctx,
                                                      on_dispatched);

  std::lock_guard locker{m_cache_lock};
  if (flush_source == io::FLUSH_SOURCE_USER && !m_user_flushed) {
    m_user_flushed = true;
    if (m_writethrough_until_flush && m_max_dirty > 0) {
      m_object_cacher->set_max_dirty(m_max_dirty);
      ldout(cct, 5) << "saw first user flush, enabling writeback" << dendl;
    }
  }

  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
  m_object_cacher->flush_set(m_object_set, on_dispatched);
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::invalidate_cache(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 5) << dendl;

  // ensure we aren't holding the cache lock post-flush
  on_finish = util::create_async_context_callback(*m_image_ctx, on_finish);

  // invalidate any remaining cache entries
  on_finish = new C_InvalidateCache(this, false, on_finish);

  std::lock_guard locker{m_cache_lock};
  m_object_cacher->release_set(m_object_set);
  m_object_cacher->flush_set(m_object_set, on_finish);
  return true;
}

template <typename I>
bool ObjectCacherObjectDispatch<I>::reset_existence_cache(
    Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 5) << dendl;

  std::lock_guard locker{m_cache_lock};
  m_object_cacher->clear_nonexistence(m_object_set);
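  // returning false marks the request as not handled here so lower
  // dispatch layers can also reset their cached existence state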
  return false;
}

} // namespace cache
} // namespace librbd

template class librbd::cache::ObjectCacherObjectDispatch<librbd::ImageCtx>;