]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/cache/ObjectCacherObjectDispatch.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / librbd / cache / ObjectCacherObjectDispatch.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/cache/ObjectCacherObjectDispatch.h"
5#include "common/errno.h"
6#include "common/WorkQueue.h"
7#include "librbd/ImageCtx.h"
8#include "librbd/Journal.h"
9#include "librbd/Utils.h"
9f95a23c 10#include "librbd/cache/ObjectCacherWriteback.h"
11fdf7f2
TL
11#include "librbd/io/ObjectDispatchSpec.h"
12#include "librbd/io/ObjectDispatcher.h"
9f95a23c 13#include "librbd/io/Types.h"
11fdf7f2
TL
14#include "librbd/io/Utils.h"
15#include "osd/osd_types.h"
16#include "osdc/WritebackHandler.h"
17#include <vector>
18
19#define dout_subsys ceph_subsys_rbd
20#undef dout_prefix
21#define dout_prefix *_dout << "librbd::cache::ObjectCacherObjectDispatch: " \
22 << this << " " << __func__ << ": "
23
24namespace librbd {
25namespace cache {
26
9f95a23c
TL
27using librbd::util::data_object_name;
28
11fdf7f2
TL
29namespace {
30
31typedef std::vector<ObjectExtent> ObjectExtents;
32
33} // anonymous namespace
34
35template <typename I>
36struct ObjectCacherObjectDispatch<I>::C_InvalidateCache : public Context {
37 ObjectCacherObjectDispatch* dispatcher;
38 bool purge_on_error;
39 Context *on_finish;
40
41 C_InvalidateCache(ObjectCacherObjectDispatch* dispatcher,
42 bool purge_on_error, Context *on_finish)
43 : dispatcher(dispatcher), purge_on_error(purge_on_error),
44 on_finish(on_finish) {
45 }
46
47 void finish(int r) override {
9f95a23c 48 ceph_assert(ceph_mutex_is_locked(dispatcher->m_cache_lock));
11fdf7f2
TL
49 auto cct = dispatcher->m_image_ctx->cct;
50
51 if (r == -EBLACKLISTED) {
52 lderr(cct) << "blacklisted during flush (purging)" << dendl;
53 dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
54 } else if (r < 0 && purge_on_error) {
55 lderr(cct) << "failed to invalidate cache (purging): "
56 << cpp_strerror(r) << dendl;
57 dispatcher->m_object_cacher->purge_set(dispatcher->m_object_set);
58 } else if (r != 0) {
59 lderr(cct) << "failed to invalidate cache: " << cpp_strerror(r) << dendl;
60 }
61
62 auto unclean = dispatcher->m_object_cacher->release_set(
63 dispatcher->m_object_set);
64 if (unclean == 0) {
65 r = 0;
66 } else {
67 lderr(cct) << "could not release all objects from cache: "
68 << unclean << " bytes remain" << dendl;
69 if (r == 0) {
70 r = -EBUSY;
71 }
72 }
73
74 on_finish->complete(r);
75 }
76};
77
78template <typename I>
79ObjectCacherObjectDispatch<I>::ObjectCacherObjectDispatch(
9f95a23c
TL
80 I* image_ctx, size_t max_dirty, bool writethrough_until_flush)
81 : m_image_ctx(image_ctx), m_max_dirty(max_dirty),
82 m_writethrough_until_flush(writethrough_until_flush),
83 m_cache_lock(ceph::make_mutex(util::unique_lock_name(
84 "librbd::cache::ObjectCacherObjectDispatch::cache_lock", this))) {
eafe8130 85 ceph_assert(m_image_ctx->data_ctx.is_valid());
11fdf7f2
TL
86}
87
88template <typename I>
89ObjectCacherObjectDispatch<I>::~ObjectCacherObjectDispatch() {
90 delete m_object_cacher;
91 delete m_object_set;
92
93 delete m_writeback_handler;
94}
95
96template <typename I>
97void ObjectCacherObjectDispatch<I>::init() {
98 auto cct = m_image_ctx->cct;
99 ldout(cct, 5) << dendl;
100
9f95a23c 101 m_cache_lock.lock();
11fdf7f2 102 ldout(cct, 5) << "enabling caching..." << dendl;
9f95a23c 103 m_writeback_handler = new ObjectCacherWriteback(m_image_ctx, m_cache_lock);
11fdf7f2 104
9f95a23c
TL
105 auto init_max_dirty = m_max_dirty;
106 if (m_writethrough_until_flush) {
11fdf7f2
TL
107 init_max_dirty = 0;
108 }
109
110 auto cache_size =
111 m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_size");
112 auto target_dirty =
113 m_image_ctx->config.template get_val<Option::size_t>("rbd_cache_target_dirty");
114 auto max_dirty_age =
115 m_image_ctx->config.template get_val<double>("rbd_cache_max_dirty_age");
116 auto block_writes_upfront =
117 m_image_ctx->config.template get_val<bool>("rbd_cache_block_writes_upfront");
118 auto max_dirty_object =
119 m_image_ctx->config.template get_val<uint64_t>("rbd_cache_max_dirty_object");
120
121 ldout(cct, 5) << "Initial cache settings:"
122 << " size=" << cache_size
123 << " num_objects=" << 10
124 << " max_dirty=" << init_max_dirty
125 << " target_dirty=" << target_dirty
126 << " max_dirty_age=" << max_dirty_age << dendl;
127
128 m_object_cacher = new ObjectCacher(cct, m_image_ctx->perfcounter->get_name(),
129 *m_writeback_handler, m_cache_lock,
130 nullptr, nullptr, cache_size,
131 10, /* reset this in init */
132 init_max_dirty, target_dirty,
133 max_dirty_age, block_writes_upfront);
134
135 // size object cache appropriately
136 if (max_dirty_object == 0) {
137 max_dirty_object = std::min<uint64_t>(
138 2000, std::max<uint64_t>(10, cache_size / 100 /
139 sizeof(ObjectCacher::Object)));
140 }
141 ldout(cct, 5) << " cache bytes " << cache_size
142 << " -> about " << max_dirty_object << " objects" << dendl;
143 m_object_cacher->set_max_objects(max_dirty_object);
144
145 m_object_set = new ObjectCacher::ObjectSet(nullptr,
146 m_image_ctx->data_ctx.get_id(), 0);
147 m_object_cacher->start();
9f95a23c 148 m_cache_lock.unlock();
11fdf7f2
TL
149
150 // add ourself to the IO object dispatcher chain
9f95a23c
TL
151 if (m_max_dirty > 0) {
152 m_image_ctx->disable_zero_copy = true;
153 }
11fdf7f2
TL
154 m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
155}
156
157template <typename I>
158void ObjectCacherObjectDispatch<I>::shut_down(Context* on_finish) {
159 auto cct = m_image_ctx->cct;
160 ldout(cct, 5) << dendl;
161
162 // chain shut down in reverse order
163
164 // shut down the cache
9f95a23c 165 on_finish = new LambdaContext([this, on_finish](int r) {
11fdf7f2
TL
166 m_object_cacher->stop();
167 on_finish->complete(r);
168 });
169
170 // ensure we aren't holding the cache lock post-flush
171 on_finish = util::create_async_context_callback(*m_image_ctx, on_finish);
172
173 // invalidate any remaining cache entries
174 on_finish = new C_InvalidateCache(this, true, on_finish);
175
176 // flush all pending writeback state
9f95a23c 177 std::lock_guard locker{m_cache_lock};
11fdf7f2
TL
178 m_object_cacher->release_set(m_object_set);
179 m_object_cacher->flush_set(m_object_set, on_finish);
11fdf7f2
TL
180}
181
182template <typename I>
183bool ObjectCacherObjectDispatch<I>::read(
9f95a23c
TL
184 uint64_t object_no, uint64_t object_off, uint64_t object_len,
185 librados::snap_t snap_id, int op_flags, const ZTracer::Trace &parent_trace,
186 ceph::bufferlist* read_data, io::ExtentMap* extent_map,
187 int* object_dispatch_flags, io::DispatchResult* dispatch_result,
188 Context** on_finish, Context* on_dispatched) {
11fdf7f2
TL
189 // IO chained in reverse order
190 auto cct = m_image_ctx->cct;
191 ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
192 << object_len << dendl;
193
194 // ensure we aren't holding the cache lock post-read
195 on_dispatched = util::create_async_context_callback(*m_image_ctx,
196 on_dispatched);
197
9f95a23c 198 m_image_ctx->image_lock.lock_shared();
11fdf7f2 199 auto rd = m_object_cacher->prepare_read(snap_id, read_data, op_flags);
9f95a23c 200 m_image_ctx->image_lock.unlock_shared();
11fdf7f2 201
9f95a23c
TL
202 ObjectExtent extent(data_object_name(m_image_ctx, object_no), object_no,
203 object_off, object_len, 0);
11fdf7f2
TL
204 extent.oloc.pool = m_image_ctx->data_ctx.get_id();
205 extent.buffer_extents.push_back({0, object_len});
206 rd->extents.push_back(extent);
207
208 ZTracer::Trace trace(parent_trace);
209 *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
210
9f95a23c 211 m_cache_lock.lock();
11fdf7f2 212 int r = m_object_cacher->readx(rd, m_object_set, on_dispatched, &trace);
9f95a23c 213 m_cache_lock.unlock();
11fdf7f2
TL
214 if (r != 0) {
215 on_dispatched->complete(r);
216 }
217 return true;
218}
219
220template <typename I>
221bool ObjectCacherObjectDispatch<I>::discard(
9f95a23c
TL
222 uint64_t object_no, uint64_t object_off, uint64_t object_len,
223 const ::SnapContext &snapc, int discard_flags,
11fdf7f2
TL
224 const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
225 uint64_t* journal_tid, io::DispatchResult* dispatch_result,
226 Context** on_finish, Context* on_dispatched) {
227 auto cct = m_image_ctx->cct;
228 ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
229 << object_len << dendl;
230
231 ObjectExtents object_extents;
9f95a23c
TL
232 object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
233 object_no, object_off, object_len, 0);
11fdf7f2
TL
234
235 // discard the cache state after changes are committed to disk (and to
236 // prevent races w/ readahead)
237 auto ctx = *on_finish;
9f95a23c 238 *on_finish = new LambdaContext(
11fdf7f2 239 [this, object_extents, ctx](int r) {
9f95a23c 240 m_cache_lock.lock();
11fdf7f2 241 m_object_cacher->discard_set(m_object_set, object_extents);
9f95a23c 242 m_cache_lock.unlock();
11fdf7f2
TL
243
244 ctx->complete(r);
245 });
246
247 // ensure we aren't holding the cache lock post-write
248 on_dispatched = util::create_async_context_callback(*m_image_ctx,
249 on_dispatched);
250
251 *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
252
253 // ensure any in-flight writeback is complete before advancing
254 // the discard request
9f95a23c 255 std::lock_guard locker{m_cache_lock};
11fdf7f2
TL
256 m_object_cacher->discard_writeback(m_object_set, object_extents,
257 on_dispatched);
11fdf7f2
TL
258 return true;
259}
260
261template <typename I>
262bool ObjectCacherObjectDispatch<I>::write(
9f95a23c
TL
263 uint64_t object_no, uint64_t object_off, ceph::bufferlist&& data,
264 const ::SnapContext &snapc, int op_flags,
11fdf7f2
TL
265 const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
266 uint64_t* journal_tid, io::DispatchResult* dispatch_result,
267 Context** on_finish, Context* on_dispatched) {
268 auto cct = m_image_ctx->cct;
269 ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
270 << data.length() << dendl;
271
272 // ensure we aren't holding the cache lock post-write
273 on_dispatched = util::create_async_context_callback(*m_image_ctx,
274 on_dispatched);
275
9f95a23c 276 m_image_ctx->image_lock.lock_shared();
11fdf7f2
TL
277 ObjectCacher::OSDWrite *wr = m_object_cacher->prepare_write(
278 snapc, data, ceph::real_time::min(), op_flags, *journal_tid);
9f95a23c 279 m_image_ctx->image_lock.unlock_shared();
11fdf7f2 280
9f95a23c
TL
281 ObjectExtent extent(data_object_name(m_image_ctx, object_no),
282 object_no, object_off, data.length(), 0);
11fdf7f2
TL
283 extent.oloc.pool = m_image_ctx->data_ctx.get_id();
284 extent.buffer_extents.push_back({0, data.length()});
285 wr->extents.push_back(extent);
286
287 ZTracer::Trace trace(parent_trace);
288 *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
289
9f95a23c 290 std::lock_guard locker{m_cache_lock};
11fdf7f2 291 m_object_cacher->writex(wr, m_object_set, on_dispatched, &trace);
11fdf7f2
TL
292 return true;
293}
294
295template <typename I>
296bool ObjectCacherObjectDispatch<I>::write_same(
9f95a23c
TL
297 uint64_t object_no, uint64_t object_off, uint64_t object_len,
298 io::LightweightBufferExtents&& buffer_extents, ceph::bufferlist&& data,
11fdf7f2
TL
299 const ::SnapContext &snapc, int op_flags,
300 const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
301 uint64_t* journal_tid, io::DispatchResult* dispatch_result,
302 Context** on_finish, Context* on_dispatched) {
303 auto cct = m_image_ctx->cct;
304 ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
305 << object_len << dendl;
306
307 // ObjectCacher doesn't support write-same so convert to regular write
9f95a23c 308 io::LightweightObjectExtent extent(object_no, object_off, object_len, 0);
11fdf7f2
TL
309 extent.buffer_extents = std::move(buffer_extents);
310
311 bufferlist ws_data;
312 io::util::assemble_write_same_extent(extent, data, &ws_data, true);
313
9f95a23c
TL
314 return write(object_no, object_off, std::move(ws_data), snapc, op_flags,
315 parent_trace, object_dispatch_flags, journal_tid,
11fdf7f2
TL
316 dispatch_result, on_finish, on_dispatched);
317}
318
319template <typename I>
320bool ObjectCacherObjectDispatch<I>::compare_and_write(
9f95a23c
TL
321 uint64_t object_no, uint64_t object_off, ceph::bufferlist&& cmp_data,
322 ceph::bufferlist&& write_data, const ::SnapContext &snapc, int op_flags,
11fdf7f2
TL
323 const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
324 int* object_dispatch_flags, uint64_t* journal_tid,
325 io::DispatchResult* dispatch_result, Context** on_finish,
326 Context* on_dispatched) {
327 auto cct = m_image_ctx->cct;
328 ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
329 << cmp_data.length() << dendl;
330
331 // pass-through the compare-and-write request since it's not a supported
332 // operation of the ObjectCacher
333
334 // ensure we aren't holding the cache lock post-flush
335 on_dispatched = util::create_async_context_callback(*m_image_ctx,
336 on_dispatched);
337
338 // flush any pending writes from the cache
339 ZTracer::Trace trace(parent_trace);
340 *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
341
342 ObjectExtents object_extents;
9f95a23c
TL
343 object_extents.emplace_back(data_object_name(m_image_ctx, object_no),
344 object_no, object_off, cmp_data.length(), 0);
11fdf7f2 345
9f95a23c 346 std::lock_guard cache_locker{m_cache_lock};
11fdf7f2
TL
347 m_object_cacher->flush_set(m_object_set, object_extents, &trace,
348 on_dispatched);
349 return true;
350}
351
352template <typename I>
353bool ObjectCacherObjectDispatch<I>::flush(
354 io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
9f95a23c
TL
355 uint64_t* journal_tid, io::DispatchResult* dispatch_result,
356 Context** on_finish, Context* on_dispatched) {
11fdf7f2
TL
357 auto cct = m_image_ctx->cct;
358 ldout(cct, 20) << dendl;
359
360 // ensure we aren't holding the cache lock post-flush
361 on_dispatched = util::create_async_context_callback(*m_image_ctx,
362 on_dispatched);
363
9f95a23c
TL
364 std::lock_guard locker{m_cache_lock};
365 if (flush_source == io::FLUSH_SOURCE_USER && !m_user_flushed) {
11fdf7f2 366 m_user_flushed = true;
9f95a23c
TL
367 if (m_writethrough_until_flush && m_max_dirty > 0) {
368 m_object_cacher->set_max_dirty(m_max_dirty);
369 ldout(cct, 5) << "saw first user flush, enabling writeback" << dendl;
370 }
11fdf7f2
TL
371 }
372
373 *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
374 m_object_cacher->flush_set(m_object_set, on_dispatched);
11fdf7f2
TL
375 return true;
376}
377
378template <typename I>
379bool ObjectCacherObjectDispatch<I>::invalidate_cache(Context* on_finish) {
380 auto cct = m_image_ctx->cct;
381 ldout(cct, 5) << dendl;
382
383 // ensure we aren't holding the cache lock post-flush
384 on_finish = util::create_async_context_callback(*m_image_ctx, on_finish);
385
386 // invalidate any remaining cache entries
387 on_finish = new C_InvalidateCache(this, false, on_finish);
388
9f95a23c 389 std::lock_guard locker{m_cache_lock};
11fdf7f2
TL
390 m_object_cacher->release_set(m_object_set);
391 m_object_cacher->flush_set(m_object_set, on_finish);
11fdf7f2
TL
392 return true;
393}
394
395template <typename I>
396bool ObjectCacherObjectDispatch<I>::reset_existence_cache(
397 Context* on_finish) {
398 auto cct = m_image_ctx->cct;
399 ldout(cct, 5) << dendl;
400
9f95a23c 401 std::lock_guard locker{m_cache_lock};
11fdf7f2 402 m_object_cacher->clear_nonexistence(m_object_set);
11fdf7f2
TL
403 return false;
404}
405
406} // namespace cache
407} // namespace librbd
408
409template class librbd::cache::ObjectCacherObjectDispatch<librbd::ImageCtx>;