]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/io/ImageRequest.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / librbd / io / ImageRequest.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/io/ImageRequest.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/internal.h"
7#include "librbd/Journal.h"
b32b8144 8#include "librbd/Types.h"
7c673cae
FG
9#include "librbd/Utils.h"
10#include "librbd/cache/ImageCache.h"
11#include "librbd/io/AioCompletion.h"
11fdf7f2
TL
12#include "librbd/io/AsyncOperation.h"
13#include "librbd/io/ObjectDispatchInterface.h"
14#include "librbd/io/ObjectDispatchSpec.h"
15#include "librbd/io/ObjectDispatcher.h"
16#include "librbd/io/Utils.h"
7c673cae
FG
17#include "librbd/journal/Types.h"
18#include "include/rados/librados.hpp"
11fdf7f2 19#include "common/perf_counters.h"
7c673cae
FG
20#include "common/WorkQueue.h"
21#include "osdc/Striper.h"
11fdf7f2
TL
22#include <algorithm>
23#include <functional>
7c673cae
FG
24
25#define dout_subsys ceph_subsys_rbd
26#undef dout_prefix
27#define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
28 << " " << __func__ << ": "
29
30namespace librbd {
31namespace io {
32
11fdf7f2 33using librbd::util::get_image_ctx;
7c673cae
FG
34
35namespace {
36
11fdf7f2
TL
37template <typename I>
38struct C_UpdateTimestamp : public Context {
39public:
40 I& m_image_ctx;
41 bool m_modify; // if modify set to 'true', modify timestamp is updated,
42 // access timestamp otherwise
43 AsyncOperation m_async_op;
91327a77 44
11fdf7f2
TL
45 C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
46 m_async_op.start_op(*get_image_ctx(&m_image_ctx));
91327a77 47 }
11fdf7f2
TL
48 ~C_UpdateTimestamp() override {
49 m_async_op.finish_op();
91327a77
AA
50 }
51
11fdf7f2
TL
52 void send() {
53 librados::ObjectWriteOperation op;
54 if (m_modify) {
55 cls_client::set_modify_timestamp(&op);
56 } else {
57 cls_client::set_access_timestamp(&op);
91327a77 58 }
7c673cae 59
11fdf7f2
TL
60 auto comp = librbd::util::create_rados_callback(this);
61 int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
62 ceph_assert(r == 0);
63 comp->release();
7c673cae
FG
64 }
65
66 void finish(int r) override {
11fdf7f2 67 // ignore errors updating timestamp
7c673cae
FG
68 }
69};
70
11fdf7f2
TL
71bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
72 uint64_t interval) {
73 return (interval &&
74 (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
224ce89b
WB
75}
76
11fdf7f2 77} // anonymous namespace
c07f9fc5 78
7c673cae
FG
79template <typename I>
80void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
81 Extents &&image_extents,
31f18b77
FG
82 ReadResult &&read_result, int op_flags,
83 const ZTracer::Trace &parent_trace) {
7c673cae 84 ImageReadRequest<I> req(*ictx, c, std::move(image_extents),
31f18b77 85 std::move(read_result), op_flags, parent_trace);
7c673cae
FG
86 req.send();
87}
88
89template <typename I>
90void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
91 Extents &&image_extents, bufferlist &&bl,
31f18b77
FG
92 int op_flags,
93 const ZTracer::Trace &parent_trace) {
7c673cae 94 ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl),
31f18b77 95 op_flags, parent_trace);
7c673cae
FG
96 req.send();
97}
98
99template <typename I>
100void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
11fdf7f2
TL
101 Extents &&image_extents,
102 uint32_t discard_granularity_bytes,
31f18b77 103 const ZTracer::Trace &parent_trace) {
11fdf7f2
TL
104 ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents),
105 discard_granularity_bytes, parent_trace);
7c673cae
FG
106 req.send();
107}
108
109template <typename I>
31f18b77 110void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
11fdf7f2
TL
111 FlushSource flush_source,
112 const ZTracer::Trace &parent_trace) {
113 ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace);
7c673cae
FG
114 req.send();
115}
116
117template <typename I>
118void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
11fdf7f2 119 Extents &&image_extents,
31f18b77
FG
120 bufferlist &&bl, int op_flags,
121 const ZTracer::Trace &parent_trace) {
11fdf7f2
TL
122 ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents),
123 std::move(bl), op_flags, parent_trace);
7c673cae
FG
124 req.send();
125}
126
c07f9fc5
FG
127template <typename I>
128void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
129 Extents &&image_extents,
130 bufferlist &&cmp_bl,
131 bufferlist &&bl,
132 uint64_t *mismatch_offset,
133 int op_flags,
134 const ZTracer::Trace &parent_trace) {
135 ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents),
136 std::move(cmp_bl), std::move(bl),
137 mismatch_offset, op_flags, parent_trace);
138 req.send();
139}
140
141
7c673cae
FG
142template <typename I>
143void ImageRequest<I>::send() {
144 I &image_ctx = this->m_image_ctx;
11fdf7f2
TL
145 ceph_assert(m_aio_comp->is_initialized(get_aio_type()));
146 ceph_assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH));
7c673cae
FG
147
148 CephContext *cct = image_ctx.cct;
149 AioCompletion *aio_comp = this->m_aio_comp;
150 ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
c07f9fc5 151 << "completion=" << aio_comp << dendl;
7c673cae
FG
152
153 aio_comp->get();
154 int r = clip_request();
155 if (r < 0) {
156 m_aio_comp->fail(r);
157 return;
158 }
159
160 if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
11fdf7f2 161 update_timestamp();
7c673cae
FG
162 send_request();
163 } else {
164 send_image_cache_request();
165 }
166}
167
168template <typename I>
169int ImageRequest<I>::clip_request() {
170 RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
171 for (auto &image_extent : m_image_extents) {
172 auto clip_len = image_extent.second;
173 int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len);
174 if (r < 0) {
175 return r;
176 }
177
178 image_extent.second = clip_len;
179 }
180 return 0;
181}
182
183template <typename I>
11fdf7f2
TL
184void ImageRequest<I>::update_timestamp() {
185 bool modify = (get_aio_type() != AIO_TYPE_READ);
186 uint64_t update_interval;
187 if (modify) {
188 update_interval = m_image_ctx.mtime_update_interval;
189 } else {
190 update_interval = m_image_ctx.atime_update_interval;
191 }
7c673cae 192
11fdf7f2
TL
193 if (update_interval == 0) {
194 return;
195 }
196
197 utime_t (I::*get_timestamp_fn)() const;
198 void (I::*set_timestamp_fn)(utime_t);
199 if (modify) {
200 get_timestamp_fn = &I::get_modify_timestamp;
201 set_timestamp_fn = &I::set_modify_timestamp;
202 } else {
203 get_timestamp_fn = &I::get_access_timestamp;
204 set_timestamp_fn = &I::set_access_timestamp;
205 }
206
207 utime_t ts = ceph_clock_now();
208 {
209 RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock);
210 if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
211 update_interval)) {
212 return;
213 }
214 }
215
216 {
217 RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock);
218 bool update = should_update_timestamp(
219 ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
220 if (!update) {
221 return;
222 }
223
224 std::invoke(set_timestamp_fn, m_image_ctx, ts);
225 }
226
227 // TODO we fire and forget this outside the IO path to prevent
228 // potential race conditions with librbd client IO callbacks
229 // between different threads (e.g. librados and object cacher)
230 ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
231 auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
232 req->send();
7c673cae
FG
233}
234
235template <typename I>
236ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
237 Extents &&image_extents,
31f18b77
FG
238 ReadResult &&read_result, int op_flags,
239 const ZTracer::Trace &parent_trace)
240 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read",
241 parent_trace),
7c673cae
FG
242 m_op_flags(op_flags) {
243 aio_comp->read_result = std::move(read_result);
244}
245
246template <typename I>
247int ImageReadRequest<I>::clip_request() {
248 int r = ImageRequest<I>::clip_request();
249 if (r < 0) {
250 return r;
251 }
252
253 uint64_t buffer_length = 0;
254 auto &image_extents = this->m_image_extents;
255 for (auto &image_extent : image_extents) {
256 buffer_length += image_extent.second;
257 }
258 this->m_aio_comp->read_result.set_clip_length(buffer_length);
259 return 0;
260}
261
262template <typename I>
263void ImageReadRequest<I>::send_request() {
264 I &image_ctx = this->m_image_ctx;
265 CephContext *cct = image_ctx.cct;
266
267 auto &image_extents = this->m_image_extents;
11fdf7f2 268 if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 &&
7c673cae
FG
269 !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
270 readahead(get_image_ctx(&image_ctx), image_extents);
271 }
272
273 AioCompletion *aio_comp = this->m_aio_comp;
274 librados::snap_t snap_id;
275 map<object_t,vector<ObjectExtent> > object_extents;
276 uint64_t buffer_ofs = 0;
277 {
278 // prevent image size from changing between computing clip and recording
279 // pending async operation
280 RWLock::RLocker snap_locker(image_ctx.snap_lock);
281 snap_id = image_ctx.snap_id;
282
283 // map image extents to object extents
284 for (auto &extent : image_extents) {
285 if (extent.second == 0) {
286 continue;
287 }
288
289 Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout,
290 extent.first, extent.second, 0, object_extents,
291 buffer_ofs);
292 buffer_ofs += extent.second;
293 }
294 }
295
296 // pre-calculate the expected number of read requests
297 uint32_t request_count = 0;
298 for (auto &object_extent : object_extents) {
299 request_count += object_extent.second.size();
300 }
301 aio_comp->set_request_count(request_count);
302
303 // issue the requests
304 for (auto &object_extent : object_extents) {
305 for (auto &extent : object_extent.second) {
306 ldout(cct, 20) << "oid " << extent.oid << " " << extent.offset << "~"
307 << extent.length << " from " << extent.buffer_extents
308 << dendl;
309
11fdf7f2
TL
310 auto req_comp = new io::ReadResult::C_ObjectReadRequest(
311 aio_comp, extent.offset, extent.length,
312 std::move(extent.buffer_extents));
313 auto req = ObjectDispatchSpec::create_read(
314 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, extent.oid.name,
315 extent.objectno, extent.offset, extent.length, snap_id, m_op_flags,
316 this->m_trace, &req_comp->bl, &req_comp->extent_map, req_comp);
b32b8144 317 req->send();
7c673cae
FG
318 }
319 }
320
321 aio_comp->put();
322
323 image_ctx.perfcounter->inc(l_librbd_rd);
324 image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
325}
326
327template <typename I>
328void ImageReadRequest<I>::send_image_cache_request() {
329 I &image_ctx = this->m_image_ctx;
11fdf7f2 330 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
331
332 AioCompletion *aio_comp = this->m_aio_comp;
333 aio_comp->set_request_count(1);
334
335 auto *req_comp = new io::ReadResult::C_ImageReadRequest(
336 aio_comp, this->m_image_extents);
337 image_ctx.image_cache->aio_read(std::move(this->m_image_extents),
338 &req_comp->bl, m_op_flags,
339 req_comp);
340}
341
342template <typename I>
343void AbstractImageWriteRequest<I>::send_request() {
344 I &image_ctx = this->m_image_ctx;
345 CephContext *cct = image_ctx.cct;
346
347 RWLock::RLocker md_locker(image_ctx.md_lock);
348
349 bool journaling = false;
350
351 AioCompletion *aio_comp = this->m_aio_comp;
352 uint64_t clip_len = 0;
353 ObjectExtents object_extents;
354 ::SnapContext snapc;
355 {
356 // prevent image size from changing between computing clip and recording
357 // pending async operation
358 RWLock::RLocker snap_locker(image_ctx.snap_lock);
359 if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) {
360 aio_comp->fail(-EROFS);
361 return;
362 }
363
364 for (auto &extent : this->m_image_extents) {
365 if (extent.second == 0) {
366 continue;
367 }
368
369 // map to object extents
370 Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout,
371 extent.first, extent.second, 0, object_extents);
372 clip_len += extent.second;
373 }
374
375 snapc = image_ctx.snapc;
376 journaling = (image_ctx.journal != nullptr &&
377 image_ctx.journal->is_journal_appending());
378 }
379
11fdf7f2 380 int ret = prune_object_extents(&object_extents);
c07f9fc5
FG
381 if (ret < 0) {
382 aio_comp->fail(ret);
383 return;
384 }
7c673cae
FG
385
386 if (!object_extents.empty()) {
387 uint64_t journal_tid = 0;
7c673cae
FG
388 if (journaling) {
389 // in-flight ops are flushed prior to closing the journal
11fdf7f2
TL
390 ceph_assert(image_ctx.journal != NULL);
391 journal_tid = append_journal_event(m_synchronous);
7c673cae
FG
392 }
393
11fdf7f2
TL
394 aio_comp->set_request_count(object_extents.size());
395 send_object_requests(object_extents, snapc, journal_tid);
7c673cae
FG
396 } else {
397 // no IO to perform -- fire completion
398 aio_comp->unblock();
399 }
400
401 update_stats(clip_len);
402 aio_comp->put();
403}
404
405template <typename I>
406void AbstractImageWriteRequest<I>::send_object_requests(
407 const ObjectExtents &object_extents, const ::SnapContext &snapc,
11fdf7f2 408 uint64_t journal_tid) {
7c673cae
FG
409 I &image_ctx = this->m_image_ctx;
410 CephContext *cct = image_ctx.cct;
411
412 AioCompletion *aio_comp = this->m_aio_comp;
413 for (ObjectExtents::const_iterator p = object_extents.begin();
414 p != object_extents.end(); ++p) {
415 ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" << p->length
416 << " from " << p->buffer_extents << dendl;
417 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
11fdf7f2 418 auto request = create_object_request(*p, snapc, journal_tid, req_comp);
7c673cae
FG
419
420 // if journaling, stash the request for later; otherwise send
421 if (request != NULL) {
11fdf7f2 422 request->send();
7c673cae
FG
423 }
424 }
425}
426
427template <typename I>
428void ImageWriteRequest<I>::assemble_extent(const ObjectExtent &object_extent,
429 bufferlist *bl) {
430 for (auto q = object_extent.buffer_extents.begin();
431 q != object_extent.buffer_extents.end(); ++q) {
432 bufferlist sub_bl;
433 sub_bl.substr_of(m_bl, q->first, q->second);
434 bl->claim_append(sub_bl);
435 }
436}
437
438template <typename I>
11fdf7f2 439uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
440 I &image_ctx = this->m_image_ctx;
441
442 uint64_t tid = 0;
443 uint64_t buffer_offset = 0;
11fdf7f2 444 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
445 for (auto &extent : this->m_image_extents) {
446 bufferlist sub_bl;
447 sub_bl.substr_of(m_bl, buffer_offset, extent.second);
448 buffer_offset += extent.second;
449
450 tid = image_ctx.journal->append_write_event(extent.first, extent.second,
11fdf7f2 451 sub_bl, synchronous);
7c673cae
FG
452 }
453
7c673cae
FG
454 return tid;
455}
456
457template <typename I>
458void ImageWriteRequest<I>::send_image_cache_request() {
459 I &image_ctx = this->m_image_ctx;
11fdf7f2 460 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
461
462 AioCompletion *aio_comp = this->m_aio_comp;
463 aio_comp->set_request_count(1);
464 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
465 image_ctx.image_cache->aio_write(std::move(this->m_image_extents),
466 std::move(m_bl), m_op_flags, req_comp);
467}
468
469template <typename I>
11fdf7f2 470ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request(
7c673cae 471 const ObjectExtent &object_extent, const ::SnapContext &snapc,
11fdf7f2 472 uint64_t journal_tid, Context *on_finish) {
7c673cae 473 I &image_ctx = this->m_image_ctx;
7c673cae
FG
474
475 bufferlist bl;
476 assemble_extent(object_extent, &bl);
11fdf7f2
TL
477 auto req = ObjectDispatchSpec::create_write(
478 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
479 object_extent.objectno, object_extent.offset, std::move(bl), snapc,
480 m_op_flags, journal_tid, this->m_trace, on_finish);
7c673cae
FG
481 return req;
482}
483
484template <typename I>
485void ImageWriteRequest<I>::update_stats(size_t length) {
486 I &image_ctx = this->m_image_ctx;
487 image_ctx.perfcounter->inc(l_librbd_wr);
488 image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
489}
490
491template <typename I>
11fdf7f2 492uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
493 I &image_ctx = this->m_image_ctx;
494
495 uint64_t tid = 0;
11fdf7f2 496 ceph_assert(!this->m_image_extents.empty());
7c673cae 497 for (auto &extent : this->m_image_extents) {
11fdf7f2
TL
498 journal::EventEntry event_entry(
499 journal::AioDiscardEvent(extent.first,
500 extent.second,
501 this->m_discard_granularity_bytes));
7c673cae 502 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
503 extent.first, extent.second,
504 synchronous, 0);
7c673cae
FG
505 }
506
7c673cae
FG
507 return tid;
508}
509
7c673cae
FG
510template <typename I>
511void ImageDiscardRequest<I>::send_image_cache_request() {
512 I &image_ctx = this->m_image_ctx;
11fdf7f2 513 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
514
515 AioCompletion *aio_comp = this->m_aio_comp;
516 aio_comp->set_request_count(this->m_image_extents.size());
517 for (auto &extent : this->m_image_extents) {
518 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
519 image_ctx.image_cache->aio_discard(extent.first, extent.second,
11fdf7f2
TL
520 this->m_discard_granularity_bytes,
521 req_comp);
7c673cae
FG
522 }
523}
524
525template <typename I>
11fdf7f2 526ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request(
7c673cae 527 const ObjectExtent &object_extent, const ::SnapContext &snapc,
11fdf7f2 528 uint64_t journal_tid, Context *on_finish) {
7c673cae 529 I &image_ctx = this->m_image_ctx;
11fdf7f2
TL
530 auto req = ObjectDispatchSpec::create_discard(
531 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
532 object_extent.objectno, object_extent.offset, object_extent.length, snapc,
533 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
534 on_finish);
7c673cae
FG
535 return req;
536}
537
538template <typename I>
539void ImageDiscardRequest<I>::update_stats(size_t length) {
540 I &image_ctx = this->m_image_ctx;
541 image_ctx.perfcounter->inc(l_librbd_discard);
542 image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
543}
544
11fdf7f2
TL
545template <typename I>
546int ImageDiscardRequest<I>::prune_object_extents(
547 ObjectExtents* object_extents) const {
548 if (m_discard_granularity_bytes == 0) {
549 return 0;
550 }
551
552 // Align the range to discard_granularity_bytes boundary and skip
553 // and discards that are too small to free up any space.
554 //
555 // discard_granularity_bytes >= object_size && tail truncation
556 // is a special case for filestore
557 bool prune_required = false;
558 auto object_size = this->m_image_ctx.layout.object_size;
559 auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
560 object_size);
561 auto xform_lambda =
562 [discard_granularity_bytes, object_size, &prune_required]
563 (ObjectExtent& object_extent) {
564 auto& offset = object_extent.offset;
565 auto& length = object_extent.length;
566 auto next_offset = offset + length;
567
568 if ((discard_granularity_bytes < object_size) ||
569 (next_offset < object_size)) {
570 offset = p2roundup<uint64_t>(offset, discard_granularity_bytes);
571 next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes);
572 if (offset >= next_offset) {
573 prune_required = true;
574 length = 0;
575 } else {
576 length = next_offset - offset;
577 }
578 }
579 };
580 std::for_each(object_extents->begin(), object_extents->end(),
581 xform_lambda);
582
583 if (prune_required) {
584 // one or more object extents were skipped
585 auto remove_lambda =
586 [](const ObjectExtent& object_extent) {
587 return (object_extent.length == 0);
588 };
589 object_extents->erase(
590 std::remove_if(object_extents->begin(), object_extents->end(),
591 remove_lambda),
592 object_extents->end());
593 }
594 return 0;
595}
596
7c673cae
FG
597template <typename I>
598void ImageFlushRequest<I>::send_request() {
599 I &image_ctx = this->m_image_ctx;
7c673cae
FG
600
601 bool journaling = false;
602 {
603 RWLock::RLocker snap_locker(image_ctx.snap_lock);
11fdf7f2
TL
604 journaling = (m_flush_source == FLUSH_SOURCE_USER &&
605 image_ctx.journal != nullptr &&
7c673cae
FG
606 image_ctx.journal->is_journal_appending());
607 }
608
609 AioCompletion *aio_comp = this->m_aio_comp;
11fdf7f2
TL
610 aio_comp->set_request_count(1);
611
612 Context *ctx = new C_AioRequest(aio_comp);
613
614 // ensure no locks are held when flush is complete
615 ctx = librbd::util::create_async_context_callback(image_ctx, ctx);
616
7c673cae
FG
617 if (journaling) {
618 // in-flight ops are flushed prior to closing the journal
619 uint64_t journal_tid = image_ctx.journal->append_io_event(
11fdf7f2
TL
620 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
621
622 ctx = new FunctionContext(
623 [&image_ctx, journal_tid, ctx](int r) {
624 image_ctx.journal->commit_io_event(journal_tid, r);
625 ctx->complete(r);
626 });
627 ctx = new FunctionContext(
628 [&image_ctx, journal_tid, ctx](int r) {
7c673cae 629 image_ctx.journal->flush_event(journal_tid, ctx);
11fdf7f2 630 });
7c673cae
FG
631 } else {
632 // flush rbd cache only when journaling is not enabled
11fdf7f2
TL
633 auto object_dispatch_spec = ObjectDispatchSpec::create_flush(
634 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, this->m_trace,
635 ctx);
636 ctx = new FunctionContext([object_dispatch_spec](int r) {
637 object_dispatch_spec->send();
638 });
7c673cae
FG
639 }
640
11fdf7f2
TL
641 // ensure all in-flight IOs are settled if non-user flush request
642 image_ctx.flush_async_operations(ctx);
643 aio_comp->start_op(true);
644 aio_comp->put();
645
646 // might be flushing during image shutdown
647 if (image_ctx.perfcounter != nullptr) {
648 image_ctx.perfcounter->inc(l_librbd_flush);
649 }
7c673cae
FG
650}
651
652template <typename I>
653void ImageFlushRequest<I>::send_image_cache_request() {
654 I &image_ctx = this->m_image_ctx;
11fdf7f2 655 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
656
657 AioCompletion *aio_comp = this->m_aio_comp;
658 aio_comp->set_request_count(1);
659 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
660 image_ctx.image_cache->aio_flush(req_comp);
661}
662
663template <typename I>
11fdf7f2 664uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
665 I &image_ctx = this->m_image_ctx;
666
667 uint64_t tid = 0;
11fdf7f2 668 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
669 for (auto &extent : this->m_image_extents) {
670 journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
671 extent.second,
672 m_data_bl));
673 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
674 extent.first, extent.second,
675 synchronous, 0);
7c673cae
FG
676 }
677
7c673cae
FG
678 return tid;
679}
680
681template <typename I>
682void ImageWriteSameRequest<I>::send_image_cache_request() {
683 I &image_ctx = this->m_image_ctx;
11fdf7f2 684 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
685
686 AioCompletion *aio_comp = this->m_aio_comp;
687 aio_comp->set_request_count(this->m_image_extents.size());
688 for (auto &extent : this->m_image_extents) {
689 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
690 image_ctx.image_cache->aio_writesame(extent.first, extent.second,
691 std::move(m_data_bl), m_op_flags,
692 req_comp);
693 }
694}
695
696template <typename I>
11fdf7f2 697ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request(
7c673cae 698 const ObjectExtent &object_extent, const ::SnapContext &snapc,
11fdf7f2 699 uint64_t journal_tid, Context *on_finish) {
7c673cae 700 I &image_ctx = this->m_image_ctx;
7c673cae
FG
701
702 bufferlist bl;
11fdf7f2
TL
703 ObjectDispatchSpec *req;
704
705 if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) {
706 Extents buffer_extents{object_extent.buffer_extents};
7c673cae 707
11fdf7f2
TL
708 req = ObjectDispatchSpec::create_write_same(
709 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
710 object_extent.objectno, object_extent.offset, object_extent.length,
711 std::move(buffer_extents), std::move(bl), snapc, m_op_flags, journal_tid,
712 this->m_trace, on_finish);
7c673cae
FG
713 return req;
714 }
11fdf7f2
TL
715 req = ObjectDispatchSpec::create_write(
716 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
717 object_extent.objectno, object_extent.offset, std::move(bl), snapc,
718 m_op_flags, journal_tid, this->m_trace, on_finish);
7c673cae
FG
719 return req;
720}
721
722template <typename I>
723void ImageWriteSameRequest<I>::update_stats(size_t length) {
724 I &image_ctx = this->m_image_ctx;
725 image_ctx.perfcounter->inc(l_librbd_ws);
726 image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
727}
728
c07f9fc5
FG
729template <typename I>
730uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
11fdf7f2 731 bool synchronous) {
c07f9fc5
FG
732 I &image_ctx = this->m_image_ctx;
733
734 uint64_t tid = 0;
11fdf7f2 735 ceph_assert(this->m_image_extents.size() == 1);
c07f9fc5 736 auto &extent = this->m_image_extents.front();
11fdf7f2
TL
737 journal::EventEntry event_entry(
738 journal::AioCompareAndWriteEvent(extent.first, extent.second, m_cmp_bl,
739 m_bl));
c07f9fc5 740 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
741 extent.first, extent.second,
742 synchronous, -EILSEQ);
c07f9fc5
FG
743
744 return tid;
745}
746
c07f9fc5
FG
747template <typename I>
748void ImageCompareAndWriteRequest<I>::assemble_extent(
749 const ObjectExtent &object_extent, bufferlist *bl) {
750 for (auto q = object_extent.buffer_extents.begin();
751 q != object_extent.buffer_extents.end(); ++q) {
752 bufferlist sub_bl;
753 sub_bl.substr_of(m_bl, q->first, q->second);
754 bl->claim_append(sub_bl);
755 }
756}
757
758template <typename I>
759void ImageCompareAndWriteRequest<I>::send_image_cache_request() {
760 I &image_ctx = this->m_image_ctx;
11fdf7f2 761 ceph_assert(image_ctx.image_cache != nullptr);
c07f9fc5
FG
762
763 AioCompletion *aio_comp = this->m_aio_comp;
764 aio_comp->set_request_count(1);
765 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
766 image_ctx.image_cache->aio_compare_and_write(
767 std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl),
768 m_mismatch_offset, m_op_flags, req_comp);
769}
770
771template <typename I>
11fdf7f2 772ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request(
c07f9fc5
FG
773 const ObjectExtent &object_extent,
774 const ::SnapContext &snapc,
11fdf7f2 775 uint64_t journal_tid, Context *on_finish) {
c07f9fc5
FG
776 I &image_ctx = this->m_image_ctx;
777
11fdf7f2
TL
778 // NOTE: safe to move m_cmp_bl since we only support this op against
779 // a single object
c07f9fc5
FG
780 bufferlist bl;
781 assemble_extent(object_extent, &bl);
11fdf7f2
TL
782 auto req = ObjectDispatchSpec::create_compare_and_write(
783 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name,
784 object_extent.objectno, object_extent.offset, std::move(m_cmp_bl),
785 std::move(bl), snapc, m_mismatch_offset, m_op_flags, journal_tid,
786 this->m_trace, on_finish);
c07f9fc5
FG
787 return req;
788}
789
790template <typename I>
791void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
792 I &image_ctx = this->m_image_ctx;
793 image_ctx.perfcounter->inc(l_librbd_cmp);
794 image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
795}
796
797template <typename I>
11fdf7f2
TL
798int ImageCompareAndWriteRequest<I>::prune_object_extents(
799 ObjectExtents* object_extents) const {
800 if (object_extents->size() > 1)
c07f9fc5
FG
801 return -EINVAL;
802
803 I &image_ctx = this->m_image_ctx;
804 uint64_t sector_size = 512ULL;
805 uint64_t su = image_ctx.layout.stripe_unit;
11fdf7f2 806 ObjectExtent object_extent = object_extents->front();
c07f9fc5
FG
807 if (object_extent.offset % sector_size + object_extent.length > sector_size ||
808 (su != 0 && (object_extent.offset % su + object_extent.length > su)))
809 return -EINVAL;
810
811 return 0;
812}
813
7c673cae
FG
814} // namespace io
815} // namespace librbd
816
817template class librbd::io::ImageRequest<librbd::ImageCtx>;
818template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
819template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
820template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
821template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
822template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
823template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
c07f9fc5 824template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;