]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/io/ImageRequest.cc
5f8d2f0da39bf750c61febc46c56338762aa869a
[ceph.git] / ceph / src / librbd / io / ImageRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/io/ImageRequest.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/internal.h"
7 #include "librbd/Journal.h"
8 #include "librbd/Types.h"
9 #include "librbd/Utils.h"
10 #include "librbd/asio/ContextWQ.h"
11 #include "librbd/io/AioCompletion.h"
12 #include "librbd/io/AsyncOperation.h"
13 #include "librbd/io/ObjectDispatchInterface.h"
14 #include "librbd/io/ObjectDispatchSpec.h"
15 #include "librbd/io/ObjectDispatcherInterface.h"
16 #include "librbd/io/Utils.h"
17 #include "librbd/journal/Types.h"
18 #include "include/rados/librados.hpp"
19 #include "common/errno.h"
20 #include "common/perf_counters.h"
21 #include "osdc/Striper.h"
22 #include <algorithm>
23 #include <functional>
24 #include <map>
25
26 #define dout_subsys ceph_subsys_rbd
27 #undef dout_prefix
28 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
29
30 namespace librbd {
31 namespace io {
32
33 using librbd::util::data_object_name;
34 using librbd::util::get_image_ctx;
35
36 namespace {
37
template <typename I>
struct C_AssembleSnapshotDeltas : public C_AioRequest {
  // Aggregates per-object snapshot deltas (keyed by object number) and, once
  // all object list-snaps requests have completed, converts them back into
  // image-extent space, storing the merged result in *snapshot_delta before
  // completing the AioCompletion.
  I* image_ctx;
  SnapshotDelta* snapshot_delta;

  // guards object_snapshot_delta: per-object list-snaps requests may
  // complete concurrently on different threads
  ceph::mutex lock = ceph::make_mutex(
    "librbd::io::C_AssembleSnapshotDeltas::lock", false);
  std::map<uint64_t, SnapshotDelta> object_snapshot_delta;

  C_AssembleSnapshotDeltas(I* image_ctx, AioCompletion* aio_comp,
                           SnapshotDelta* snapshot_delta)
    : C_AioRequest(aio_comp),
      image_ctx(image_ctx), snapshot_delta(snapshot_delta) {
  }

  // Returns the slot a given object's list-snaps request should populate.
  // Creates the map entry on first use; thread-safe.
  SnapshotDelta* get_snapshot_delta(uint64_t object_no) {
    std::unique_lock locker{lock};
    return &object_snapshot_delta[object_no];
  }

  void finish(int r) override {
    auto cct = image_ctx->cct;

    if (r < 0) {
      // propagate the first failure; partial results are discarded
      lderr(cct) << "C_AssembleSnapshotDeltas: list snaps failed: "
                 << cpp_strerror(r) << dendl;
      C_AioRequest::finish(r);
      return;
    }

    std::unique_lock locker{lock};
    *snapshot_delta = {};
    // NOTE: the structured binding intentionally shadows the member of the
    // same name inside the loop body
    for (auto& [object_no, object_snapshot_delta] : object_snapshot_delta) {
      SnapshotDelta image_snapshot_delta;
      object_to_image_intervals(object_no, object_snapshot_delta,
                                &image_snapshot_delta, snapshot_delta);

      ldout(cct, 20) << "object_no=" << object_no << ", "
                     << "object_snapshot_delta="
                     << object_snapshot_delta << ", "
                     << "image_snapshot_delta=" << image_snapshot_delta
                     << dendl;
    }

    ldout(cct, 20) << "snapshot_delta=" << *snapshot_delta << dendl;
    C_AioRequest::finish(0);
  }

  // Maps one object's snapshot delta from object-extent space into
  // image-extent space, inserting the resulting intervals both into a
  // per-object delta (used for logging above) and into the assembled
  // image-wide delta.
  void object_to_image_intervals(
      uint64_t object_no, const SnapshotDelta& object_snapshot_delta,
      SnapshotDelta* image_snapshot_delta,
      SnapshotDelta* assembled_image_snapshot_delta) {
    for (auto& [key, object_extents] : object_snapshot_delta) {
      for (auto& object_extent : object_extents) {
        // a single object extent can map to multiple image extents
        Extents image_extents;
        io::util::extent_to_file(image_ctx, object_no, object_extent.get_off(),
                                 object_extent.get_len(), image_extents);

        auto& intervals = (*image_snapshot_delta)[key];
        auto& assembled_intervals = (*assembled_image_snapshot_delta)[key];
        for (auto [image_offset, image_length] : image_extents) {
          SparseExtent sparse_extent{object_extent.get_val().state,
                                     image_length};
          intervals.insert(image_offset, image_length, sparse_extent);
          assembled_intervals.insert(image_offset, image_length,
                                     sparse_extent);
        }
      }
    }
  }
};
109
template <typename I>
struct C_RBD_Readahead : public Context {
  // Completion for a single readahead object read.  The payload is not
  // consumed here — the request presumably exists to populate caches in the
  // object dispatch path (TODO confirm).  Maintains the image's
  // pending-readahead counter for the lifetime of the request.
  I *ictx;
  uint64_t object_no;
  io::ReadExtents extents;  // exactly one extent; filled by the read request

  C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length)
    : ictx(ictx), object_no(object_no), extents({{offset, length}}) {
    ictx->readahead.inc_pending();
  }

  void finish(int r) override {
    ceph_assert(extents.size() == 1);
    auto& extent = extents.front();
    ldout(ictx->cct, 20) << "C_RBD_Readahead on "
                         << data_object_name(ictx, object_no) << ": "
                         << extent.offset << "~" << extent.length << dendl;
    ictx->readahead.dec_pending();
  }
};
130
131 template <typename I>
132 void readahead(I *ictx, const Extents& image_extents, IOContext io_context) {
133 uint64_t total_bytes = 0;
134 for (auto& image_extent : image_extents) {
135 total_bytes += image_extent.second;
136 }
137
138 ictx->image_lock.lock_shared();
139 auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes);
140 bool abort = (
141 ictx->readahead_disable_after_bytes != 0 &&
142 total_bytes_read > ictx->readahead_disable_after_bytes);
143 if (abort) {
144 ictx->image_lock.unlock_shared();
145 return;
146 }
147
148 uint64_t image_size = ictx->get_effective_image_size(ictx->snap_id);
149 ictx->image_lock.unlock_shared();
150
151 auto readahead_extent = ictx->readahead.update(image_extents, image_size);
152 uint64_t readahead_offset = readahead_extent.first;
153 uint64_t readahead_length = readahead_extent.second;
154
155 if (readahead_length > 0) {
156 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~"
157 << readahead_length << dendl;
158 LightweightObjectExtents readahead_object_extents;
159 io::util::file_to_extents(ictx, readahead_offset, readahead_length, 0,
160 &readahead_object_extents);
161 for (auto& object_extent : readahead_object_extents) {
162 ldout(ictx->cct, 20) << "(readahead) "
163 << data_object_name(ictx,
164 object_extent.object_no) << " "
165 << object_extent.offset << "~"
166 << object_extent.length << dendl;
167
168 auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no,
169 object_extent.offset,
170 object_extent.length);
171 auto req = io::ObjectDispatchSpec::create_read(
172 ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
173 &req_comp->extents, io_context, 0, 0, {}, nullptr, req_comp);
174 req->send();
175 }
176
177 ictx->perfcounter->inc(l_librbd_readahead);
178 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
179 }
180 }
181
// Fire-and-forget updater for the image header's access/modify timestamp.
// Holds an AsyncOperation for its lifetime so image shutdown waits for the
// in-flight RADOS op; errors from the update are deliberately ignored.
template <typename I>
struct C_UpdateTimestamp : public Context {
public:
  I& m_image_ctx;
  bool m_modify; // if modify set to 'true', modify timestamp is updated,
                 // access timestamp otherwise
  AsyncOperation m_async_op;

  C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
    m_async_op.start_op(*get_image_ctx(&m_image_ctx));
  }
  ~C_UpdateTimestamp() override {
    m_async_op.finish_op();
  }

  // Issues the async header update; completion is routed back through
  // this->finish() via the rados callback.
  void send() {
    librados::ObjectWriteOperation op;
    if (m_modify) {
      cls_client::set_modify_timestamp(&op);
    } else {
      cls_client::set_access_timestamp(&op);
    }

    auto comp = librbd::util::create_rados_callback(this);
    int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
    ceph_assert(r == 0);
    comp->release();
  }

  void finish(int r) override {
    // ignore errors updating timestamp
  }
};
215
216 bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
217 uint64_t interval) {
218 return (interval &&
219 (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
220 }
221
222 } // anonymous namespace
223
224 #undef dout_prefix
225 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
226 << " " << __func__ << ": "
227
228 template <typename I>
229 void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
230 Extents &&image_extents,
231 ReadResult &&read_result, IOContext io_context,
232 int op_flags, int read_flags,
233 const ZTracer::Trace &parent_trace) {
234 ImageReadRequest<I> req(*ictx, c, std::move(image_extents),
235 std::move(read_result), io_context, op_flags,
236 read_flags, parent_trace);
237 req.send();
238 }
239
240 template <typename I>
241 void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
242 Extents &&image_extents, bufferlist &&bl,
243 IOContext io_context, int op_flags,
244 const ZTracer::Trace &parent_trace) {
245 ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl),
246 io_context, op_flags, parent_trace);
247 req.send();
248 }
249
250 template <typename I>
251 void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
252 Extents &&image_extents,
253 uint32_t discard_granularity_bytes,
254 IOContext io_context,
255 const ZTracer::Trace &parent_trace) {
256 ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents),
257 discard_granularity_bytes, io_context,
258 parent_trace);
259 req.send();
260 }
261
262 template <typename I>
263 void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
264 FlushSource flush_source,
265 const ZTracer::Trace &parent_trace) {
266 ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace);
267 req.send();
268 }
269
270 template <typename I>
271 void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
272 Extents &&image_extents,
273 bufferlist &&bl, IOContext io_context,
274 int op_flags,
275 const ZTracer::Trace &parent_trace) {
276 ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents),
277 std::move(bl), io_context, op_flags,
278 parent_trace);
279 req.send();
280 }
281
282 template <typename I>
283 void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
284 Extents &&image_extents,
285 bufferlist &&cmp_bl,
286 bufferlist &&bl,
287 uint64_t *mismatch_offset,
288 IOContext io_context, int op_flags,
289 const ZTracer::Trace &parent_trace) {
290 ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents),
291 std::move(cmp_bl), std::move(bl),
292 mismatch_offset, io_context, op_flags,
293 parent_trace);
294 req.send();
295 }
296
297 template <typename I>
298 void ImageRequest<I>::send() {
299 I &image_ctx = this->m_image_ctx;
300 ceph_assert(m_aio_comp->is_initialized(get_aio_type()));
301 ceph_assert(m_aio_comp->is_started());
302
303 CephContext *cct = image_ctx.cct;
304 AioCompletion *aio_comp = this->m_aio_comp;
305 ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
306 << "completion=" << aio_comp << dendl;
307
308 update_timestamp();
309 send_request();
310 }
311
template <typename I>
void ImageRequest<I>::update_timestamp() {
  // Refresh the image's access timestamp (reads) or modify timestamp (all
  // other request types) if the configured update interval has elapsed.
  bool modify = (get_aio_type() != AIO_TYPE_READ);
  uint64_t update_interval;
  if (modify) {
    update_interval = m_image_ctx.mtime_update_interval;
  } else {
    update_interval = m_image_ctx.atime_update_interval;
  }

  // an interval of zero disables timestamp maintenance
  if (update_interval == 0) {
    return;
  }

  // select the getter/setter member-function pair for the timestamp in play
  utime_t (I::*get_timestamp_fn)() const;
  void (I::*set_timestamp_fn)(utime_t);
  if (modify) {
    get_timestamp_fn = &I::get_modify_timestamp;
    set_timestamp_fn = &I::set_modify_timestamp;
  } else {
    get_timestamp_fn = &I::get_access_timestamp;
    set_timestamp_fn = &I::set_access_timestamp;
  }

  utime_t ts = ceph_clock_now();
  {
    // cheap first check under the shared lock -- most requests bail here
    std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock};
    if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
                                update_interval)) {
      return;
    }
  }

  {
    // re-check under the exclusive lock: another thread may have already
    // updated the timestamp between the two acquisitions
    std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock};
    bool update = should_update_timestamp(
      ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
    if (!update) {
      return;
    }

    std::invoke(set_timestamp_fn, m_image_ctx, ts);
  }

  // TODO we fire and forget this outside the IO path to prevent
  // potential race conditions with librbd client IO callbacks
  // between different threads (e.g. librados and object cacher)
  ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
  auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
  req->send();
}
363
// Read request constructor: ownership of the ReadResult is transferred to
// the AioCompletion, which assembles the object read payloads into it.
template <typename I>
ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
                                      Extents &&image_extents,
                                      ReadResult &&read_result,
                                      IOContext io_context, int op_flags,
                                      int read_flags,
                                      const ZTracer::Trace &parent_trace)
  : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents),
                    io_context, "read", parent_trace),
    m_op_flags(op_flags), m_read_flags(read_flags) {
  aio_comp->read_result = std::move(read_result);
}
376
template <typename I>
void ImageReadRequest<I>::send_request() {
  I &image_ctx = this->m_image_ctx;
  CephContext *cct = image_ctx.cct;

  // kick off readahead unless the caller hinted at a random access pattern
  auto &image_extents = this->m_image_extents;
  if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 &&
      !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
    readahead(get_image_ctx(&image_ctx), image_extents, this->m_io_context);
  }

  // map image extents to object extents; zero-length extents are skipped.
  // buffer_ofs tracks each extent's position in the flattened read buffer
  // and doubles as the total byte count for the perf counters below.
  LightweightObjectExtents object_extents;
  uint64_t buffer_ofs = 0;
  for (auto &extent : image_extents) {
    if (extent.second == 0) {
      continue;
    }

    util::file_to_extents(&image_ctx, extent.first, extent.second, buffer_ofs,
                          &object_extents);
    buffer_ofs += extent.second;
  }

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->read_result.set_image_extents(image_extents);

  // issue the requests; the request count must be set before any request
  // can complete
  aio_comp->set_request_count(object_extents.size());
  for (auto &oe : object_extents) {
    ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
                   << oe.offset << "~" << oe.length << " from "
                   << oe.buffer_extents << dendl;

    auto req_comp = new io::ReadResult::C_ObjectReadRequest(
      aio_comp, {{oe.offset, oe.length, std::move(oe.buffer_extents)}});
    auto req = ObjectDispatchSpec::create_read(
      &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no,
      &req_comp->extents, this->m_io_context, m_op_flags, m_read_flags,
      this->m_trace, nullptr, req_comp);
    req->send();
  }

  image_ctx.perfcounter->inc(l_librbd_rd);
  image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
}
423
template <typename I>
void AbstractImageWriteRequest<I>::send_request() {
  // Common write-path driver: map image extents to object extents, let the
  // subclass prune/validate them, optionally journal the event, then fan
  // out the per-object requests.
  I &image_ctx = this->m_image_ctx;

  bool journaling = false;

  AioCompletion *aio_comp = this->m_aio_comp;
  {
    // prevent image size from changing between computing clip and recording
    // pending async operation
    std::shared_lock image_locker{image_ctx.image_lock};
    journaling = (image_ctx.journal != nullptr &&
                  image_ctx.journal->is_journal_appending());
  }

  // clip_len accumulates the total bytes written (used for stats) and
  // serves as the running buffer offset for extent mapping
  uint64_t clip_len = 0;
  LightweightObjectExtents object_extents;
  for (auto &extent : this->m_image_extents) {
    if (extent.second == 0) {
      continue;
    }

    // map to object extents
    io::util::file_to_extents(&image_ctx, extent.first, extent.second, clip_len,
                              &object_extents);
    clip_len += extent.second;
  }

  // subclass hook: e.g. discard granularity alignment, compare-and-write
  // single-object validation
  int ret = prune_object_extents(&object_extents);
  if (ret < 0) {
    aio_comp->fail(ret);
    return;
  }

  // a request count of zero completes the aio immediately
  aio_comp->set_request_count(object_extents.size());
  if (!object_extents.empty()) {
    uint64_t journal_tid = 0;
    if (journaling) {
      // in-flight ops are flushed prior to closing the journal
      ceph_assert(image_ctx.journal != NULL);
      journal_tid = append_journal_event(m_synchronous);
    }

    send_object_requests(object_extents, this->m_io_context, journal_tid);
  }

  update_stats(clip_len);
}
472
473 template <typename I>
474 void AbstractImageWriteRequest<I>::send_object_requests(
475 const LightweightObjectExtents &object_extents, IOContext io_context,
476 uint64_t journal_tid) {
477 I &image_ctx = this->m_image_ctx;
478 CephContext *cct = image_ctx.cct;
479
480 AioCompletion *aio_comp = this->m_aio_comp;
481 bool single_extent = (object_extents.size() == 1);
482 for (auto& oe : object_extents) {
483 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
484 << oe.offset << "~" << oe.length << " from "
485 << oe.buffer_extents << dendl;
486 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
487 auto request = create_object_request(oe, io_context, journal_tid,
488 single_extent, req_comp);
489 request->send();
490 }
491 }
492
493 template <typename I>
494 void ImageWriteRequest<I>::assemble_extent(
495 const LightweightObjectExtent &object_extent, bufferlist *bl) {
496 for (auto q = object_extent.buffer_extents.begin();
497 q != object_extent.buffer_extents.end(); ++q) {
498 bufferlist sub_bl;
499 sub_bl.substr_of(m_bl, q->first, q->second);
500 bl->claim_append(sub_bl);
501 }
502 }
503
504 template <typename I>
505 uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
506 I &image_ctx = this->m_image_ctx;
507
508 uint64_t tid = 0;
509 uint64_t buffer_offset = 0;
510 ceph_assert(!this->m_image_extents.empty());
511 for (auto &extent : this->m_image_extents) {
512 bufferlist sub_bl;
513 sub_bl.substr_of(m_bl, buffer_offset, extent.second);
514 buffer_offset += extent.second;
515
516 tid = image_ctx.journal->append_write_event(extent.first, extent.second,
517 sub_bl, synchronous);
518 }
519
520 return tid;
521 }
522
template <typename I>
ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request(
    const LightweightObjectExtent &object_extent, IOContext io_context,
    uint64_t journal_tid, bool single_extent, Context *on_finish) {
  // Builds the object-level write op for one object extent.
  I &image_ctx = this->m_image_ctx;

  bufferlist bl;
  if (single_extent && object_extent.buffer_extents.size() == 1 &&
      m_bl.length() == object_extent.length) {
    // optimization for single object/buffer extent writes: the whole image
    // payload is this object's payload, so move it instead of copying.
    // NOTE: m_bl is consumed here; safe only because create_object_request
    // is invoked once when single_extent is true.
    bl = std::move(m_bl);
  } else {
    assemble_extent(object_extent, &bl);
  }

  auto req = ObjectDispatchSpec::create_write(
    &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
    object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
    std::nullopt, journal_tid, this->m_trace, on_finish);
  return req;
}
544
545 template <typename I>
546 void ImageWriteRequest<I>::update_stats(size_t length) {
547 I &image_ctx = this->m_image_ctx;
548 image_ctx.perfcounter->inc(l_librbd_wr);
549 image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
550 }
551
552 template <typename I>
553 uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
554 I &image_ctx = this->m_image_ctx;
555
556 uint64_t tid = 0;
557 ceph_assert(!this->m_image_extents.empty());
558 for (auto &extent : this->m_image_extents) {
559 journal::EventEntry event_entry(
560 journal::AioDiscardEvent(extent.first,
561 extent.second,
562 this->m_discard_granularity_bytes));
563 tid = image_ctx.journal->append_io_event(std::move(event_entry),
564 extent.first, extent.second,
565 synchronous, 0);
566 }
567
568 return tid;
569 }
570
571 template <typename I>
572 ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request(
573 const LightweightObjectExtent &object_extent, IOContext io_context,
574 uint64_t journal_tid, bool single_extent, Context *on_finish) {
575 I &image_ctx = this->m_image_ctx;
576 auto req = ObjectDispatchSpec::create_discard(
577 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
578 object_extent.offset, object_extent.length, io_context,
579 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
580 on_finish);
581 return req;
582 }
583
584 template <typename I>
585 void ImageDiscardRequest<I>::update_stats(size_t length) {
586 I &image_ctx = this->m_image_ctx;
587 image_ctx.perfcounter->inc(l_librbd_discard);
588 image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
589 }
590
template <typename I>
int ImageDiscardRequest<I>::prune_object_extents(
    LightweightObjectExtents* object_extents) const {
  // No granularity configured: every discard passes through untouched.
  if (m_discard_granularity_bytes == 0) {
    return 0;
  }

  // Align the range to discard_granularity_bytes boundary and skip
  // any discards that are too small to free up any space.
  //
  // discard_granularity_bytes >= object_size && tail truncation
  // is a special case for filestore
  bool prune_required = false;
  auto object_size = this->m_image_ctx.layout.object_size;
  auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
                                            object_size);
  // Shrink each extent in place: round the start up and the end down to the
  // granularity; extents that collapse to nothing are marked (length = 0)
  // for removal below.
  auto xform_lambda =
    [discard_granularity_bytes, object_size, &prune_required]
    (LightweightObjectExtent& object_extent) {
      auto& offset = object_extent.offset;
      auto& length = object_extent.length;
      auto next_offset = offset + length;

      // the (next_offset >= object_size) case is deliberately left
      // unaligned -- tail truncation (see special case above)
      if ((discard_granularity_bytes < object_size) ||
          (next_offset < object_size)) {
        offset = p2roundup<uint64_t>(offset, discard_granularity_bytes);
        next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes);
        if (offset >= next_offset) {
          prune_required = true;
          length = 0;
        } else {
          length = next_offset - offset;
        }
      }
    };
  std::for_each(object_extents->begin(), object_extents->end(),
                xform_lambda);

  if (prune_required) {
    // one or more object extents were skipped
    auto remove_lambda =
      [](const LightweightObjectExtent& object_extent) {
        return (object_extent.length == 0);
      };
    object_extents->erase(
      std::remove_if(object_extents->begin(), object_extents->end(),
                     remove_lambda),
      object_extents->end());
  }
  return 0;
}
642
template <typename I>
void ImageFlushRequest<I>::send_request() {
  // Builds the flush callback chain back-to-front:
  //   async_op.flush / immediate -> dispatch flush spec -> async context
  //   -> C_AioRequest -> AioCompletion
  I &image_ctx = this->m_image_ctx;

  bool journaling = false;
  {
    std::shared_lock image_locker{image_ctx.image_lock};
    // only user-initiated flushes are recorded in the journal
    journaling = (m_flush_source == FLUSH_SOURCE_USER &&
                  image_ctx.journal != nullptr &&
                  image_ctx.journal->is_journal_appending());
  }

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->set_request_count(1);

  Context *ctx = new C_AioRequest(aio_comp);

  // ensure no locks are held when flush is complete
  ctx = librbd::util::create_async_context_callback(image_ctx, ctx);

  uint64_t journal_tid = 0;
  if (journaling) {
    // in-flight ops are flushed prior to closing the journal
    ceph_assert(image_ctx.journal != NULL);
    journal_tid = image_ctx.journal->append_io_event(
      journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
    image_ctx.journal->user_flushed();
  }

  auto object_dispatch_spec = ObjectDispatchSpec::create_flush(
    &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid,
    this->m_trace, ctx);
  ctx = new LambdaContext([object_dispatch_spec](int r) {
      object_dispatch_spec->send();
    });

  // ensure all in-flight IOs are settled if non-user flush request
  if (m_flush_source == FLUSH_SOURCE_WRITEBACK) {
    ctx->complete(0);
  } else {
    aio_comp->async_op.flush(ctx);
  }

  // might be flushing during image shutdown
  if (image_ctx.perfcounter != nullptr) {
    image_ctx.perfcounter->inc(l_librbd_flush);
  }
}
691
692 template <typename I>
693 uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
694 I &image_ctx = this->m_image_ctx;
695
696 uint64_t tid = 0;
697 ceph_assert(!this->m_image_extents.empty());
698 for (auto &extent : this->m_image_extents) {
699 journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
700 extent.second,
701 m_data_bl));
702 tid = image_ctx.journal->append_io_event(std::move(event_entry),
703 extent.first, extent.second,
704 synchronous, 0);
705 }
706
707 return tid;
708 }
709
710 template <typename I>
711 ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request(
712 const LightweightObjectExtent &object_extent, IOContext io_context,
713 uint64_t journal_tid, bool single_extent, Context *on_finish) {
714 I &image_ctx = this->m_image_ctx;
715
716 bufferlist bl;
717 ObjectDispatchSpec *req;
718
719 if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) {
720 auto buffer_extents{object_extent.buffer_extents};
721
722 req = ObjectDispatchSpec::create_write_same(
723 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
724 object_extent.offset, object_extent.length, std::move(buffer_extents),
725 std::move(bl), io_context, m_op_flags, journal_tid,
726 this->m_trace, on_finish);
727 return req;
728 }
729 req = ObjectDispatchSpec::create_write(
730 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
731 object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
732 std::nullopt, journal_tid, this->m_trace, on_finish);
733 return req;
734 }
735
736 template <typename I>
737 void ImageWriteSameRequest<I>::update_stats(size_t length) {
738 I &image_ctx = this->m_image_ctx;
739 image_ctx.perfcounter->inc(l_librbd_ws);
740 image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
741 }
742
743 template <typename I>
744 uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
745 bool synchronous) {
746 I &image_ctx = this->m_image_ctx;
747
748 uint64_t tid = 0;
749 ceph_assert(this->m_image_extents.size() == 1);
750 auto &extent = this->m_image_extents.front();
751 journal::EventEntry event_entry(
752 journal::AioCompareAndWriteEvent(extent.first, extent.second, m_cmp_bl,
753 m_bl));
754 tid = image_ctx.journal->append_io_event(std::move(event_entry),
755 extent.first, extent.second,
756 synchronous, -EILSEQ);
757
758 return tid;
759 }
760
761 template <typename I>
762 void ImageCompareAndWriteRequest<I>::assemble_extent(
763 const LightweightObjectExtent &object_extent, bufferlist *bl) {
764 for (auto q = object_extent.buffer_extents.begin();
765 q != object_extent.buffer_extents.end(); ++q) {
766 bufferlist sub_bl;
767 sub_bl.substr_of(m_bl, q->first, q->second);
768 bl->claim_append(sub_bl);
769 }
770 }
771
template <typename I>
ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request(
    const LightweightObjectExtent &object_extent, IOContext io_context,
    uint64_t journal_tid, bool single_extent, Context *on_finish) {
  // Builds the single object-level compare-and-write op; on compare
  // mismatch the dispatch layer reports the offset via m_mismatch_offset.
  I &image_ctx = this->m_image_ctx;

  // NOTE: safe to move m_cmp_bl since we only support this op against
  // a single object (enforced by prune_object_extents)
  bufferlist bl;
  assemble_extent(object_extent, &bl);
  auto req = ObjectDispatchSpec::create_compare_and_write(
    &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
    object_extent.offset, std::move(m_cmp_bl), std::move(bl), io_context,
    m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish);
  return req;
}
788
789 template <typename I>
790 void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
791 I &image_ctx = this->m_image_ctx;
792 image_ctx.perfcounter->inc(l_librbd_cmp);
793 image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
794 }
795
796 template <typename I>
797 int ImageCompareAndWriteRequest<I>::prune_object_extents(
798 LightweightObjectExtents* object_extents) const {
799 if (object_extents->size() > 1)
800 return -EINVAL;
801
802 I &image_ctx = this->m_image_ctx;
803 uint64_t sector_size = 512ULL;
804 uint64_t su = image_ctx.layout.stripe_unit;
805 auto& object_extent = object_extents->front();
806 if (object_extent.offset % sector_size + object_extent.length > sector_size ||
807 (su != 0 && (object_extent.offset % su + object_extent.length > su)))
808 return -EINVAL;
809
810 return 0;
811 }
812
// List-snaps request: computes, for the given image extents and snapshot
// ids, the delta intervals written into *snapshot_delta on completion.
template <typename I>
ImageListSnapsRequest<I>::ImageListSnapsRequest(
    I& image_ctx, AioCompletion* aio_comp, Extents&& image_extents,
    SnapIds&& snap_ids, int list_snaps_flags, SnapshotDelta* snapshot_delta,
    const ZTracer::Trace& parent_trace)
  : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents),
                    image_ctx.get_data_io_context(), "list-snaps",
                    parent_trace),
    m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags),
    m_snapshot_delta(snapshot_delta) {
}
824
template <typename I>
void ImageListSnapsRequest<I>::send_request() {
  I &image_ctx = this->m_image_ctx;
  CephContext *cct = image_ctx.cct;

  // map image extents to object extents, grouped per object number
  auto &image_extents = this->m_image_extents;
  std::map<uint64_t, Extents> object_number_extents;
  for (auto& image_extent : image_extents) {
    if (image_extent.second == 0) {
      continue;
    }

    striper::LightweightObjectExtents object_extents;
    io::util::file_to_extents(&image_ctx, image_extent.first,
                              image_extent.second, 0, &object_extents);
    for (auto& object_extent : object_extents) {
      object_number_extents[object_extent.object_no].emplace_back(
        object_extent.offset, object_extent.length);
    }
  }

  // reassemble the deltas back into image-extents when complete: the outer
  // completion is fulfilled by a single sub-completion that fires once all
  // per-object list-snaps requests have finished
  auto aio_comp = this->m_aio_comp;
  aio_comp->set_request_count(1);
  auto assemble_ctx = new C_AssembleSnapshotDeltas<I>(
    &image_ctx, aio_comp, m_snapshot_delta);
  auto sub_aio_comp = AioCompletion::create_and_start<
    Context, &Context::complete>(assemble_ctx, get_image_ctx(&image_ctx),
                                 AIO_TYPE_GENERIC);

  // issue the requests; each writes its delta into the slot provided by
  // the assembly context
  sub_aio_comp->set_request_count(object_number_extents.size());
  for (auto& oe : object_number_extents) {
    ldout(cct, 20) << data_object_name(&image_ctx, oe.first) << " "
                   << oe.second << dendl;
    auto ctx = new C_AioRequest(sub_aio_comp);
    auto req = ObjectDispatchSpec::create_list_snaps(
      &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.first, std::move(oe.second),
      SnapIds{m_snap_ids}, m_list_snaps_flags, this->m_trace,
      assemble_ctx->get_snapshot_delta(oe.first), ctx);
    req->send();
  }
}
869
870 } // namespace io
871 } // namespace librbd
872
873 template class librbd::io::ImageRequest<librbd::ImageCtx>;
874 template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
875 template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
876 template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
877 template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
878 template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
879 template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
880 template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;
881 template class librbd::io::ImageListSnapsRequest<librbd::ImageCtx>;