1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/io/ImageRequest.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/internal.h"
7 #include "librbd/Journal.h"
8 #include "librbd/Types.h"
9 #include "librbd/Utils.h"
10 #include "librbd/asio/ContextWQ.h"
11 #include "librbd/io/AioCompletion.h"
12 #include "librbd/io/AsyncOperation.h"
13 #include "librbd/io/ObjectDispatchInterface.h"
14 #include "librbd/io/ObjectDispatchSpec.h"
15 #include "librbd/io/ObjectDispatcherInterface.h"
16 #include "librbd/io/Utils.h"
17 #include "librbd/journal/Types.h"
18 #include "include/rados/librados.hpp"
19 #include "common/errno.h"
20 #include "common/perf_counters.h"
21 #include "osdc/Striper.h"
26 #define dout_subsys ceph_subsys_rbd
28 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
33 using librbd::util::data_object_name
;
34 using librbd::util::get_image_ctx
;
// NOTE(review): corrupted listing — original line numbers are embedded in the
// text and several source lines are missing (numbering jumps, e.g. 50->53,
// 78->82, 99->101); code kept byte-identical, comments only.
// Aggregates per-object SnapshotDelta results (keyed by object number) and,
// on completion, remaps them into image-extent space via extent_to_file.
39 struct C_AssembleSnapshotDeltas
: public C_AioRequest
{
41 SnapshotDelta
* snapshot_delta
;
// lock guards object_snapshot_delta; per-object list-snaps callbacks may
// presumably run concurrently — TODO confirm against the dispatcher.
43 ceph::mutex lock
= ceph::make_mutex(
44 "librbd::io::C_AssembleSnapshotDeltas::lock", false);
45 std::map
<uint64_t, SnapshotDelta
> object_snapshot_delta
;
47 C_AssembleSnapshotDeltas(I
* image_ctx
, AioCompletion
* aio_comp
,
48 SnapshotDelta
* snapshot_delta
)
49 : C_AioRequest(aio_comp
),
50 image_ctx(image_ctx
), snapshot_delta(snapshot_delta
) {
// Returns the (default-constructed on first use) delta slot for one object,
// under the lock; callers fill it in from their list-snaps responses.
53 SnapshotDelta
* get_snapshot_delta(uint64_t object_no
) {
54 std::unique_lock locker
{lock
};
55 return &object_snapshot_delta
[object_no
];
// On error: log and propagate r; on success: remap every per-object delta
// into image-extent space and finish with 0.
58 void finish(int r
) override
{
59 auto cct
= image_ctx
->cct
;
62 lderr(cct
) << "C_AssembleSnapshotDeltas: list snaps failed: "
63 << cpp_strerror(r
) << dendl
;
64 C_AioRequest::finish(r
);
68 std::unique_lock locker
{lock
};
70 for (auto& [object_no
, object_snapshot_delta
] : object_snapshot_delta
) {
71 SnapshotDelta image_snapshot_delta
;
72 object_to_image_intervals(object_no
, object_snapshot_delta
,
73 &image_snapshot_delta
, snapshot_delta
);
75 ldout(cct
, 20) << "object_no=" << object_no
<< ", "
76 << "object_snapshot_delta="
77 << object_snapshot_delta
<< ", "
78 << "image_snapshot_delta=" << image_snapshot_delta
82 ldout(cct
, 20) << "snapshot_delta=" << *snapshot_delta
<< dendl
;
83 C_AioRequest::finish(0);
// Converts one object's snapshot delta into image-extent intervals, writing
// into both the per-object output and the overall assembled delta.
86 void object_to_image_intervals(
87 uint64_t object_no
, const SnapshotDelta
& object_snapshot_delta
,
88 SnapshotDelta
* image_snapshot_delta
,
89 SnapshotDelta
* assembled_image_snapshot_delta
) {
90 for (auto& [key
, object_extents
] : object_snapshot_delta
) {
91 for (auto& object_extent
: object_extents
) {
92 Extents image_extents
;
93 io::util::extent_to_file(image_ctx
, object_no
, object_extent
.get_off(),
94 object_extent
.get_len(), image_extents
);
96 auto& intervals
= (*image_snapshot_delta
)[key
];
97 auto& assembled_intervals
= (*assembled_image_snapshot_delta
)[key
];
98 for (auto [image_offset
, image_length
] : image_extents
) {
99 SparseExtent sparse_extent
{object_extent
.get_val().state
,
101 intervals
.insert(image_offset
, image_length
, sparse_extent
);
102 assembled_intervals
.insert(image_offset
, image_length
,
// NOTE(review): corrupted listing — embedded line numbers, missing source
// lines (e.g. 112-113 member declarations, 119-120); comments only added.
// Completion context for a single readahead object read: holds the read
// extent, tracks the pending-readahead counter, and logs on completion.
110 template <typename I
>
111 struct C_RBD_Readahead
: public Context
{
114 io::ReadExtents extents
;
// Constructor registers the in-flight readahead with ictx->readahead.
116 C_RBD_Readahead(I
*ictx
, uint64_t object_no
, uint64_t offset
, uint64_t length
)
117 : ictx(ictx
), object_no(object_no
), extents({{offset
, length
}}) {
118 ictx
->readahead
.inc_pending();
// finish(): exactly one extent expected; log it and drop the pending count.
121 void finish(int r
) override
{
122 ceph_assert(extents
.size() == 1);
123 auto& extent
= extents
.front();
124 ldout(ictx
->cct
, 20) << "C_RBD_Readahead on "
125 << data_object_name(ictx
, object_no
) << ": "
126 << extent
.offset
<< "~" << extent
.length
<< dendl
;
127 ictx
->readahead
.dec_pending();
// NOTE(review): corrupted listing — embedded line numbers and missing lines
// (e.g. 136-137, 140, 143, 145-147); code kept byte-identical.
// Issues speculative readahead object reads for sequential image reads:
// updates running byte counters, asks ictx->readahead for the next extent,
// maps it to object extents, and fires fire-and-forget reads for each.
131 template <typename I
>
132 void readahead(I
*ictx
, const Extents
& image_extents
, IOContext io_context
) {
133 uint64_t total_bytes
= 0;
134 for (auto& image_extent
: image_extents
) {
135 total_bytes
+= image_extent
.second
;
// Track cumulative bytes read; readahead can be disabled after a threshold.
138 ictx
->image_lock
.lock_shared();
139 auto total_bytes_read
= ictx
->total_bytes_read
.fetch_add(total_bytes
);
141 ictx
->readahead_disable_after_bytes
!= 0 &&
142 total_bytes_read
> ictx
->readahead_disable_after_bytes
);
144 ictx
->image_lock
.unlock_shared();
148 uint64_t image_size
= ictx
->get_effective_image_size(ictx
->snap_id
);
149 ictx
->image_lock
.unlock_shared();
// Ask the readahead state machine for the next (offset, length) to prefetch.
151 auto readahead_extent
= ictx
->readahead
.update(image_extents
, image_size
);
152 uint64_t readahead_offset
= readahead_extent
.first
;
153 uint64_t readahead_length
= readahead_extent
.second
;
155 if (readahead_length
> 0) {
156 ldout(ictx
->cct
, 20) << "(readahead logical) " << readahead_offset
<< "~"
157 << readahead_length
<< dendl
;
158 LightweightObjectExtents readahead_object_extents
;
159 io::util::file_to_extents(ictx
, readahead_offset
, readahead_length
, 0,
160 &readahead_object_extents
);
161 for (auto& object_extent
: readahead_object_extents
) {
162 ldout(ictx
->cct
, 20) << "(readahead) "
163 << data_object_name(ictx
,
164 object_extent
.object_no
) << " "
165 << object_extent
.offset
<< "~"
166 << object_extent
.length
<< dendl
;
// Fire-and-forget read: C_RBD_Readahead owns the extents and self-cleans.
168 auto req_comp
= new C_RBD_Readahead
<I
>(ictx
, object_extent
.object_no
,
169 object_extent
.offset
,
170 object_extent
.length
);
171 auto req
= io::ObjectDispatchSpec::create_read(
172 ictx
, io::OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
173 &req_comp
->extents
, io_context
, 0, 0, {}, nullptr, req_comp
);
177 ictx
->perfcounter
->inc(l_librbd_readahead
);
178 ictx
->perfcounter
->inc(l_librbd_readahead_bytes
, readahead_length
);
// NOTE(review): corrupted listing — embedded line numbers and missing lines
// (e.g. 184-185 member decls, 196-197, 199-204 conditionals); comments only.
// Fire-and-forget context that persists an updated access/modify timestamp
// to the image header via a cls call; wrapped in an AsyncOperation so
// image shutdown waits for it.
182 template <typename I
>
183 struct C_UpdateTimestamp
: public Context
{
186 bool m_modify
; // if modify set to 'true', modify timestamp is updated,
187 // access timestamp otherwise
188 AsyncOperation m_async_op
;
190 C_UpdateTimestamp(I
& ictx
, bool m
) : m_image_ctx(ictx
), m_modify(m
) {
191 m_async_op
.start_op(*get_image_ctx(&m_image_ctx
));
193 ~C_UpdateTimestamp() override
{
194 m_async_op
.finish_op();
// Build the header-object write op (modify vs access timestamp) and submit
// it asynchronously with `this` as the rados completion callback.
198 librados::ObjectWriteOperation op
;
200 cls_client::set_modify_timestamp(&op
);
202 cls_client::set_access_timestamp(&op
);
205 auto comp
= librbd::util::create_rados_callback(this);
206 int r
= m_image_ctx
.md_ctx
.aio_operate(m_image_ctx
.header_oid
, comp
, &op
);
211 void finish(int r
) override
{
212 // ignore errors updating timestamp
// Free helper: true when `now` is at least `interval` past `timestamp`.
216 bool should_update_timestamp(const utime_t
& now
, const utime_t
& timestamp
,
219 (static_cast<uint64_t>(now
.sec()) >= interval
+ timestamp
));
222 } // anonymous namespace
225 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
226 << " " << __func__ << ": "
228 template <typename I
>
229 void ImageRequest
<I
>::aio_read(I
*ictx
, AioCompletion
*c
,
230 Extents
&&image_extents
,
231 ReadResult
&&read_result
, IOContext io_context
,
232 int op_flags
, int read_flags
,
233 const ZTracer::Trace
&parent_trace
) {
234 ImageReadRequest
<I
> req(*ictx
, c
, std::move(image_extents
),
235 std::move(read_result
), io_context
, op_flags
,
236 read_flags
, parent_trace
);
240 template <typename I
>
241 void ImageRequest
<I
>::aio_write(I
*ictx
, AioCompletion
*c
,
242 Extents
&&image_extents
, bufferlist
&&bl
,
243 IOContext io_context
, int op_flags
,
244 const ZTracer::Trace
&parent_trace
) {
245 ImageWriteRequest
<I
> req(*ictx
, c
, std::move(image_extents
), std::move(bl
),
246 io_context
, op_flags
, parent_trace
);
250 template <typename I
>
251 void ImageRequest
<I
>::aio_discard(I
*ictx
, AioCompletion
*c
,
252 Extents
&&image_extents
,
253 uint32_t discard_granularity_bytes
,
254 IOContext io_context
,
255 const ZTracer::Trace
&parent_trace
) {
256 ImageDiscardRequest
<I
> req(*ictx
, c
, std::move(image_extents
),
257 discard_granularity_bytes
, io_context
,
262 template <typename I
>
263 void ImageRequest
<I
>::aio_flush(I
*ictx
, AioCompletion
*c
,
264 FlushSource flush_source
,
265 const ZTracer::Trace
&parent_trace
) {
266 ImageFlushRequest
<I
> req(*ictx
, c
, flush_source
, parent_trace
);
270 template <typename I
>
271 void ImageRequest
<I
>::aio_writesame(I
*ictx
, AioCompletion
*c
,
272 Extents
&&image_extents
,
273 bufferlist
&&bl
, IOContext io_context
,
275 const ZTracer::Trace
&parent_trace
) {
276 ImageWriteSameRequest
<I
> req(*ictx
, c
, std::move(image_extents
),
277 std::move(bl
), io_context
, op_flags
,
282 template <typename I
>
283 void ImageRequest
<I
>::aio_compare_and_write(I
*ictx
, AioCompletion
*c
,
284 Extents
&&image_extents
,
287 uint64_t *mismatch_offset
,
288 IOContext io_context
, int op_flags
,
289 const ZTracer::Trace
&parent_trace
) {
290 ImageCompareAndWriteRequest
<I
> req(*ictx
, c
, std::move(image_extents
),
291 std::move(cmp_bl
), std::move(bl
),
292 mismatch_offset
, io_context
, op_flags
,
// NOTE(review): corrupted listing — missing lines after embedded line 306
// (numbering jumps to 312, the dispatch to send_request() is not visible).
// Entry point for every image request: asserts the completion is primed,
// logs the request, then (in the missing tail) dispatches it.
297 template <typename I
>
298 void ImageRequest
<I
>::send() {
299 I
&image_ctx
= this->m_image_ctx
;
300 ceph_assert(m_aio_comp
->is_initialized(get_aio_type()));
301 ceph_assert(m_aio_comp
->is_started());
303 CephContext
*cct
= image_ctx
.cct
;
304 AioCompletion
*aio_comp
= this->m_aio_comp
;
305 ldout(cct
, 20) << get_request_type() << ": ictx=" << &image_ctx
<< ", "
306 << "completion=" << aio_comp
<< dendl
;
// NOTE(review): corrupted listing — embedded line numbers with several
// missing conditional/brace lines (316, 318, 320-321, 328, 331, 340-345,
// 349-352, 354-355, 361-362); code kept byte-identical, comments only.
// Throttled update of the image's access (reads) or modify (writes)
// timestamp: double-checked under shared then unique timestamp_lock, and
// persisted asynchronously via C_UpdateTimestamp.
312 template <typename I
>
313 void ImageRequest
<I
>::update_timestamp() {
314 bool modify
= (get_aio_type() != AIO_TYPE_READ
);
315 uint64_t update_interval
;
317 update_interval
= m_image_ctx
.mtime_update_interval
;
319 update_interval
= m_image_ctx
.atime_update_interval
;
// interval == 0 disables timestamp updates entirely.
322 if (update_interval
== 0) {
// Select getter/setter member-function pointers based on request type.
326 utime_t (I::*get_timestamp_fn
)() const;
327 void (I::*set_timestamp_fn
)(utime_t
);
329 get_timestamp_fn
= &I::get_modify_timestamp
;
330 set_timestamp_fn
= &I::set_modify_timestamp
;
332 get_timestamp_fn
= &I::get_access_timestamp
;
333 set_timestamp_fn
= &I::set_access_timestamp
;
336 utime_t ts
= ceph_clock_now();
// Cheap shared-lock check first; bail if the interval has not elapsed.
338 std::shared_lock timestamp_locker
{m_image_ctx
.timestamp_lock
};
339 if(!should_update_timestamp(ts
, std::invoke(get_timestamp_fn
, m_image_ctx
),
// Re-check under the exclusive lock before mutating (double-checked).
346 std::unique_lock timestamp_locker
{m_image_ctx
.timestamp_lock
};
347 bool update
= should_update_timestamp(
348 ts
, std::invoke(get_timestamp_fn
, m_image_ctx
), update_interval
);
353 std::invoke(set_timestamp_fn
, m_image_ctx
, ts
);
356 // TODO we fire and forget this outside the IO path to prevent
357 // potential race conditions with librbd client IO callbacks
358 // between different threads (e.g. librados and object cacher)
359 ldout(m_image_ctx
.cct
, 10) << get_request_type() << dendl
;
360 auto req
= new C_UpdateTimestamp
<I
>(m_image_ctx
, modify
);
// NOTE(review): corrupted listing — the `int read_flags,` parameter line
// (embedded line 369) and the closing brace are missing; comments only.
// Constructor: forwards extents/context/trace to the base ImageRequest and
// hands the ReadResult to the completion, which assembles the final buffer.
364 template <typename I
>
365 ImageReadRequest
<I
>::ImageReadRequest(I
&image_ctx
, AioCompletion
*aio_comp
,
366 Extents
&&image_extents
,
367 ReadResult
&&read_result
,
368 IOContext io_context
, int op_flags
,
370 const ZTracer::Trace
&parent_trace
)
371 : ImageRequest
<I
>(image_ctx
, aio_comp
, std::move(image_extents
),
372 io_context
, "read", parent_trace
),
373 m_op_flags(op_flags
), m_read_flags(read_flags
) {
374 aio_comp
->read_result
= std::move(read_result
);
// NOTE(review): corrupted listing — embedded line numbers with missing
// lines (381, 386-387, 393-395, 397, 399-400, 417-419); byte-identical.
// Read path: optional readahead, map image extents to object extents,
// then issue one object-dispatch read per object extent; results are
// reassembled by ReadResult via per-object C_ObjectReadRequest contexts.
377 template <typename I
>
378 void ImageReadRequest
<I
>::send_request() {
379 I
&image_ctx
= this->m_image_ctx
;
380 CephContext
*cct
= image_ctx
.cct
;
// Readahead only for cached, non-random-advise reads.
382 auto &image_extents
= this->m_image_extents
;
383 if (image_ctx
.cache
&& image_ctx
.readahead_max_bytes
> 0 &&
384 !(m_op_flags
& LIBRADOS_OP_FLAG_FADVISE_RANDOM
)) {
385 readahead(get_image_ctx(&image_ctx
), image_extents
, this->m_io_context
);
388 // map image extents to object extents
389 LightweightObjectExtents object_extents
;
390 uint64_t buffer_ofs
= 0;
391 for (auto &extent
: image_extents
) {
392 if (extent
.second
== 0) {
396 util::file_to_extents(&image_ctx
, extent
.first
, extent
.second
, buffer_ofs
,
398 buffer_ofs
+= extent
.second
;
401 AioCompletion
*aio_comp
= this->m_aio_comp
;
402 aio_comp
->read_result
.set_image_extents(image_extents
);
404 // issue the requests
405 aio_comp
->set_request_count(object_extents
.size());
406 for (auto &oe
: object_extents
) {
407 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.object_no
) << " "
408 << oe
.offset
<< "~" << oe
.length
<< " from "
409 << oe
.buffer_extents
<< dendl
;
411 auto req_comp
= new io::ReadResult::C_ObjectReadRequest(
412 aio_comp
, {{oe
.offset
, oe
.length
, std::move(oe
.buffer_extents
)}});
413 auto req
= ObjectDispatchSpec::create_read(
414 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, oe
.object_no
,
415 &req_comp
->extents
, this->m_io_context
, m_op_flags
, m_read_flags
,
416 this->m_trace
, nullptr, req_comp
);
420 image_ctx
.perfcounter
->inc(l_librbd_rd
);
421 image_ctx
.perfcounter
->inc(l_librbd_rd_bytes
, buffer_ofs
);
// NOTE(review): corrupted listing — embedded line numbers with missing
// lines (427, 429, 431, 437-438, 443-445, 448, 450-451, 453-457, 461,
// 465-466, 468-469); code kept byte-identical, comments only.
// Shared write-family path: compute object extents under image_lock,
// optionally prune them (discard), append a journal event when journaling,
// then fan requests out and record stats.
424 template <typename I
>
425 void AbstractImageWriteRequest
<I
>::send_request() {
426 I
&image_ctx
= this->m_image_ctx
;
428 bool journaling
= false;
430 AioCompletion
*aio_comp
= this->m_aio_comp
;
432 // prevent image size from changing between computing clip and recording
433 // pending async operation
434 std::shared_lock image_locker
{image_ctx
.image_lock
};
435 journaling
= (image_ctx
.journal
!= nullptr &&
436 image_ctx
.journal
->is_journal_appending());
439 uint64_t clip_len
= 0;
440 LightweightObjectExtents object_extents
;
441 for (auto &extent
: this->m_image_extents
) {
442 if (extent
.second
== 0) {
446 // map to object extents
447 io::util::file_to_extents(&image_ctx
, extent
.first
, extent
.second
, clip_len
,
449 clip_len
+= extent
.second
;
// Subclass hook (e.g. discard alignment); may drop extents entirely.
452 int ret
= prune_object_extents(&object_extents
);
458 aio_comp
->set_request_count(object_extents
.size());
459 if (!object_extents
.empty()) {
460 uint64_t journal_tid
= 0;
462 // in-flight ops are flushed prior to closing the journal
463 ceph_assert(image_ctx
.journal
!= NULL
);
464 journal_tid
= append_journal_event(m_synchronous
);
467 send_object_requests(object_extents
, this->m_io_context
, journal_tid
);
470 update_stats(clip_len
);
// NOTE(review): corrupted listing — missing lines after embedded line 488
// (numbering jumps to 493; the request send and loop close are not
// visible); code kept byte-identical, comments only.
// Fans one object-dispatch request (built by the subclass hook
// create_object_request) out per object extent, each completing into a
// C_AioRequest tied to the shared AioCompletion.
473 template <typename I
>
474 void AbstractImageWriteRequest
<I
>::send_object_requests(
475 const LightweightObjectExtents
&object_extents
, IOContext io_context
,
476 uint64_t journal_tid
) {
477 I
&image_ctx
= this->m_image_ctx
;
478 CephContext
*cct
= image_ctx
.cct
;
480 AioCompletion
*aio_comp
= this->m_aio_comp
;
// single_extent lets subclasses take zero-copy shortcuts on bufferlists.
481 bool single_extent
= (object_extents
.size() == 1);
482 for (auto& oe
: object_extents
) {
483 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.object_no
) << " "
484 << oe
.offset
<< "~" << oe
.length
<< " from "
485 << oe
.buffer_extents
<< dendl
;
486 C_AioRequest
*req_comp
= new C_AioRequest(aio_comp
);
487 auto request
= create_object_request(oe
, io_context
, journal_tid
,
488 single_extent
, req_comp
);
493 template <typename I
>
494 void ImageWriteRequest
<I
>::assemble_extent(
495 const LightweightObjectExtent
&object_extent
, bufferlist
*bl
) {
496 for (auto q
= object_extent
.buffer_extents
.begin();
497 q
!= object_extent
.buffer_extents
.end(); ++q
) {
499 sub_bl
.substr_of(m_bl
, q
->first
, q
->second
);
500 bl
->claim_append(sub_bl
);
// NOTE(review): corrupted listing — missing lines (507-508 `uint64_t tid`
// and bufferlist decls, 512, 515, 518-520 return); byte-identical.
// Journals one AioWriteEvent per image extent, slicing the payload out of
// m_bl by running buffer_offset; returns (in the missing tail) the tid of
// the last appended event.
504 template <typename I
>
505 uint64_t ImageWriteRequest
<I
>::append_journal_event(bool synchronous
) {
506 I
&image_ctx
= this->m_image_ctx
;
509 uint64_t buffer_offset
= 0;
510 ceph_assert(!this->m_image_extents
.empty());
511 for (auto &extent
: this->m_image_extents
) {
513 sub_bl
.substr_of(m_bl
, buffer_offset
, extent
.second
);
514 buffer_offset
+= extent
.second
;
516 tid
= image_ctx
.journal
->append_write_event(extent
.first
, extent
.second
,
517 sub_bl
, synchronous
);
// NOTE(review): corrupted listing — missing lines (528-529 bufferlist decl,
// 534 else, 536-537, 542-543 return); code kept byte-identical.
// Builds the object-dispatch write for one object extent. When the whole
// request maps to a single contiguous extent, m_bl is moved instead of
// re-assembled (zero-copy fast path).
523 template <typename I
>
524 ObjectDispatchSpec
*ImageWriteRequest
<I
>::create_object_request(
525 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
526 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
527 I
&image_ctx
= this->m_image_ctx
;
530 if (single_extent
&& object_extent
.buffer_extents
.size() == 1 &&
531 m_bl
.length() == object_extent
.length
) {
532 // optimization for single object/buffer extent writes
533 bl
= std::move(m_bl
);
535 assemble_extent(object_extent
, &bl
);
538 auto req
= ObjectDispatchSpec::create_write(
539 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
540 object_extent
.offset
, std::move(bl
), io_context
, m_op_flags
, 0,
541 std::nullopt
, journal_tid
, this->m_trace
, on_finish
);
545 template <typename I
>
546 void ImageWriteRequest
<I
>::update_stats(size_t length
) {
547 I
&image_ctx
= this->m_image_ctx
;
548 image_ctx
.perfcounter
->inc(l_librbd_wr
);
549 image_ctx
.perfcounter
->inc(l_librbd_wr_bytes
, length
);
// NOTE(review): corrupted listing — missing lines (555-556 tid decl, 561
// extent length arg, 565-568 tail/return); code kept byte-identical.
// Journals one AioDiscardEvent per image extent, carrying the discard
// granularity so replay prunes identically.
552 template <typename I
>
553 uint64_t ImageDiscardRequest
<I
>::append_journal_event(bool synchronous
) {
554 I
&image_ctx
= this->m_image_ctx
;
557 ceph_assert(!this->m_image_extents
.empty());
558 for (auto &extent
: this->m_image_extents
) {
559 journal::EventEntry
event_entry(
560 journal::AioDiscardEvent(extent
.first
,
562 this->m_discard_granularity_bytes
));
563 tid
= image_ctx
.journal
->append_io_event(std::move(event_entry
),
564 extent
.first
, extent
.second
,
// NOTE(review): corrupted listing — tail missing after embedded line 579
// (numbering jumps to 584; the on_finish argument and return are not
// visible); code kept byte-identical, comments only.
// Builds the object-dispatch discard for one (already pruned/aligned)
// object extent; clone removal is disabled at this layer.
571 template <typename I
>
572 ObjectDispatchSpec
*ImageDiscardRequest
<I
>::create_object_request(
573 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
574 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
575 I
&image_ctx
= this->m_image_ctx
;
576 auto req
= ObjectDispatchSpec::create_discard(
577 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
578 object_extent
.offset
, object_extent
.length
, io_context
,
579 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE
, journal_tid
, this->m_trace
,
584 template <typename I
>
585 void ImageDiscardRequest
<I
>::update_stats(size_t length
) {
586 I
&image_ctx
= this->m_image_ctx
;
587 image_ctx
.perfcounter
->inc(l_librbd_discard
);
588 image_ctx
.perfcounter
->inc(l_librbd_discard_bytes
, length
);
// NOTE(review): corrupted listing — embedded line numbers with many missing
// lines (595-597 early return, 606-607 min() second arg, 613, 620-621,
// 623-625, 627-628, 631, 634, 637, 639-641 return); byte-identical.
// Aligns each object extent inward to the discard granularity; extents
// that shrink to zero are flagged and later erased (remove_if/erase).
591 template <typename I
>
592 int ImageDiscardRequest
<I
>::prune_object_extents(
593 LightweightObjectExtents
* object_extents
) const {
// Granularity 0 means "discard exactly as requested" — nothing to prune.
594 if (m_discard_granularity_bytes
== 0) {
598 // Align the range to discard_granularity_bytes boundary and skip
599 // and discards that are too small to free up any space.
601 // discard_granularity_bytes >= object_size && tail truncation
602 // is a special case for filestore
603 bool prune_required
= false;
604 auto object_size
= this->m_image_ctx
.layout
.object_size
;
605 auto discard_granularity_bytes
= std::min(m_discard_granularity_bytes
,
// Lambda shrinks one extent in place: round offset up, round end down.
608 [discard_granularity_bytes
, object_size
, &prune_required
]
609 (LightweightObjectExtent
& object_extent
) {
610 auto& offset
= object_extent
.offset
;
611 auto& length
= object_extent
.length
;
612 auto next_offset
= offset
+ length
;
614 if ((discard_granularity_bytes
< object_size
) ||
615 (next_offset
< object_size
)) {
616 offset
= p2roundup
<uint64_t>(offset
, discard_granularity_bytes
);
617 next_offset
= p2align
<uint64_t>(next_offset
, discard_granularity_bytes
);
618 if (offset
>= next_offset
) {
619 prune_required
= true;
622 length
= next_offset
- offset
;
626 std::for_each(object_extents
->begin(), object_extents
->end(),
629 if (prune_required
) {
630 // one or more object extents were skipped
632 [](const LightweightObjectExtent
& object_extent
) {
633 return (object_extent
.length
== 0);
635 object_extents
->erase(
636 std::remove_if(object_extents
->begin(), object_extents
->end(),
638 object_extents
->end());
// NOTE(review): corrupted listing — embedded line numbers with missing
// lines (646, 648, 653-654, 657, 659, 662, 664, 670-671, 674, 677-678,
// 681-682, 684-685, 689-690); code kept byte-identical, comments only.
// Flush path: optionally journal an AioFlushEvent (user-initiated flushes
// while journaling), then chain a flush through the object dispatcher; the
// dispatch is deferred behind async_op.flush via a LambdaContext.
643 template <typename I
>
644 void ImageFlushRequest
<I
>::send_request() {
645 I
&image_ctx
= this->m_image_ctx
;
647 bool journaling
= false;
649 std::shared_lock image_locker
{image_ctx
.image_lock
};
650 journaling
= (m_flush_source
== FLUSH_SOURCE_USER
&&
651 image_ctx
.journal
!= nullptr &&
652 image_ctx
.journal
->is_journal_appending());
655 AioCompletion
*aio_comp
= this->m_aio_comp
;
656 aio_comp
->set_request_count(1);
658 Context
*ctx
= new C_AioRequest(aio_comp
);
660 // ensure no locks are held when flush is complete
661 ctx
= librbd::util::create_async_context_callback(image_ctx
, ctx
);
663 uint64_t journal_tid
= 0;
665 // in-flight ops are flushed prior to closing the journal
666 ceph_assert(image_ctx
.journal
!= NULL
);
667 journal_tid
= image_ctx
.journal
->append_io_event(
668 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
669 image_ctx
.journal
->user_flushed();
672 auto object_dispatch_spec
= ObjectDispatchSpec::create_flush(
673 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, m_flush_source
, journal_tid
,
675 ctx
= new LambdaContext([object_dispatch_spec
](int r
) {
676 object_dispatch_spec
->send();
679 // ensure all in-flight IOs are settled if non-user flush request
680 if (m_flush_source
== FLUSH_SOURCE_WRITEBACK
) {
683 aio_comp
->async_op
.flush(ctx
);
686 // might be flushing during image shutdown
687 if (image_ctx
.perfcounter
!= nullptr) {
688 image_ctx
.perfcounter
->inc(l_librbd_flush
);
// NOTE(review): corrupted listing — missing lines (695-696 tid decl,
// 700-701 remaining event args, 704-708 tail/return); byte-identical.
// Journals one AioWriteSameEvent per image extent.
692 template <typename I
>
693 uint64_t ImageWriteSameRequest
<I
>::append_journal_event(bool synchronous
) {
694 I
&image_ctx
= this->m_image_ctx
;
697 ceph_assert(!this->m_image_extents
.empty());
698 for (auto &extent
: this->m_image_extents
) {
699 journal::EventEntry
event_entry(journal::AioWriteSameEvent(extent
.first
,
702 tid
= image_ctx
.journal
->append_io_event(std::move(event_entry
),
703 extent
.first
, extent
.second
,
// NOTE(review): corrupted listing — missing lines (715-716 bufferlist decl,
// 718, 721, 727-728 else, 733-734 return); code kept byte-identical.
// Builds the object-dispatch request for one object extent: a native
// write-same when the pattern tiles the extent cleanly (per
// assemble_write_same_extent), otherwise a plain expanded write.
710 template <typename I
>
711 ObjectDispatchSpec
*ImageWriteSameRequest
<I
>::create_object_request(
712 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
713 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
714 I
&image_ctx
= this->m_image_ctx
;
717 ObjectDispatchSpec
*req
;
719 if (util::assemble_write_same_extent(object_extent
, m_data_bl
, &bl
, false)) {
720 auto buffer_extents
{object_extent
.buffer_extents
};
722 req
= ObjectDispatchSpec::create_write_same(
723 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
724 object_extent
.offset
, object_extent
.length
, std::move(buffer_extents
),
725 std::move(bl
), io_context
, m_op_flags
, journal_tid
,
726 this->m_trace
, on_finish
);
729 req
= ObjectDispatchSpec::create_write(
730 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
731 object_extent
.offset
, std::move(bl
), io_context
, m_op_flags
, 0,
732 std::nullopt
, journal_tid
, this->m_trace
, on_finish
);
736 template <typename I
>
737 void ImageWriteSameRequest
<I
>::update_stats(size_t length
) {
738 I
&image_ctx
= this->m_image_ctx
;
739 image_ctx
.perfcounter
->inc(l_librbd_ws
);
740 image_ctx
.perfcounter
->inc(l_librbd_ws_bytes
, length
);
// NOTE(review): corrupted listing — missing lines (745 parameter, 747-748
// tid decl, 753 event args, 757-759 return); code kept byte-identical.
// Journals a single AioCompareAndWriteEvent; compare-and-write only ever
// covers one image extent (asserted), and -EILSEQ is the journaled filter
// return on compare mismatch.
743 template <typename I
>
744 uint64_t ImageCompareAndWriteRequest
<I
>::append_journal_event(
746 I
&image_ctx
= this->m_image_ctx
;
749 ceph_assert(this->m_image_extents
.size() == 1);
750 auto &extent
= this->m_image_extents
.front();
751 journal::EventEntry
event_entry(
752 journal::AioCompareAndWriteEvent(extent
.first
, extent
.second
, m_cmp_bl
,
754 tid
= image_ctx
.journal
->append_io_event(std::move(event_entry
),
755 extent
.first
, extent
.second
,
756 synchronous
, -EILSEQ
);
761 template <typename I
>
762 void ImageCompareAndWriteRequest
<I
>::assemble_extent(
763 const LightweightObjectExtent
&object_extent
, bufferlist
*bl
) {
764 for (auto q
= object_extent
.buffer_extents
.begin();
765 q
!= object_extent
.buffer_extents
.end(); ++q
) {
767 sub_bl
.substr_of(m_bl
, q
->first
, q
->second
);
768 bl
->claim_append(sub_bl
);
// NOTE(review): corrupted listing — missing lines (777, 779-780 comment
// tail and bufferlist decl, 786-787 return); code kept byte-identical.
// Builds the single object-dispatch compare-and-write; m_cmp_bl and the
// assembled payload are moved (single-extent op, see prune_object_extents).
772 template <typename I
>
773 ObjectDispatchSpec
*ImageCompareAndWriteRequest
<I
>::create_object_request(
774 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
775 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
776 I
&image_ctx
= this->m_image_ctx
;
778 // NOTE: safe to move m_cmp_bl since we only support this op against
781 assemble_extent(object_extent
, &bl
);
782 auto req
= ObjectDispatchSpec::create_compare_and_write(
783 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
784 object_extent
.offset
, std::move(m_cmp_bl
), std::move(bl
), io_context
,
785 m_mismatch_offset
, m_op_flags
, journal_tid
, this->m_trace
, on_finish
);
789 template <typename I
>
790 void ImageCompareAndWriteRequest
<I
>::update_stats(size_t length
) {
791 I
&image_ctx
= this->m_image_ctx
;
792 image_ctx
.perfcounter
->inc(l_librbd_cmp
);
793 image_ctx
.perfcounter
->inc(l_librbd_cmp_bytes
, length
);
// NOTE(review): corrupted listing — missing lines (800-801 error return,
// 808-811 error return and success return); code kept byte-identical.
// Validates compare-and-write constraints: exactly one object extent, and
// the extent must not straddle a 512-byte sector or a stripe-unit
// boundary; the (missing) branches presumably return an error code —
// TODO confirm against upstream.
796 template <typename I
>
797 int ImageCompareAndWriteRequest
<I
>::prune_object_extents(
798 LightweightObjectExtents
* object_extents
) const {
799 if (object_extents
->size() > 1)
802 I
&image_ctx
= this->m_image_ctx
;
803 uint64_t sector_size
= 512ULL;
804 uint64_t su
= image_ctx
.layout
.stripe_unit
;
805 auto& object_extent
= object_extents
->front();
806 if (object_extent
.offset
% sector_size
+ object_extent
.length
> sector_size
||
807 (su
!= 0 && (object_extent
.offset
% su
+ object_extent
.length
> su
)))
// NOTE(review): corrupted listing — missing lines (820 base-ctor trace arg,
// 823 ctor close); code kept byte-identical, comments only.
// Constructor: list-snaps always uses the image's data IO context; snap_ids
// and flags are captured for the per-object list-snaps dispatch.
813 template <typename I
>
814 ImageListSnapsRequest
<I
>::ImageListSnapsRequest(
815 I
& image_ctx
, AioCompletion
* aio_comp
, Extents
&& image_extents
,
816 SnapIds
&& snap_ids
, int list_snaps_flags
, SnapshotDelta
* snapshot_delta
,
817 const ZTracer::Trace
& parent_trace
)
818 : ImageRequest
<I
>(image_ctx
, aio_comp
, std::move(image_extents
),
819 image_ctx
.get_data_io_context(), "list-snaps",
821 m_snap_ids(std::move(snap_ids
)), m_list_snaps_flags(list_snaps_flags
),
822 m_snapshot_delta(snapshot_delta
) {
// NOTE(review): corrupted listing — missing lines (829, 835-837, 844-846,
// 854-855 completion args, 866-869 send/close); byte-identical.
// List-snaps path: bucket object extents by object number, wrap the user
// completion in C_AssembleSnapshotDeltas (which remaps per-object deltas
// back to image space), then issue one list-snaps dispatch per object into
// a sub-completion.
825 template <typename I
>
826 void ImageListSnapsRequest
<I
>::send_request() {
827 I
&image_ctx
= this->m_image_ctx
;
828 CephContext
*cct
= image_ctx
.cct
;
830 // map image extents to object extents
831 auto &image_extents
= this->m_image_extents
;
832 std::map
<uint64_t, Extents
> object_number_extents
;
833 for (auto& image_extent
: image_extents
) {
834 if (image_extent
.second
== 0) {
838 striper::LightweightObjectExtents object_extents
;
839 io::util::file_to_extents(&image_ctx
, image_extent
.first
,
840 image_extent
.second
, 0, &object_extents
);
841 for (auto& object_extent
: object_extents
) {
842 object_number_extents
[object_extent
.object_no
].emplace_back(
843 object_extent
.offset
, object_extent
.length
);
847 // reassemble the deltas back into image-extents when complete
848 auto aio_comp
= this->m_aio_comp
;
849 aio_comp
->set_request_count(1);
850 auto assemble_ctx
= new C_AssembleSnapshotDeltas
<I
>(
851 &image_ctx
, aio_comp
, m_snapshot_delta
);
852 auto sub_aio_comp
= AioCompletion::create_and_start
<
853 Context
, &Context::complete
>(assemble_ctx
, get_image_ctx(&image_ctx
),
856 // issue the requests
857 sub_aio_comp
->set_request_count(object_number_extents
.size());
858 for (auto& oe
: object_number_extents
) {
859 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.first
) << " "
860 << oe
.second
<< dendl
;
861 auto ctx
= new C_AioRequest(sub_aio_comp
);
862 auto req
= ObjectDispatchSpec::create_list_snaps(
863 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, oe
.first
, std::move(oe
.second
),
864 SnapIds
{m_snap_ids
}, m_list_snaps_flags
, this->m_trace
,
865 assemble_ctx
->get_snapshot_delta(oe
.first
), ctx
);
871 } // namespace librbd
873 template class librbd::io::ImageRequest
<librbd::ImageCtx
>;
874 template class librbd::io::ImageReadRequest
<librbd::ImageCtx
>;
875 template class librbd::io::AbstractImageWriteRequest
<librbd::ImageCtx
>;
876 template class librbd::io::ImageWriteRequest
<librbd::ImageCtx
>;
877 template class librbd::io::ImageDiscardRequest
<librbd::ImageCtx
>;
878 template class librbd::io::ImageFlushRequest
<librbd::ImageCtx
>;
879 template class librbd::io::ImageWriteSameRequest
<librbd::ImageCtx
>;
880 template class librbd::io::ImageCompareAndWriteRequest
<librbd::ImageCtx
>;
881 template class librbd::io::ImageListSnapsRequest
<librbd::ImageCtx
>;