1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/io/ImageRequest.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/internal.h"
7 #include "librbd/Journal.h"
8 #include "librbd/Types.h"
9 #include "librbd/Utils.h"
10 #include "librbd/asio/ContextWQ.h"
11 #include "librbd/io/AioCompletion.h"
12 #include "librbd/io/AsyncOperation.h"
13 #include "librbd/io/ObjectDispatchInterface.h"
14 #include "librbd/io/ObjectDispatchSpec.h"
15 #include "librbd/io/ObjectDispatcherInterface.h"
16 #include "librbd/io/Utils.h"
17 #include "librbd/journal/Types.h"
18 #include "include/rados/librados.hpp"
19 #include "common/errno.h"
20 #include "common/perf_counters.h"
21 #include "osdc/Striper.h"
26 #define dout_subsys ceph_subsys_rbd
28 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
33 using librbd::util::data_object_name
;
34 using librbd::util::get_image_ctx
;
// Completion helper that collects per-object SnapshotDeltas from individual
// object list-snaps requests and, once all complete, re-maps them into a
// single image-level SnapshotDelta for the caller.
// NOTE(review): this extract is missing lines (the template header, the
// I* image_ctx member declaration, several closing braces) — compare against
// upstream before relying on exact structure.
39 struct C_AssembleSnapshotDeltas
: public C_AioRequest
{
41 SnapshotDelta
* snapshot_delta
;
// lock guards object_snapshot_delta; per-object completions may run
// concurrently.
43 ceph::mutex lock
= ceph::make_mutex(
44 "librbd::io::C_AssembleSnapshotDeltas::lock", false);
// object number -> that object's snapshot delta, filled in by the per-object
// requests via get_snapshot_delta().
45 std::map
<uint64_t, SnapshotDelta
> object_snapshot_delta
;
47 C_AssembleSnapshotDeltas(I
* image_ctx
, AioCompletion
* aio_comp
,
48 SnapshotDelta
* snapshot_delta
)
49 : C_AioRequest(aio_comp
),
50 image_ctx(image_ctx
), snapshot_delta(snapshot_delta
) {
// Returns the slot a per-object request should populate; thread-safe.
// Note operator[] default-constructs the entry on first access.
53 SnapshotDelta
* get_snapshot_delta(uint64_t object_no
) {
54 std::unique_lock locker
{lock
};
55 return &object_snapshot_delta
[object_no
];
// Invoked once all per-object requests have completed; a negative r
// propagates the first error to the aio completion.
58 void finish(int r
) override
{
59 auto cct
= image_ctx
->cct
;
62 lderr(cct
) << "C_AssembleSnapshotDeltas: list snaps failed: "
63 << cpp_strerror(r
) << dendl
;
64 C_AioRequest::finish(r
);
68 std::unique_lock locker
{lock
};
// Re-map each object's delta back into image-area extents and merge into
// the caller-provided snapshot_delta.
70 for (auto& [object_no
, object_snapshot_delta
] : object_snapshot_delta
) {
71 SnapshotDelta image_snapshot_delta
;
72 object_to_image_intervals(object_no
, object_snapshot_delta
,
73 &image_snapshot_delta
, snapshot_delta
);
75 ldout(cct
, 20) << "object_no=" << object_no
<< ", "
76 << "object_snapshot_delta="
77 << object_snapshot_delta
<< ", "
78 << "image_snapshot_delta=" << image_snapshot_delta
82 ldout(cct
, 20) << "snapshot_delta=" << *snapshot_delta
<< dendl
;
83 C_AioRequest::finish(0);
// Converts one object's delta into image-area intervals, writing both a
// per-object image-level view and merging into the assembled overall delta.
86 void object_to_image_intervals(
87 uint64_t object_no
, const SnapshotDelta
& object_snapshot_delta
,
88 SnapshotDelta
* image_snapshot_delta
,
89 SnapshotDelta
* assembled_image_snapshot_delta
) {
90 for (auto& [key
, object_extents
] : object_snapshot_delta
) {
91 for (auto& object_extent
: object_extents
) {
// Map the object byte range back to image-area extents (striping-aware).
92 auto [image_extents
, _
] = io::util::object_to_area_extents(
94 {{object_extent
.get_off(), object_extent
.get_len()}});
96 auto& intervals
= (*image_snapshot_delta
)[key
];
97 auto& assembled_intervals
= (*assembled_image_snapshot_delta
)[key
];
98 for (auto [image_offset
, image_length
] : image_extents
) {
99 SparseExtent sparse_extent
{object_extent
.get_val().state
,
101 intervals
.insert(image_offset
, image_length
, sparse_extent
);
102 assembled_intervals
.insert(image_offset
, image_length
,
// Completion for a background readahead object read: it tracks a single
// object extent, logs the outcome and decrements the image's pending
// readahead counter. NOTE(review): member declarations (ictx, object_no)
// and closing braces were dropped by the extraction; the read payload is
// presumably discarded (cache-warming only) — confirm upstream.
110 template <typename I
>
111 struct C_RBD_Readahead
: public Context
{
114 io::ReadExtents extents
;
116 C_RBD_Readahead(I
*ictx
, uint64_t object_no
, uint64_t offset
, uint64_t length
)
117 : ictx(ictx
), object_no(object_no
), extents({{offset
, length
}}) {
// Register the in-flight readahead with the image's Readahead tracker.
118 ictx
->readahead
.inc_pending();
121 void finish(int r
) override
{
// Exactly one extent was requested in the constructor.
122 ceph_assert(extents
.size() == 1);
123 auto& extent
= extents
.front();
124 ldout(ictx
->cct
, 20) << "C_RBD_Readahead on "
125 << data_object_name(ictx
, object_no
) << ": "
126 << extent
.offset
<< "~" << extent
.length
<< dendl
;
127 ictx
->readahead
.dec_pending();
// Kicks off background readahead for a read request: feeds the current
// image extents to the Readahead predictor and issues fire-and-forget
// object reads for the predicted range.
// NOTE(review): several lines were dropped by the extraction (e.g. the
// readahead-disable branch body around original lines 140-147 and the
// req->send() calls) — compare against upstream.
131 template <typename I
>
132 void readahead(I
*ictx
, const Extents
& image_extents
, IOContext io_context
) {
// Total bytes in this request, used for the cumulative-read cutoff below.
133 uint64_t total_bytes
= 0;
134 for (auto& image_extent
: image_extents
) {
135 total_bytes
+= image_extent
.second
;
138 ictx
->image_lock
.lock_shared();
139 auto total_bytes_read
= ictx
->total_bytes_read
.fetch_add(total_bytes
);
// Readahead is disabled once the client has read more than
// readahead_disable_after_bytes in total (0 = never disable).
141 ictx
->readahead_disable_after_bytes
!= 0 &&
142 total_bytes_read
> ictx
->readahead_disable_after_bytes
);
144 ictx
->image_lock
.unlock_shared();
148 uint64_t data_size
= ictx
->get_area_size(ImageArea::DATA
);
149 ictx
->image_lock
.unlock_shared();
// Ask the predictor for the next range worth prefetching (may be empty).
151 auto readahead_extent
= ictx
->readahead
.update(image_extents
, data_size
);
152 uint64_t readahead_offset
= readahead_extent
.first
;
153 uint64_t readahead_length
= readahead_extent
.second
;
155 if (readahead_length
> 0) {
156 ldout(ictx
->cct
, 20) << "(readahead logical) " << readahead_offset
<< "~"
157 << readahead_length
<< dendl
;
// Map the predicted image range onto per-object extents and issue one
// lowest-layer read per object; C_RBD_Readahead owns each completion.
158 LightweightObjectExtents readahead_object_extents
;
159 io::util::area_to_object_extents(ictx
, readahead_offset
, readahead_length
,
161 &readahead_object_extents
);
162 for (auto& object_extent
: readahead_object_extents
) {
163 ldout(ictx
->cct
, 20) << "(readahead) "
164 << data_object_name(ictx
,
165 object_extent
.object_no
) << " "
166 << object_extent
.offset
<< "~"
167 << object_extent
.length
<< dendl
;
169 auto req_comp
= new C_RBD_Readahead
<I
>(ictx
, object_extent
.object_no
,
170 object_extent
.offset
,
171 object_extent
.length
)
;
172 auto req
= io::ObjectDispatchSpec::create_read(
173 ictx
, io::OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
174 &req_comp
->extents
, io_context
, 0, 0, {}, nullptr, req_comp
);
178 ictx
->perfcounter
->inc(l_librbd_readahead
);
179 ictx
->perfcounter
->inc(l_librbd_readahead_bytes
, readahead_length
);
// Fire-and-forget context that persists either the modify or the access
// timestamp in the image header via a cls call; tracked as an in-flight
// AsyncOperation so image shutdown waits for it.
// NOTE(review): the member declaration for m_image_ctx, the send() method
// header and several braces were dropped by the extraction.
183 template <typename I
>
184 struct C_UpdateTimestamp
: public Context
{
187 bool m_modify
; // if modify set to 'true', modify timestamp is updated,
188 // access timestamp otherwise
189 AsyncOperation m_async_op
;
191 C_UpdateTimestamp(I
& ictx
, bool m
) : m_image_ctx(ictx
), m_modify(m
) {
// Register with the image so the op is flushed before close.
192 m_async_op
.start_op(*get_image_ctx(&m_image_ctx
));
194 ~C_UpdateTimestamp() override
{
195 m_async_op
.finish_op();
// Build and submit the header-object update (modify vs. access timestamp).
199 librados::ObjectWriteOperation op
;
201 cls_client::set_modify_timestamp(&op
);
203 cls_client::set_access_timestamp(&op
);
206 auto comp
= librbd::util::create_rados_callback(this);
207 int r
= m_image_ctx
.md_ctx
.aio_operate(m_image_ctx
.header_oid
, comp
, &op
);
212 void finish(int r
) override
{
213 // ignore errors updating timestamp
// Returns true when 'now' is at least 'interval' seconds past the stored
// timestamp (an interval of 0 disables updates).
// NOTE(review): the parameter line carrying 'interval' and part of the
// return expression were dropped by the extraction — verify upstream.
217 bool should_update_timestamp(const utime_t
& now
, const utime_t
& timestamp
,
220 (static_cast<uint64_t>(now
.sec()) >= interval
+ timestamp
));
223 } // anonymous namespace
226 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
227 << " " << __func__ << ": "
229 template <typename I
>
230 void ImageRequest
<I
>::aio_read(I
*ictx
, AioCompletion
*c
,
231 Extents
&&image_extents
, ImageArea area
,
232 ReadResult
&&read_result
, IOContext io_context
,
233 int op_flags
, int read_flags
,
234 const ZTracer::Trace
&parent_trace
) {
235 ImageReadRequest
<I
> req(*ictx
, c
, std::move(image_extents
), area
,
236 std::move(read_result
), io_context
, op_flags
,
237 read_flags
, parent_trace
);
241 template <typename I
>
242 void ImageRequest
<I
>::aio_write(I
*ictx
, AioCompletion
*c
,
243 Extents
&&image_extents
, ImageArea area
,
244 bufferlist
&&bl
, int op_flags
,
245 const ZTracer::Trace
&parent_trace
) {
246 ImageWriteRequest
<I
> req(*ictx
, c
, std::move(image_extents
), area
,
247 std::move(bl
), op_flags
, parent_trace
);
251 template <typename I
>
252 void ImageRequest
<I
>::aio_discard(I
*ictx
, AioCompletion
*c
,
253 Extents
&&image_extents
, ImageArea area
,
254 uint32_t discard_granularity_bytes
,
255 const ZTracer::Trace
&parent_trace
) {
256 ImageDiscardRequest
<I
> req(*ictx
, c
, std::move(image_extents
), area
,
257 discard_granularity_bytes
, parent_trace
);
261 template <typename I
>
262 void ImageRequest
<I
>::aio_flush(I
*ictx
, AioCompletion
*c
,
263 FlushSource flush_source
,
264 const ZTracer::Trace
&parent_trace
) {
265 ImageFlushRequest
<I
> req(*ictx
, c
, flush_source
, parent_trace
);
269 template <typename I
>
270 void ImageRequest
<I
>::aio_writesame(I
*ictx
, AioCompletion
*c
,
271 Extents
&&image_extents
, ImageArea area
,
272 bufferlist
&&bl
, int op_flags
,
273 const ZTracer::Trace
&parent_trace
) {
274 ImageWriteSameRequest
<I
> req(*ictx
, c
, std::move(image_extents
), area
,
275 std::move(bl
), op_flags
, parent_trace
);
279 template <typename I
>
280 void ImageRequest
<I
>::aio_compare_and_write(I
*ictx
, AioCompletion
*c
,
281 Extents
&&image_extents
,
285 uint64_t *mismatch_offset
,
287 const ZTracer::Trace
&parent_trace
) {
288 ImageCompareAndWriteRequest
<I
> req(*ictx
, c
, std::move(image_extents
), area
,
289 std::move(cmp_bl
), std::move(bl
),
290 mismatch_offset
, op_flags
, parent_trace
);
294 template <typename I
>
295 void ImageRequest
<I
>::send() {
296 I
&image_ctx
= this->m_image_ctx
;
297 ceph_assert(m_aio_comp
->is_initialized(get_aio_type()));
298 ceph_assert(m_aio_comp
->is_started());
300 CephContext
*cct
= image_ctx
.cct
;
301 AioCompletion
*aio_comp
= this->m_aio_comp
;
302 ldout(cct
, 20) << get_request_type() << ": ictx=" << &image_ctx
<< ", "
303 << "completion=" << aio_comp
<< dendl
;
// Best-effort, rate-limited update of the image's access (reads) or modify
// (writes) timestamp. A cheap check under the shared lock filters most
// calls; the decision is re-checked under the exclusive lock before the
// asynchronous header update is issued.
// NOTE(review): several lines (branch bodies, req->send(), braces) were
// dropped by the extraction — compare against upstream.
309 template <typename I
>
310 void ImageRequest
<I
>::update_timestamp() {
// Reads refresh atime; everything else refreshes mtime.
311 bool modify
= (get_aio_type() != AIO_TYPE_READ
);
312 uint64_t update_interval
;
314 update_interval
= m_image_ctx
.mtime_update_interval
;
316 update_interval
= m_image_ctx
.atime_update_interval
;
// interval == 0 means timestamp updates are disabled.
319 if (update_interval
== 0) {
// Select the getter/setter pair for the timestamp being maintained.
323 utime_t (I::*get_timestamp_fn
)() const;
324 void (I::*set_timestamp_fn
)(utime_t
);
326 get_timestamp_fn
= &I::get_modify_timestamp
;
327 set_timestamp_fn
= &I::set_modify_timestamp
;
329 get_timestamp_fn
= &I::get_access_timestamp
;
330 set_timestamp_fn
= &I::set_access_timestamp
;
333 utime_t ts
= ceph_clock_now();
// Fast path: shared-lock check; bail out if the interval has not elapsed.
335 std::shared_lock timestamp_locker
{m_image_ctx
.timestamp_lock
};
336 if(!should_update_timestamp(ts
, std::invoke(get_timestamp_fn
, m_image_ctx
),
// Slow path: re-check under the exclusive lock so only one racing request
// performs the update, then store the new timestamp locally.
343 std::unique_lock timestamp_locker
{m_image_ctx
.timestamp_lock
};
344 bool update
= should_update_timestamp(
345 ts
, std::invoke(get_timestamp_fn
, m_image_ctx
), update_interval
);
350 std::invoke(set_timestamp_fn
, m_image_ctx
, ts
);
353 // TODO we fire and forget this outside the IO path to prevent
354 // potential race conditions with librbd client IO callbacks
355 // between different threads (e.g. librados and object cacher)
356 ldout(m_image_ctx
.cct
, 10) << get_request_type() << dendl
;
357 auto req
= new C_UpdateTimestamp
<I
>(m_image_ctx
, modify
);
361 template <typename I
>
362 ImageReadRequest
<I
>::ImageReadRequest(I
&image_ctx
, AioCompletion
*aio_comp
,
363 Extents
&&image_extents
, ImageArea area
,
364 ReadResult
&&read_result
,
365 IOContext io_context
, int op_flags
,
367 const ZTracer::Trace
&parent_trace
)
368 : ImageRequest
<I
>(image_ctx
, aio_comp
, std::move(image_extents
), area
,
369 "read", parent_trace
),
370 m_io_context(io_context
), m_op_flags(op_flags
), m_read_flags(read_flags
) {
371 aio_comp
->read_result
= std::move(read_result
);
// Maps the requested image extents onto per-object extents and issues one
// object-dispatch read per object; results are assembled into the
// completion's ReadResult via C_ObjectReadRequest. Sequential reads may
// additionally trigger background readahead.
// NOTE(review): lines were dropped by the extraction (loop-skip for empty
// extents, area_to_object_extents output argument, req->send(), braces).
374 template <typename I
>
375 void ImageReadRequest
<I
>::send_request() {
376 I
&image_ctx
= this->m_image_ctx
;
377 CephContext
*cct
= image_ctx
.cct
;
// Readahead only applies to cached, non-random reads of the DATA area.
379 auto &image_extents
= this->m_image_extents
;
380 if (this->m_image_area
== ImageArea::DATA
&&
381 image_ctx
.cache
&& image_ctx
.readahead_max_bytes
> 0 &&
382 !(m_op_flags
& LIBRADOS_OP_FLAG_FADVISE_RANDOM
)) {
383 readahead(get_image_ctx(&image_ctx
), image_extents
, m_io_context
);
386 // map image extents to object extents
387 LightweightObjectExtents object_extents
;
// buffer_ofs tracks each extent's position in the flat result buffer.
388 uint64_t buffer_ofs
= 0;
389 for (auto &extent
: image_extents
) {
390 if (extent
.second
== 0) {
394 util::area_to_object_extents(&image_ctx
, extent
.first
, extent
.second
,
395 this->m_image_area
, buffer_ofs
,
397 buffer_ofs
+= extent
.second
;
400 AioCompletion
*aio_comp
= this->m_aio_comp
;
401 aio_comp
->read_result
.set_image_extents(image_extents
);
403 // issue the requests
404 aio_comp
->set_request_count(object_extents
.size());
405 for (auto &oe
: object_extents
) {
406 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.object_no
) << " "
407 << oe
.offset
<< "~" << oe
.length
<< " from "
408 << oe
.buffer_extents
<< dendl
;
// Each object read reports into the shared completion via its own
// C_ObjectReadRequest; buffer_extents are moved since oe is not reused.
410 auto req_comp
= new io::ReadResult::C_ObjectReadRequest(
411 aio_comp
, {{oe
.offset
, oe
.length
, std::move(oe
.buffer_extents
)}});
412 auto req
= ObjectDispatchSpec::create_read(
413 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, oe
.object_no
,
414 &req_comp
->extents
, m_io_context
, m_op_flags
, m_read_flags
,
415 this->m_trace
, nullptr, req_comp
);
419 image_ctx
.perfcounter
->inc(l_librbd_rd
);
420 image_ctx
.perfcounter
->inc(l_librbd_rd_bytes
, buffer_ofs
);
// Common write-path driver: clips/maps image extents to object extents,
// lets the subclass prune them, optionally records a journal event, then
// fans out per-object requests and updates perf counters.
// NOTE(review): several lines (early-return on prune failure, loop-skip for
// empty extents, output argument of area_to_object_extents, closing braces)
// were dropped by the extraction.
423 template <typename I
>
424 void AbstractImageWriteRequest
<I
>::send_request() {
425 I
&image_ctx
= this->m_image_ctx
;
427 bool journaling
= false;
429 AioCompletion
*aio_comp
= this->m_aio_comp
;
431 // prevent image size from changing between computing clip and recording
432 // pending async operation
433 std::shared_lock image_locker
{image_ctx
.image_lock
};
434 journaling
= (image_ctx
.journal
!= nullptr &&
435 image_ctx
.journal
->is_journal_appending());
// clip_len doubles as the running buffer offset while mapping extents.
438 uint64_t clip_len
= 0;
439 LightweightObjectExtents object_extents
;
440 for (auto &extent
: this->m_image_extents
) {
441 if (extent
.second
== 0) {
445 // map to object extents
446 io::util::area_to_object_extents(&image_ctx
, extent
.first
, extent
.second
,
447 this->m_image_area
, clip_len
,
449 clip_len
+= extent
.second
;
// Subclasses may drop/shrink object extents (e.g. discard granularity).
452 int ret
= prune_object_extents(&object_extents
);
458 // reflect changes in object_extents back to m_image_extents
460 this->m_image_extents
.clear();
461 for (auto& object_extent
: object_extents
) {
462 auto [image_extents
, _
] = io::util::object_to_area_extents(
463 &image_ctx
, object_extent
.object_no
,
464 {{object_extent
.offset
, object_extent
.length
}});
465 this->m_image_extents
.insert(this->m_image_extents
.end(),
466 image_extents
.begin(), image_extents
.end());
470 aio_comp
->set_request_count(object_extents
.size());
471 if (!object_extents
.empty()) {
472 uint64_t journal_tid
= 0;
474 // in-flight ops are flushed prior to closing the journal
475 ceph_assert(image_ctx
.journal
!= NULL
);
476 journal_tid
= append_journal_event(m_synchronous
);
479 // it's very important that IOContext is captured here instead of
480 // e.g. at the API layer so that an up-to-date snap context is used
481 // when owning the exclusive lock
482 send_object_requests(object_extents
, image_ctx
.get_data_io_context(),
486 update_stats(clip_len
);
// Fans the mapped object extents out into per-object dispatch requests
// created by the subclass; each carries its own C_AioRequest completion
// feeding the shared AioCompletion.
// NOTE(review): the dispatch of 'request' (presumably request->send()) and
// closing braces were dropped by the extraction.
489 template <typename I
>
490 void AbstractImageWriteRequest
<I
>::send_object_requests(
491 const LightweightObjectExtents
&object_extents
, IOContext io_context
,
492 uint64_t journal_tid
) {
493 I
&image_ctx
= this->m_image_ctx
;
494 CephContext
*cct
= image_ctx
.cct
;
496 AioCompletion
*aio_comp
= this->m_aio_comp
;
// single_extent enables the subclass's single-buffer move optimization.
497 bool single_extent
= (object_extents
.size() == 1);
498 for (auto& oe
: object_extents
) {
499 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.object_no
) << " "
500 << oe
.offset
<< "~" << oe
.length
<< " from "
501 << oe
.buffer_extents
<< dendl
;
502 C_AioRequest
*req_comp
= new C_AioRequest(aio_comp
);
503 auto request
= create_object_request(oe
, io_context
, journal_tid
,
504 single_extent
, req_comp
);
509 template <typename I
>
510 void ImageWriteRequest
<I
>::assemble_extent(
511 const LightweightObjectExtent
&object_extent
, bufferlist
*bl
) {
512 for (auto q
= object_extent
.buffer_extents
.begin();
513 q
!= object_extent
.buffer_extents
.end(); ++q
) {
515 sub_bl
.substr_of(m_bl
, q
->first
, q
->second
);
516 bl
->claim_append(sub_bl
);
520 template <typename I
>
521 uint64_t ImageWriteRequest
<I
>::append_journal_event(bool synchronous
) {
522 I
&image_ctx
= this->m_image_ctx
;
525 uint64_t buffer_offset
= 0;
526 ceph_assert(!this->m_image_extents
.empty());
527 for (auto &extent
: this->m_image_extents
) {
529 sub_bl
.substr_of(m_bl
, buffer_offset
, extent
.second
);
530 buffer_offset
+= extent
.second
;
532 tid
= image_ctx
.journal
->append_write_event(extent
.first
, extent
.second
,
533 sub_bl
, synchronous
);
539 template <typename I
>
540 ObjectDispatchSpec
*ImageWriteRequest
<I
>::create_object_request(
541 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
542 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
543 I
&image_ctx
= this->m_image_ctx
;
546 if (single_extent
&& object_extent
.buffer_extents
.size() == 1 &&
547 m_bl
.length() == object_extent
.length
) {
548 // optimization for single object/buffer extent writes
549 bl
= std::move(m_bl
);
551 assemble_extent(object_extent
, &bl
);
554 auto req
= ObjectDispatchSpec::create_write(
555 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
556 object_extent
.offset
, std::move(bl
), io_context
, m_op_flags
, 0,
557 std::nullopt
, journal_tid
, this->m_trace
, on_finish
);
561 template <typename I
>
562 void ImageWriteRequest
<I
>::update_stats(size_t length
) {
563 I
&image_ctx
= this->m_image_ctx
;
564 image_ctx
.perfcounter
->inc(l_librbd_wr
);
565 image_ctx
.perfcounter
->inc(l_librbd_wr_bytes
, length
);
568 template <typename I
>
569 uint64_t ImageDiscardRequest
<I
>::append_journal_event(bool synchronous
) {
570 I
&image_ctx
= this->m_image_ctx
;
573 ceph_assert(!this->m_image_extents
.empty());
574 for (auto &extent
: this->m_image_extents
) {
575 journal::EventEntry
event_entry(
576 journal::AioDiscardEvent(extent
.first
,
578 this->m_discard_granularity_bytes
));
579 tid
= image_ctx
.journal
->append_io_event(std::move(event_entry
),
580 extent
.first
, extent
.second
,
587 template <typename I
>
588 ObjectDispatchSpec
*ImageDiscardRequest
<I
>::create_object_request(
589 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
590 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
591 I
&image_ctx
= this->m_image_ctx
;
592 auto req
= ObjectDispatchSpec::create_discard(
593 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
594 object_extent
.offset
, object_extent
.length
, io_context
,
595 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE
, journal_tid
, this->m_trace
,
600 template <typename I
>
601 void ImageDiscardRequest
<I
>::update_stats(size_t length
) {
602 I
&image_ctx
= this->m_image_ctx
;
603 image_ctx
.perfcounter
->inc(l_librbd_discard
);
604 image_ctx
.perfcounter
->inc(l_librbd_discard_bytes
, length
);
// Aligns each object extent to the discard granularity and removes extents
// that become empty (too small to free any space). Granularity 0 disables
// pruning entirely.
// NOTE(review): several lines (early return, std::min second argument,
// lambda bodies' tails, the image-extent refresh branch, braces) were
// dropped by the extraction — compare against upstream.
607 template <typename I
>
608 int ImageDiscardRequest
<I
>::prune_object_extents(
609 LightweightObjectExtents
* object_extents
) const {
610 if (m_discard_granularity_bytes
== 0) {
614 // Align the range to discard_granularity_bytes boundary and skip
615 // any discards that are too small to free up any space.
617 // discard_granularity_bytes >= object_size && tail truncation
618 // is a special case for filestore
619 bool prune_required
= false;
620 bool length_modified
= false;
621 auto object_size
= this->m_image_ctx
.layout
.object_size
;
// Granularity is capped (presumably at object_size; the second std::min
// argument was dropped by the extraction).
622 auto discard_granularity_bytes
= std::min(m_discard_granularity_bytes
,
// Per-extent aligner: rounds offset up and the end down to the granularity,
// flagging extents that vanish (prune_required) or shrink (length_modified).
625 [discard_granularity_bytes
, object_size
, &prune_required
, &length_modified
]
626 (LightweightObjectExtent
& object_extent
) {
627 auto& offset
= object_extent
.offset
;
628 auto& length
= object_extent
.length
;
629 auto next_offset
= offset
+ length
;
631 if ((discard_granularity_bytes
< object_size
) ||
632 (next_offset
< object_size
)) {
633 offset
= p2roundup
<uint64_t>(offset
, discard_granularity_bytes
);
634 next_offset
= p2align
<uint64_t>(next_offset
, discard_granularity_bytes
);
635 if (offset
>= next_offset
) {
636 prune_required
= true;
639 auto new_length
= next_offset
- offset
;
640 if (length
!= new_length
) {
641 length_modified
= true;
647 std::for_each(object_extents
->begin(), object_extents
->end(),
650 if (prune_required
) {
651 // one or more object extents were skipped
// Erase-remove of the zero-length extents flagged above.
653 [](const LightweightObjectExtent
& object_extent
) {
654 return (object_extent
.length
== 0);
656 object_extents
->erase(
657 std::remove_if(object_extents
->begin(), object_extents
->end(),
659 object_extents
->end());
662 // object extents were modified, image extents needs updating
663 if (length_modified
|| prune_required
) {
// Issues a flush: optionally journals a flush event (user-initiated flushes
// with an appending journal), then sends a flush through the object
// dispatcher once in-flight AIO has been accounted for.
// NOTE(review): lines were dropped by the extraction (trailing argument of
// create_flush, the LambdaContext tail, the else branch invoking ctx, and
// closing braces) — compare against upstream.
670 template <typename I
>
671 void ImageFlushRequest
<I
>::send_request() {
672 I
&image_ctx
= this->m_image_ctx
;
674 bool journaling
= false;
676 std::shared_lock image_locker
{image_ctx
.image_lock
};
// Only user-visible flushes are journaled.
677 journaling
= (m_flush_source
== FLUSH_SOURCE_USER
&&
678 image_ctx
.journal
!= nullptr &&
679 image_ctx
.journal
->is_journal_appending());
682 AioCompletion
*aio_comp
= this->m_aio_comp
;
683 aio_comp
->set_request_count(1);
685 Context
*ctx
= new C_AioRequest(aio_comp
);
687 // ensure no locks are held when flush is complete
688 ctx
= librbd::util::create_async_context_callback(image_ctx
, ctx
);
690 uint64_t journal_tid
= 0;
692 // in-flight ops are flushed prior to closing the journal
693 ceph_assert(image_ctx
.journal
!= NULL
);
694 journal_tid
= image_ctx
.journal
->append_io_event(
695 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
696 image_ctx
.journal
->user_flushed();
699 auto object_dispatch_spec
= ObjectDispatchSpec::create_flush(
700 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, m_flush_source
, journal_tid
,
// Defer sending the dispatch spec until prior AIO has settled.
702 ctx
= new LambdaContext([object_dispatch_spec
](int r
) {
703 object_dispatch_spec
->send();
706 // ensure all in-flight IOs are settled if non-user flush request
707 if (m_flush_source
== FLUSH_SOURCE_WRITEBACK
) {
710 aio_comp
->async_op
.flush(ctx
);
713 // might be flushing during image shutdown
714 if (image_ctx
.perfcounter
!= nullptr) {
715 image_ctx
.perfcounter
->inc(l_librbd_flush
);
719 template <typename I
>
720 uint64_t ImageWriteSameRequest
<I
>::append_journal_event(bool synchronous
) {
721 I
&image_ctx
= this->m_image_ctx
;
724 ceph_assert(!this->m_image_extents
.empty());
725 for (auto &extent
: this->m_image_extents
) {
726 journal::EventEntry
event_entry(journal::AioWriteSameEvent(extent
.first
,
729 tid
= image_ctx
.journal
->append_io_event(std::move(event_entry
),
730 extent
.first
, extent
.second
,
737 template <typename I
>
738 ObjectDispatchSpec
*ImageWriteSameRequest
<I
>::create_object_request(
739 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
740 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
741 I
&image_ctx
= this->m_image_ctx
;
744 ObjectDispatchSpec
*req
;
746 if (util::assemble_write_same_extent(object_extent
, m_data_bl
, &bl
, false)) {
747 auto buffer_extents
{object_extent
.buffer_extents
};
749 req
= ObjectDispatchSpec::create_write_same(
750 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
751 object_extent
.offset
, object_extent
.length
, std::move(buffer_extents
),
752 std::move(bl
), io_context
, m_op_flags
, journal_tid
,
753 this->m_trace
, on_finish
);
756 req
= ObjectDispatchSpec::create_write(
757 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
758 object_extent
.offset
, std::move(bl
), io_context
, m_op_flags
, 0,
759 std::nullopt
, journal_tid
, this->m_trace
, on_finish
);
763 template <typename I
>
764 void ImageWriteSameRequest
<I
>::update_stats(size_t length
) {
765 I
&image_ctx
= this->m_image_ctx
;
766 image_ctx
.perfcounter
->inc(l_librbd_ws
);
767 image_ctx
.perfcounter
->inc(l_librbd_ws_bytes
, length
);
770 template <typename I
>
771 uint64_t ImageCompareAndWriteRequest
<I
>::append_journal_event(
773 I
&image_ctx
= this->m_image_ctx
;
776 ceph_assert(this->m_image_extents
.size() == 1);
777 auto &extent
= this->m_image_extents
.front();
778 tid
= image_ctx
.journal
->append_compare_and_write_event(extent
.first
,
787 template <typename I
>
788 void ImageCompareAndWriteRequest
<I
>::assemble_extent(
789 const LightweightObjectExtent
&object_extent
, bufferlist
*bl
,
790 bufferlist
*cmp_bl
) {
791 for (auto q
= object_extent
.buffer_extents
.begin();
792 q
!= object_extent
.buffer_extents
.end(); ++q
) {
794 sub_bl
.substr_of(m_bl
, q
->first
, q
->second
);
795 bl
->claim_append(sub_bl
);
797 bufferlist sub_cmp_bl
;
798 sub_cmp_bl
.substr_of(m_cmp_bl
, q
->first
, q
->second
);
799 cmp_bl
->claim_append(sub_cmp_bl
);
803 template <typename I
>
804 ObjectDispatchSpec
*ImageCompareAndWriteRequest
<I
>::create_object_request(
805 const LightweightObjectExtent
&object_extent
, IOContext io_context
,
806 uint64_t journal_tid
, bool single_extent
, Context
*on_finish
) {
807 I
&image_ctx
= this->m_image_ctx
;
811 assemble_extent(object_extent
, &bl
, &cmp_bl
);
812 auto req
= ObjectDispatchSpec::create_compare_and_write(
813 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, object_extent
.object_no
,
814 object_extent
.offset
, std::move(cmp_bl
), std::move(bl
), io_context
,
815 m_mismatch_offset
, m_op_flags
, journal_tid
, this->m_trace
, on_finish
);
819 template <typename I
>
820 void ImageCompareAndWriteRequest
<I
>::update_stats(size_t length
) {
821 I
&image_ctx
= this->m_image_ctx
;
822 image_ctx
.perfcounter
->inc(l_librbd_cmp
);
823 image_ctx
.perfcounter
->inc(l_librbd_cmp_bytes
, length
);
826 template <typename I
>
827 int ImageCompareAndWriteRequest
<I
>::prune_object_extents(
828 LightweightObjectExtents
* object_extents
) const {
829 if (object_extents
->size() > 1)
832 I
&image_ctx
= this->m_image_ctx
;
833 uint64_t su
= image_ctx
.layout
.stripe_unit
;
834 auto& object_extent
= object_extents
->front();
835 if (su
== 0 || (object_extent
.offset
% su
+ object_extent
.length
> su
))
841 template <typename I
>
842 ImageListSnapsRequest
<I
>::ImageListSnapsRequest(
843 I
& image_ctx
, AioCompletion
* aio_comp
, Extents
&& image_extents
,
844 ImageArea area
, SnapIds
&& snap_ids
, int list_snaps_flags
,
845 SnapshotDelta
* snapshot_delta
, const ZTracer::Trace
& parent_trace
)
846 : ImageRequest
<I
>(image_ctx
, aio_comp
, std::move(image_extents
), area
,
847 "list-snaps", parent_trace
),
848 m_snap_ids(std::move(snap_ids
)), m_list_snaps_flags(list_snaps_flags
),
849 m_snapshot_delta(snapshot_delta
) {
// Maps the requested image extents to per-object extents, issues a
// list-snaps through the object dispatcher for each object, and uses
// C_AssembleSnapshotDeltas to merge the per-object deltas back into the
// caller's image-level SnapshotDelta.
// NOTE(review): lines were dropped by the extraction (loop-skip for empty
// extents, output argument of area_to_object_extents, create_and_start
// trailing arguments, req->send(), braces) — compare against upstream.
852 template <typename I
>
853 void ImageListSnapsRequest
<I
>::send_request() {
854 I
&image_ctx
= this->m_image_ctx
;
855 CephContext
*cct
= image_ctx
.cct
;
857 // map image extents to object extents
858 auto &image_extents
= this->m_image_extents
;
// object number -> list of (offset, length) ranges within that object
859 std::map
<uint64_t, Extents
> object_number_extents
;
860 for (auto& image_extent
: image_extents
) {
861 if (image_extent
.second
== 0) {
865 striper::LightweightObjectExtents object_extents
;
866 io::util::area_to_object_extents(&image_ctx
, image_extent
.first
,
867 image_extent
.second
, this->m_image_area
, 0,
869 for (auto& object_extent
: object_extents
) {
870 object_number_extents
[object_extent
.object_no
].emplace_back(
871 object_extent
.offset
, object_extent
.length
);
875 // reassemble the deltas back into image-extents when complete
876 auto aio_comp
= this->m_aio_comp
;
877 aio_comp
->set_request_count(1);
878 auto assemble_ctx
= new C_AssembleSnapshotDeltas
<I
>(
879 &image_ctx
, aio_comp
, m_snapshot_delta
);
// Sub-completion fans in the per-object requests before assemble_ctx runs.
880 auto sub_aio_comp
= AioCompletion::create_and_start
<
881 Context
, &Context::complete
>(assemble_ctx
, get_image_ctx(&image_ctx
),
884 // issue the requests
885 sub_aio_comp
->set_request_count(object_number_extents
.size());
886 for (auto& oe
: object_number_extents
) {
887 ldout(cct
, 20) << data_object_name(&image_ctx
, oe
.first
) << " "
888 << oe
.second
<< dendl
;
889 auto ctx
= new C_AioRequest(sub_aio_comp
);
// Each object writes its delta into the slot handed out by assemble_ctx.
890 auto req
= ObjectDispatchSpec::create_list_snaps(
891 &image_ctx
, OBJECT_DISPATCH_LAYER_NONE
, oe
.first
, std::move(oe
.second
),
892 SnapIds
{m_snap_ids
}, m_list_snaps_flags
, this->m_trace
,
893 assemble_ctx
->get_snapshot_delta(oe
.first
), ctx
);
899 } // namespace librbd
// Explicit template instantiations for the concrete librbd::ImageCtx type
// so the definitions in this translation unit are emitted once here.
901 template class librbd::io::ImageRequest
<librbd::ImageCtx
>;
902 template class librbd::io::ImageReadRequest
<librbd::ImageCtx
>;
903 template class librbd::io::AbstractImageWriteRequest
<librbd::ImageCtx
>;
904 template class librbd::io::ImageWriteRequest
<librbd::ImageCtx
>;
905 template class librbd::io::ImageDiscardRequest
<librbd::ImageCtx
>;
906 template class librbd::io::ImageFlushRequest
<librbd::ImageCtx
>;
907 template class librbd::io::ImageWriteSameRequest
<librbd::ImageCtx
>;
908 template class librbd::io::ImageCompareAndWriteRequest
<librbd::ImageCtx
>;
909 template class librbd::io::ImageListSnapsRequest
<librbd::ImageCtx
>;