1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "ObjectCopyRequest.h"
5 #include "common/errno.h"
6 #include "librados/snap_set_diff.h"
7 #include "librbd/ExclusiveLock.h"
8 #include "librbd/ObjectMap.h"
9 #include "librbd/Utils.h"
10 #include "librbd/deep_copy/Handler.h"
11 #include "librbd/io/AioCompletion.h"
12 #include "librbd/io/AsyncOperation.h"
13 #include "librbd/io/ImageRequest.h"
14 #include "librbd/io/ReadResult.h"
15 #include "osdc/Striper.h"
17 #define dout_subsys ceph_subsys_rbd
19 #define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \
20 << this << " " << __func__ << ": "
// Member-wise equality for clone_info_t: two entries are equal only when
// cloneid, snaps, overlap and size all compare equal.
// NOTE(review): the first parameter is named `rhs` and the second `lhs`,
// the reverse of the usual convention; this only matters if one of the
// member comparisons is not symmetric.
24 inline bool operator==(const clone_info_t
& rhs
, const clone_info_t
& lhs
) {
25 return (rhs
.cloneid
== lhs
.cloneid
&&
26 rhs
.snaps
== lhs
.snaps
&&
27 rhs
.overlap
== lhs
.overlap
&&
28 rhs
.size
== lhs
.size
);
// Equality for snap_set_t. The visible part of the expression compares the
// `clones` members (which in turn uses the clone_info_t comparison when the
// container holds clone_info_t elements).
// NOTE(review): the expression continues after the trailing `&&`, but the
// remaining operand(s) are not present in this listing -- confirm against
// the upstream file before relying on this description.
31 inline bool operator==(const snap_set_t
& rhs
, const snap_set_t
& lhs
) {
32 return (rhs
.clones
== lhs
.clones
&&
36 } // namespace librados
41 using librbd::util::create_context_callback
;
42 using librbd::util::create_rados_callback
;
// Constructs the request that copies one destination object
// (dst_object_number) from the source image.
//
// The constructor records the image contexts, the source/destination start
// snapshot ids, the snapshot map, the flatten flag, the optional progress
// handler and the completion context; it then
//   - asserts that both images have a valid data_ctx and that the snap map
//     is not empty,
//   - registers an AsyncOperation against the source image,
//   - duplicates both data IoCtxs into m_src_io_ctx / m_dst_io_ctx,
//   - derives the destination object name, and
//   - precomputes the source object extents backing the destination object.
// NOTE(review): the dst_image_ctx and on_finish parameter declarations are
// used by the initializer list but are not visible in this listing.
45 ObjectCopyRequest
<I
>::ObjectCopyRequest(I
*src_image_ctx
,
47 librados::snap_t src_snap_id_start
,
48 librados::snap_t dst_snap_id_start
,
49 const SnapMap
&snap_map
,
50 uint64_t dst_object_number
,
51 bool flatten
, Handler
* handler
,
53 : m_src_image_ctx(src_image_ctx
),
54 m_dst_image_ctx(dst_image_ctx
), m_cct(dst_image_ctx
->cct
),
55 m_src_snap_id_start(src_snap_id_start
),
56 m_dst_snap_id_start(dst_snap_id_start
), m_snap_map(snap_map
),
57 m_dst_object_number(dst_object_number
), m_flatten(flatten
),
58 m_handler(handler
), m_on_finish(on_finish
) {
59 ceph_assert(src_image_ctx
->data_ctx
.is_valid());
60 ceph_assert(dst_image_ctx
->data_ctx
.is_valid());
61 ceph_assert(!m_snap_map
.empty());
// track this request as an in-flight operation on the source image; the
// matching finish_op()/delete happens in finish()
63 m_src_async_op
= new io::AsyncOperation();
64 m_src_async_op
->start_op(*util::get_image_ctx(m_src_image_ctx
));
// work on private duplicates of the data IoCtxs -- presumably so that the
// snap_set_read() calls made by this request do not disturb the IoCtx
// shared through the image context (TODO confirm)
66 m_src_io_ctx
.dup(m_src_image_ctx
->data_ctx
);
67 m_dst_io_ctx
.dup(m_dst_image_ctx
->data_ctx
);
69 m_dst_oid
= m_dst_image_ctx
->get_object_name(dst_object_number
);
71 ldout(m_cct
, 20) << "dst_oid=" << m_dst_oid
<< dendl
;
// populates m_src_objects and m_src_object_extents
73 compute_src_object_extents();
// Entry point of the request.
// NOTE(review): the function body is not visible in this listing, so what
// it dispatches to cannot be stated from here.
77 void ObjectCopyRequest
<I
>::send() {
// Issues an asynchronous list_snaps against the first source object still
// pending in m_src_objects. The object number/name are cached in
// m_src_ono / m_src_oid, and the result is delivered to
// handle_list_snaps().
82 void ObjectCopyRequest
<I
>::send_list_snaps() {
83 ceph_assert(!m_src_objects
.empty());
84 m_src_ono
= *m_src_objects
.begin();
85 m_src_oid
= m_src_image_ctx
->get_object_name(m_src_ono
);
87 ldout(m_cct
, 20) << "src_oid=" << m_src_oid
<< dendl
;
// completion callback is bound to handle_list_snaps()
89 librados::AioCompletion
*rados_completion
= create_rados_callback
<
90 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_list_snaps
>(this);
92 librados::ObjectReadOperation op
;
// the snap set is returned through m_snap_set and the per-op status
// through m_snap_ret (checked in handle_list_snaps())
95 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
// list_snaps is issued with the read snap set to CEPH_SNAPDIR
97 m_src_io_ctx
.snap_set_read(CEPH_SNAPDIR
);
98 int r
= m_src_io_ctx
.aio_operate(m_src_oid
, rados_completion
, &op
,
// release our handle on the completion now that the op has been submitted
101 rados_completion
->release();
// Completion handler for send_list_snaps().
//
// Visible behaviour:
//   - a successful op whose list_snaps status (m_snap_ret) is negative is
//     singled out by the first condition;
//   - any error other than -ENOENT is logged as a list-snaps failure;
//   - when a retry was requested (m_retry_missing_read), an unchanged snap
//     set is logged as an error, otherwise the retry state is cleared;
//   - m_src_object_extents is scanned for entries of the current source
//     object (m_src_ono) and m_zero_interval is reset.
// NOTE(review): the bodies of several branches are not present in this
// listing, so the follow-up action of each branch is not described here.
104 template <typename I
>
105 void ObjectCopyRequest
<I
>::handle_list_snaps(int r
) {
// the operation itself succeeded but list_snaps reported an error
106 if (r
== 0 && m_snap_ret
< 0) {
110 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
// -ENOENT is deliberately excluded from the failure path
112 if (r
< 0 && r
!= -ENOENT
) {
113 lderr(m_cct
) << "failed to list snaps: " << cpp_strerror(r
) << dendl
;
// set by handle_read_object() when a read hit a missing object
118 if (m_retry_missing_read
) {
// same snap set as the one the failed read used: re-reading cannot help
119 if (m_snap_set
== m_retry_snap_set
) {
120 lderr(m_cct
) << "read encountered missing object using up-to-date snap set"
126 ldout(m_cct
, 20) << "retrying using updated snap set" << dendl
;
127 m_retry_missing_read
= false;
128 m_retry_snap_set
= {};
132 for (auto &it
: m_src_object_extents
) {
// only extents that live in the object just listed are of interest
134 if (e
.object_no
== m_src_ono
) {
140 m_zero_interval
= {};
// Issues the reads for the next entry of m_read_snaps against the current
// source object.
//
// When m_read_snaps is empty the current object is finished: it is removed
// from m_src_objects and, once no source objects remain, the state machine
// moves on to send_read_from_parent(). Otherwise one ObjectReadOperation is
// assembled containing a sparse_read per copy op queued under the front
// m_read_snaps key, and it is submitted with handle_read_object() as the
// completion. If no copy op was queued for that key, handle_read_object(0)
// is invoked directly without any I/O.
// NOTE(review): '©_op' in this listing looks like a mis-decoded
// '&copy_op' (the loop body uses `copy_op`) -- confirm against upstream.
147 template <typename I
>
148 void ObjectCopyRequest
<I
>::send_read_object() {
150 if (m_read_snaps
.empty()) {
151 // all snapshots have been read
154 ceph_assert(!m_src_objects
.empty());
155 m_src_objects
.erase(m_src_objects
.begin());
// NOTE(review): the action taken while source objects remain is not
// visible in this listing
157 if (!m_src_objects
.empty()) {
162 // all objects have been read
163 send_read_from_parent();
// key is a pair; its second element is the snapshot the read is issued at
167 auto index
= *m_read_snaps
.begin();
168 auto src_snap_seq
= index
.second
;
170 bool read_required
= false;
171 librados::ObjectReadOperation op
;
173 for (auto ©_op
: m_read_ops
[index
]) {
// select the read snapshot only once, on the first queued copy op
174 if (!read_required
) {
175 // map the copy op start snap id back to the necessary read snap id
176 m_src_io_ctx
.snap_set_read(src_snap_seq
);
178 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
<< dendl
;
179 read_required
= true;
181 ldout(m_cct
, 20) << "read op: " << copy_op
.src_offset
<< "~"
182 << copy_op
.length
<< dendl
;
// sparse read: the extent map and data land directly in the copy op
183 op
.sparse_read(copy_op
.src_offset
, copy_op
.length
, ©_op
.src_extent_map
,
184 ©_op
.out_bl
, nullptr);
185 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
186 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
189 if (!read_required
) {
190 // nothing written to this object for this snapshot (must be trunc/remove)
191 handle_read_object(0);
195 auto ctx
= create_context_callback
<
196 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_read_object
>(this);
197 auto comp
= create_rados_callback(ctx
);
199 ldout(m_cct
, 20) << "read " << m_src_oid
<< dendl
;
201 int r
= m_src_io_ctx
.aio_operate(m_src_oid
, comp
, &op
, nullptr);
// Completion handler for send_read_object().
//
// Visible behaviour:
//   - arms a retry by snapshotting the current snap set into
//     m_retry_snap_set and setting m_retry_missing_read (logged as "object
//     missing potentially due to removed snapshot");
//   - logs other read failures;
//   - reports the number of bytes read for the front m_read_snaps entry to
//     the optional progress handler;
//   - pops the front entry of m_read_snaps.
// NOTE(review): the conditions guarding the retry/error branches and the
// next state transition are not present in this listing.
// NOTE(review): '©_op' looks like a mis-decoded '&copy_op'.
206 template <typename I
>
207 void ObjectCopyRequest
<I
>::handle_read_object(int r
) {
208 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
// remember which snap set the failed read was based on so that
// handle_list_snaps() can tell whether a fresh listing changed anything
211 m_retry_snap_set
= m_snap_set
;
212 m_retry_missing_read
= true;
214 ldout(m_cct
, 5) << "object missing potentially due to removed snapshot"
221 lderr(m_cct
) << "failed to read from source object: " << cpp_strerror(r
)
// progress reporting is optional
227 if (m_handler
!= nullptr) {
228 uint64_t bytes_read
= 0;
// NOTE(review): m_read_snaps.begin() is dereferenced here, before the
// non-empty assertion further down
230 auto index
= *m_read_snaps
.begin();
231 for (auto ©_op
: m_read_ops
[index
]) {
232 bytes_read
+= copy_op
.out_bl
.length();
234 m_handler
->handle_read(bytes_read
);
// this snapshot's reads are done; advance to the next queued key
237 ceph_assert(!m_read_snaps
.empty());
238 m_read_snaps
.erase(m_read_snaps
.begin());
// Reads from the parent image the extents that compute_read_from_parent_ops()
// decides must be copied up.
//
// The parent extents are computed while holding the source image_lock in
// shared mode. When there is nothing to read, handle_read_from_parent(0) is
// called directly; otherwise an image-level aio_read is issued against the
// parent, with the data collected into m_read_from_parent_data and
// completion routed to handle_read_from_parent().
243 template <typename I
>
244 void ObjectCopyRequest
<I
>::send_read_from_parent() {
// compute_read_from_parent_ops() asserts that image_lock is held
245 m_src_image_ctx
->image_lock
.lock_shared();
246 io::Extents image_extents
;
247 compute_read_from_parent_ops(&image_extents
);
248 m_src_image_ctx
->image_lock
.unlock_shared();
250 if (image_extents
.empty()) {
251 handle_read_from_parent(0);
255 ldout(m_cct
, 20) << dendl
;
// non-empty extents imply a parent exists
257 ceph_assert(m_src_image_ctx
->parent
!= nullptr);
259 auto ctx
= create_context_callback
<
260 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_read_from_parent
>(this);
261 auto comp
= io::AioCompletion::create_and_start(
262 ctx
, util::get_image_ctx(m_src_image_ctx
->parent
), io::AIO_TYPE_READ
);
263 ldout(m_cct
, 20) << "completion " << comp
<< ", extents " << image_extents
// local copy of the image context pointer used for the aio_read call
266 auto src_image_ctx
= m_src_image_ctx
;
267 io::ImageRequest
<I
>::aio_read(src_image_ctx
->parent
, comp
,
268 std::move(image_extents
),
269 io::ReadResult
{&m_read_from_parent_data
}, 0,
// Completion handler for send_read_from_parent().
//
// On success the single m_read_ops entry produced by
// compute_read_from_parent_ops() is post-processed: the parent data buffer
// is sliced, in order, into each copy op's out_bl; slices that are entirely
// zero are converted into a zero interval for that snapshot and removed
// from the copy op list. Afterwards the per-snapshot "destination object
// may exist" table is computed and m_write_ops is checked for emptiness.
// NOTE(review): the error return, the declaration of `offset`, the iterator
// advance for non-zero slices and the steps between the visible statements
// are not present in this listing.
// NOTE(review): '©_ops' looks like a mis-decoded '&copy_ops'.
273 template <typename I
>
274 void ObjectCopyRequest
<I
>::handle_read_from_parent(int r
) {
275 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
278 lderr(m_cct
) << "failed to read from parent: " << cpp_strerror(r
) << dendl
;
283 if (!m_read_ops
.empty()) {
// parent reads are always queued under a single key
284 ceph_assert(m_read_ops
.size() == 1);
285 auto src_snap_seq
= m_read_ops
.begin()->first
.first
;
286 auto ©_ops
= m_read_ops
.begin()->second
;
288 for (auto it
= copy_ops
.begin(); it
!= copy_ops
.end(); ) {
// carve this op's bytes out of the contiguous parent read buffer
289 it
->out_bl
.substr_of(m_read_from_parent_data
, offset
, it
->length
);
290 offset
+= it
->length
;
// all-zero data is recorded as a zero interval instead of being written
291 if (it
->out_bl
.is_zero()) {
292 m_zero_interval
[src_snap_seq
].insert(it
->dst_offset
, it
->length
);
293 it
= copy_ops
.erase(it
);
301 compute_dst_object_may_exist();
304 if (m_write_ops
.empty()) {
// Builds and submits the write operation for the first (oldest) entry of
// m_write_ops.
//
// Steps visible in this listing:
//   1. Resolve the destination snapshot context for the entry's source
//      snap id via m_snap_map (dst_snap_ids / dst_snap_seq).
//   2. If the destination object cannot exist at that snapshot, assert that
//      the only queued op is a single COPY_OP_TYPE_REMOVE.
//   3. When the destination image carries migration info, prepend a
//      cls assert_snapc_seq guard to the operation.
//   4. Translate each copy op (WRITE / ZERO / REMOVE_TRUNC / TRUNC /
//      REMOVE) into librados write-op calls.
//   5. If nothing beyond the optional guard was added, short-circuit to
//      handle_write_object(0); otherwise acquire a lock-op context under
//      owner_lock and submit the op with the destination snap context.
// NOTE(review): several statements (case bodies, breaks, early returns,
// the declaration of `r` and `tmpbl`) are not present in this listing.
// NOTE(review): '©_op' looks like a mis-decoded '&copy_op'.
314 template <typename I
>
315 void ObjectCopyRequest
<I
>::send_write_object() {
316 ceph_assert(!m_write_ops
.empty());
317 auto& copy_ops
= m_write_ops
.begin()->second
;
319 // retrieve the destination snap context for the op
320 SnapIds dst_snap_ids
;
321 librados::snap_t dst_snap_seq
= 0;
322 librados::snap_t src_snap_seq
= m_write_ops
.begin()->first
;
// a source snap id of 0 keeps the empty snap context declared above
323 if (src_snap_seq
!= 0) {
324 auto snap_map_it
= m_snap_map
.find(src_snap_seq
);
325 ceph_assert(snap_map_it
!= m_snap_map
.end());
327 auto dst_snap_id
= snap_map_it
->second
.front();
328 auto dst_may_exist_it
= m_dst_object_may_exist
.find(dst_snap_id
);
329 ceph_assert(dst_may_exist_it
!= m_dst_object_may_exist
.end());
330 if (!dst_may_exist_it
->second
&& !copy_ops
.empty()) {
331 // if the object cannot exist, the only valid op is to remove it
332 ceph_assert(copy_ops
.size() == 1U);
333 ceph_assert(copy_ops
.begin()->type
== COPY_OP_TYPE_REMOVE
);
336 // write snapshot context should be before actual snapshot
337 ceph_assert(!snap_map_it
->second
.empty());
338 auto dst_snap_ids_it
= snap_map_it
->second
.begin();
341 dst_snap_ids
= SnapIds
{dst_snap_ids_it
, snap_map_it
->second
.end()};
342 if (!dst_snap_ids
.empty()) {
343 dst_snap_seq
= dst_snap_ids
.front();
345 ceph_assert(dst_snap_seq
!= CEPH_NOSNAP
);
348 ldout(m_cct
, 20) << "dst_snap_seq=" << dst_snap_seq
<< ", "
349 << "dst_snaps=" << dst_snap_ids
<< dendl
;
351 librados::ObjectWriteOperation op
;
// NOTE(review): declared without an initializer; any reset before use is
// not visible in this listing
352 uint64_t buffer_offset
;
// migration destination: guard the write with a snapc-seq assertion; a
// failed guard surfaces as -ERANGE in handle_write_object()
// (presumably -- TODO confirm against cls_rbd)
354 if (!m_dst_image_ctx
->migration_info
.empty()) {
355 cls_client::assert_snapc_seq(&op
, dst_snap_seq
,
356 cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ
);
359 for (auto ©_op
: copy_ops
) {
360 switch (copy_op
.type
) {
361 case COPY_OP_TYPE_WRITE
:
// one write per destination extent, consuming out_bl sequentially
363 for (auto &e
: copy_op
.dst_extent_map
) {
364 ldout(m_cct
, 20) << "write op: " << e
.first
<< "~" << e
.second
367 tmpbl
.substr_of(copy_op
.out_bl
, buffer_offset
, e
.second
);
368 op
.write(e
.first
, tmpbl
);
369 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
370 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
371 buffer_offset
+= e
.second
;
374 case COPY_OP_TYPE_ZERO
:
375 ldout(m_cct
, 20) << "zero op: " << copy_op
.dst_offset
<< "~"
376 << copy_op
.length
<< dendl
;
377 op
.zero(copy_op
.dst_offset
, copy_op
.length
);
379 case COPY_OP_TYPE_REMOVE_TRUNC
:
380 ldout(m_cct
, 20) << "create op" << dendl
;
383 case COPY_OP_TYPE_TRUNC
:
384 ldout(m_cct
, 20) << "trunc op: " << copy_op
.dst_offset
<< dendl
;
385 op
.truncate(copy_op
.dst_offset
);
387 case COPY_OP_TYPE_REMOVE
:
388 ldout(m_cct
, 20) << "remove op" << dendl
;
// the migration guard accounts for one op; anything beyond that is real work
396 if (op
.size() == (m_dst_image_ctx
->migration_info
.empty() ? 0 : 1)) {
397 handle_write_object(0);
402 Context
*finish_op_ctx
;
// start_lock_op() asserts that owner_lock is held
404 std::shared_lock owner_locker
{m_dst_image_ctx
->owner_lock
};
405 finish_op_ctx
= start_lock_op(m_dst_image_ctx
->owner_lock
, &r
);
407 if (finish_op_ctx
== nullptr) {
408 lderr(m_cct
) << "lost exclusive lock" << dendl
;
// advance the state machine first, then complete the lock-op context
413 auto ctx
= new LambdaContext([this, finish_op_ctx
](int r
) {
414 handle_write_object(r
);
415 finish_op_ctx
->complete(0);
417 librados::AioCompletion
*comp
= create_rados_callback(ctx
);
418 r
= m_dst_io_ctx
.aio_operate(m_dst_oid
, comp
, &op
, dst_snap_seq
, dst_snap_ids
,
// Completion handler for send_write_object().
//
// -ERANGE is logged at debug level 10 as "concurrent deep copy" rather
// than as an error; other failures are logged via lderr. The processed
// entry is then removed from m_write_ops; once no entries remain the
// object map update is started.
// NOTE(review): the first branch of the if/else chain, the error returns
// and the action taken while write ops remain are not present in this
// listing.
424 template <typename I
>
425 void ObjectCopyRequest
<I
>::handle_write_object(int r
) {
426 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
430 } else if (r
== -ERANGE
) {
431 ldout(m_cct
, 10) << "concurrent deep copy" << dendl
;
435 lderr(m_cct
) << "failed to write to destination object: " << cpp_strerror(r
)
// done with the oldest snapshot's ops
441 m_write_ops
.erase(m_write_ops
.begin());
442 if (!m_write_ops
.empty()) {
447 send_update_object_map();
// Pushes the next pending entry of m_dst_object_state into the destination
// image's object map.
//
// Nothing is done when the object-map feature is disabled or no state is
// pending. Otherwise, with owner_lock and image_lock held shared, the front
// entry is popped, its source snap id is mapped to the destination snap id
// via m_snap_map, and ObjectMap::aio_update() is issued with
// handle_update_object_map() chained behind a lock-op context. Every
// visible early-exit path releases both locks in reverse acquisition order.
// NOTE(review): the early returns / finish() calls, the declaration of `r`
// and the handling of `sent == false` are only partially visible here.
450 template <typename I
>
451 void ObjectCopyRequest
<I
>::send_update_object_map() {
452 if (!m_dst_image_ctx
->test_features(RBD_FEATURE_OBJECT_MAP
) ||
453 m_dst_object_state
.empty()) {
458 m_dst_image_ctx
->owner_lock
.lock_shared();
459 m_dst_image_ctx
->image_lock
.lock_shared();
460 if (m_dst_image_ctx
->object_map
== nullptr) {
461 // possible that exclusive lock was lost in background
462 lderr(m_cct
) << "object map is not initialized" << dendl
;
464 m_dst_image_ctx
->image_lock
.unlock_shared();
465 m_dst_image_ctx
->owner_lock
.unlock_shared();
// copy everything needed out of the front entry before erasing it
470 auto &dst_object_state
= *m_dst_object_state
.begin();
471 auto it
= m_snap_map
.find(dst_object_state
.first
);
472 ceph_assert(it
!= m_snap_map
.end());
473 auto dst_snap_id
= it
->second
.front();
474 auto object_state
= dst_object_state
.second
;
475 m_dst_object_state
.erase(m_dst_object_state
.begin());
477 ldout(m_cct
, 20) << "dst_snap_id=" << dst_snap_id
<< ", object_state="
478 << static_cast<uint32_t>(object_state
) << dendl
;
481 auto finish_op_ctx
= start_lock_op(m_dst_image_ctx
->owner_lock
, &r
);
482 if (finish_op_ctx
== nullptr) {
483 lderr(m_cct
) << "lost exclusive lock" << dendl
;
484 m_dst_image_ctx
->image_lock
.unlock_shared();
485 m_dst_image_ctx
->owner_lock
.unlock_shared();
// advance the state machine first, then complete the lock-op context
490 auto ctx
= new LambdaContext([this, finish_op_ctx
](int r
) {
491 handle_update_object_map(r
);
492 finish_op_ctx
->complete(0);
// local copy of the pointer: `this` may already be gone by the time the
// locks are released below (see the NOTE that follows)
495 auto dst_image_ctx
= m_dst_image_ctx
;
496 bool sent
= dst_image_ctx
->object_map
->template aio_update
<
497 Context
, &Context::complete
>(dst_snap_id
, m_dst_object_number
, object_state
,
500 // NOTE: state machine might complete before we reach here
501 dst_image_ctx
->image_lock
.unlock_shared();
502 dst_image_ctx
->owner_lock
.unlock_shared();
504 ceph_assert(dst_snap_id
== CEPH_NOSNAP
);
// Completion handler for send_update_object_map(). Logs a failure to update
// the object map, and re-enters send_update_object_map() while
// m_dst_object_state still has pending entries.
// NOTE(review): the error condition/return and the final transition once
// no entries remain are not present in this listing.
509 template <typename I
>
510 void ObjectCopyRequest
<I
>::handle_update_object_map(int r
) {
511 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
514 lderr(m_cct
) << "failed to update object map: " << cpp_strerror(r
) << dendl
;
// one object-map update per pending snapshot state
519 if (!m_dst_object_state
.empty()) {
520 send_update_object_map();
// Returns a Context that the caller must complete once its guarded
// operation has finished.
//
// Requires the destination image's owner_lock to be held (asserted). When
// the destination image has no exclusive lock, a no-op context is returned;
// otherwise the request is delegated to ExclusiveLock::start_op(r). Callers
// in this file treat a nullptr result as "lost exclusive lock".
// NOTE(review): in the visible lines the `owner_lock` parameter is not
// referenced -- the assertion uses m_dst_image_ctx->owner_lock directly.
// The declaration of the second parameter (`r`) is not visible here.
526 template <typename I
>
527 Context
*ObjectCopyRequest
<I
>::start_lock_op(ceph::shared_mutex
&owner_lock
,
529 ceph_assert(ceph_mutex_is_locked(m_dst_image_ctx
->owner_lock
));
530 if (m_dst_image_ctx
->exclusive_lock
== nullptr) {
// nothing to guard against: hand back a context that does nothing
531 return new LambdaContext([](int r
) {});
533 return m_dst_image_ctx
->exclusive_lock
->start_op(r
);
// Translates a byte position inside a source object (objectno, offset) to
// the corresponding position inside the destination object.
//
// The position is first mapped to an image offset using the source layout
// (a 1-byte extent through Striper::extent_to_file) and then mapped back to
// an object extent using the destination layout
// (Striper::file_to_extents). Both mappings are asserted to yield exactly
// one extent, and the resulting extent must belong to m_dst_object_number.
// NOTE(review): the `offset` parameter declaration and the return statement
// are not present in this listing.
536 template <typename I
>
537 uint64_t ObjectCopyRequest
<I
>::src_to_dst_object_offset(uint64_t objectno
,
539 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
// source object position -> image offset
540 Striper::extent_to_file(m_cct
, &m_src_image_ctx
->layout
, objectno
, offset
, 1,
542 ceph_assert(image_extents
.size() == 1);
543 auto dst_object_offset
= image_extents
.begin()->first
;
545 std::map
<object_t
, std::vector
<ObjectExtent
>> dst_object_extents
;
// image offset -> destination object extent
546 Striper::file_to_extents(m_cct
, m_dst_image_ctx
->format_string
,
547 &m_dst_image_ctx
->layout
, dst_object_offset
, 1, 0,
549 ceph_assert(dst_object_extents
.size() == 1);
550 ceph_assert(dst_object_extents
.begin()->second
.size() == 1);
551 auto &e
= *dst_object_extents
.begin()->second
.begin();
// the byte must land in the object this request is copying
552 ceph_assert(e
.objectno
== m_dst_object_number
);
// Determines which source objects/extents back the destination object.
//
// The whole destination object is mapped to image extents with the
// destination layout; each image extent is then mapped to source object
// extents with the source layout. Every touched source object number is
// added to m_src_objects, and each source extent is split into pieces of
// min(src stripe_unit, dst stripe_unit) bytes which are stored in
// m_src_object_extents keyed by their destination object offset. The total
// mapped length is asserted to equal the destination object size.
// NOTE(review): the declaration/accumulation of `total` and the tail of
// the m_src_object_extents initializer are not present in this listing.
557 template <typename I
>
558 void ObjectCopyRequest
<I
>::compute_src_object_extents() {
559 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
// destination object -> image extents
560 Striper::extent_to_file(m_cct
, &m_dst_image_ctx
->layout
, m_dst_object_number
,
561 0, m_dst_image_ctx
->layout
.object_size
, image_extents
);
564 for (auto &e
: image_extents
) {
565 std::map
<object_t
, std::vector
<ObjectExtent
>> src_object_extents
;
// image extent -> source object extents
566 Striper::file_to_extents(m_cct
, m_src_image_ctx
->format_string
,
567 &m_src_image_ctx
->layout
, e
.first
, e
.second
, 0,
// the smaller stripe unit is the chunk size used for splitting below
569 auto stripe_unit
= std::min(m_src_image_ctx
->layout
.stripe_unit
,
570 m_dst_image_ctx
->layout
.stripe_unit
);
571 for (auto &p
: src_object_extents
) {
572 for (auto &s
: p
.second
) {
573 m_src_objects
.insert(s
.objectno
);
// consume the extent one stripe_unit-sized chunk at a time
575 while (s
.length
> 0) {
576 ceph_assert(s
.length
>= stripe_unit
);
577 auto dst_object_offset
= src_to_dst_object_offset(s
.objectno
, s
.offset
);
578 m_src_object_extents
[dst_object_offset
] = {s
.objectno
, s
.offset
,
580 s
.offset
+= stripe_unit
;
581 s
.length
-= stripe_unit
;
// every byte of the destination object must be accounted for
587 ceph_assert(total
== m_dst_image_ctx
->layout
.object_size
);
589 ldout(m_cct
, 20) << m_src_object_extents
.size() << " src extents" << dendl
;
// Builds the read plan (m_read_ops / m_read_snaps) and the truncation zero
// intervals (m_zero_interval) for the current source object from its snap
// set.
//
// The snap map is walked in order. For each entry, calc_snap_set_diff()
// yields the interval that changed since the previous snapshot together
// with the object's end size and existence. Then:
//   - if the whole object must be read (or the object has no clones under
//     a parent-backed image at the first snapshot), the diff is widened to
//     the full object;
//   - if the object shrank relative to the previous snapshot, the truncated
//     range is removed from the diff and, for every source extent of this
//     object it intersects, recorded as a zero interval in destination
//     offsets;
//   - the remaining diff is intersected with each source extent of this
//     object and queued as COPY_OP_TYPE_WRITE ops keyed by
//     {end_src_snap_id, clone_end_snap_id}.
// Finally the keys of m_read_ops are appended to m_read_snaps.
// NOTE(review): several declarations (end_size, exists, `e`), `continue`
// statements and closing braces are not present in this listing.
592 template <typename I
>
593 void ObjectCopyRequest
<I
>::compute_read_ops() {
596 m_zero_interval
= {};
// only the parent pointer is sampled under the lock
598 m_src_image_ctx
->image_lock
.lock_shared();
599 bool hide_parent
= (m_src_image_ctx
->parent
!= nullptr);
600 m_src_image_ctx
->image_lock
.unlock_shared();
// last snap map key: reads are never issued past this snapshot
602 librados::snap_t src_copy_point_snap_id
= m_snap_map
.rbegin()->first
;
// with a parent or a non-zero start snapshot, treat the object as having
// existed at full size before the first iteration
603 bool prev_exists
= (hide_parent
|| m_src_snap_id_start
> 0);
604 uint64_t prev_end_size
= prev_exists
?
605 m_src_image_ctx
->layout
.object_size
: 0;
606 librados::snap_t start_src_snap_id
= m_src_snap_id_start
;
608 for (auto &pair
: m_snap_map
) {
609 ceph_assert(!pair
.second
.empty());
610 librados::snap_t end_src_snap_id
= pair
.first
;
611 librados::snap_t end_dst_snap_id
= pair
.second
.front();
613 interval_set
<uint64_t> diff
;
616 librados::snap_t clone_end_snap_id
;
// delta of this object between start_src_snap_id and end_src_snap_id
617 calc_snap_set_diff(m_cct
, m_snap_set
, start_src_snap_id
,
618 end_src_snap_id
, &diff
, &end_size
, &exists
,
619 &clone_end_snap_id
, &m_read_whole_object
);
621 if (m_read_whole_object
) {
622 ldout(m_cct
, 1) << "need to read full object" << dendl
;
623 diff
.insert(0, m_src_image_ctx
->layout
.object_size
);
625 end_size
= m_src_image_ctx
->layout
.object_size
;
626 clone_end_snap_id
= end_src_snap_id
;
627 } else if (!exists
) {
// first snapshot of a parent-backed image with no recorded clones
629 if (hide_parent
&& end_src_snap_id
== m_snap_map
.begin()->first
&&
630 m_snap_set
.clones
.empty()) {
631 ldout(m_cct
, 20) << "no clones for existing object" << dendl
;
633 diff
.insert(0, m_src_image_ctx
->layout
.object_size
);
634 clone_end_snap_id
= end_src_snap_id
;
638 ldout(m_cct
, 20) << "start_src_snap_id=" << start_src_snap_id
<< ", "
639 << "end_src_snap_id=" << end_src_snap_id
<< ", "
640 << "clone_end_snap_id=" << clone_end_snap_id
<< ", "
641 << "end_dst_snap_id=" << end_dst_snap_id
<< ", "
642 << "diff=" << diff
<< ", "
643 << "end_size=" << end_size
<< ", "
644 << "exists=" << exists
<< dendl
;
// make sure an entry exists for this snapshot even if nothing is zeroed
646 m_zero_interval
[end_src_snap_id
] = {};
648 if (exists
|| prev_exists
) {
649 // clip diff to size of object (in case it was truncated)
650 if (end_size
< prev_end_size
) {
651 interval_set
<uint64_t> trunc
;
652 trunc
.insert(end_size
, prev_end_size
- end_size
);
653 trunc
.intersection_of(diff
);
654 diff
.subtract(trunc
);
655 ldout(m_cct
, 20) << "clearing truncate diff: " << trunc
<< dendl
;
659 // reads should be issued against the newest (existing) snapshot within
660 // the associated snapshot object clone. writes should be issued
661 // against the oldest snapshot in the snap_map.
662 ceph_assert(clone_end_snap_id
>= end_src_snap_id
);
663 if (clone_end_snap_id
> src_copy_point_snap_id
) {
664 // do not read past the copy point snapshot
665 clone_end_snap_id
= src_copy_point_snap_id
;
669 for (auto &it
: m_src_object_extents
) {
670 auto dst_object_offset
= it
.first
;
// extents of other source objects are handled when that object is listed
673 if (e
.object_no
!= m_src_ono
) {
677 interval_set
<uint64_t> read_interval
;
678 read_interval
.insert(e
.offset
, e
.length
);
680 if (end_size
< prev_end_size
) {
// portion of this extent that was cut off by the truncation
681 interval_set
<uint64_t> zero_interval
;
682 zero_interval
.insert(end_size
, prev_end_size
- end_size
);
683 zero_interval
.intersection_of(read_interval
);
684 if (!zero_interval
.empty()) {
685 auto it
= zero_interval
.begin();
// rebase from source-object offset to destination-object offset
686 auto offset
= it
.get_start() - e
.offset
;
687 m_zero_interval
[end_src_snap_id
].insert(dst_object_offset
+ offset
,
689 ldout(m_cct
, 20) << "extent " << e
.offset
<< "~" << e
.length
690 << " intersects truncation " << end_size
<< "~"
691 << prev_end_size
- end_size
<< ", inserting zero "
692 << dst_object_offset
+ offset
<< "~"
693 << it
.get_len() << dendl
;
697 // limit read interval to diff
698 read_interval
.intersection_of(diff
);
700 ldout(m_cct
, 20) << "src_object_extent: " << e
.offset
<< "~" << e
.length
701 << ", dst_object_offset=" << dst_object_offset
702 << ", read: " << read_interval
<< dendl
;
// a non-existent object must not produce any reads
704 ceph_assert(exists
|| read_interval
.empty());
706 for (auto it
= read_interval
.begin(); it
!= read_interval
.end();
708 ceph_assert(it
.get_start() >= e
.offset
);
709 auto offset
= it
.get_start() - e
.offset
;
710 ldout(m_cct
, 20) << "read/write op: " << it
.get_start() << "~"
711 << it
.get_len() << " dst: "
712 << dst_object_offset
+ offset
<< dendl
;
// key: {snapshot the write belongs to, snapshot the read is issued at}
713 m_read_ops
[{end_src_snap_id
, clone_end_snap_id
}]
714 .emplace_back(COPY_OP_TYPE_WRITE
, it
.get_start(),
715 dst_object_offset
+ offset
, it
.get_len());
// carry this snapshot's state into the next iteration
720 prev_end_size
= end_size
;
721 prev_exists
= exists
;
// parent-backed object that exists with size 0: treat the previous size
// as a full object for the next comparison
722 if (hide_parent
&& prev_exists
&& prev_end_size
== 0) {
724 prev_end_size
= m_src_image_ctx
->layout
.object_size
;
726 start_src_snap_id
= end_src_snap_id
;
// queue of keys consumed one at a time by send_read_object()
729 for (auto &it
: m_read_ops
) {
730 m_read_snaps
.push_back(it
.first
);
// Computes the parent image extents that must be read so that data
// inherited from the parent is copied into the destination object.
//
// Requires the source image_lock to be held (asserted). The output vector
// and m_zero_interval are reset first. The visible checks log and
// distinguish these cases: no parent, no extent flagged `noent`, all
// extents flagged `noent` while flatten is disabled, failure to obtain the
// parent overlap, and a parent overlap of zero. Otherwise each source
// extent is mapped to image extents, pruned against the parent overlap,
// and queued as a COPY_OP_TYPE_WRITE under key {first snap id, 0} together
// with the matching parent image extent. If anything is to be read, the
// destination object state for that snapshot is set to OBJECT_EXISTS.
// NOTE(review): the early returns, the noent counter increment, the
// declaration of `e` and some conditions are not present in this listing.
// NOTE(review): this is the only place in the file that uses plain
// `assert` instead of `ceph_assert` -- consider aligning.
734 template <typename I
>
735 void ObjectCopyRequest
<I
>::compute_read_from_parent_ops(
736 io::Extents
*parent_image_extents
) {
737 assert(ceph_mutex_is_locked(m_src_image_ctx
->image_lock
));
740 m_zero_interval
= {};
741 parent_image_extents
->clear();
743 if (m_src_image_ctx
->parent
== nullptr) {
744 ldout(m_cct
, 20) << "no parent" << dendl
;
// count the extents whose `noent` flag is set
748 size_t noent_count
= 0;
749 for (auto &it
: m_src_object_extents
) {
750 if (it
.second
.noent
) {
755 if (noent_count
== 0) {
756 ldout(m_cct
, 20) << "no extents need read from parent" << dendl
;
// every extent is flagged noent and flatten was not requested
760 if (noent_count
== m_src_object_extents
.size() && !m_flatten
) {
761 ldout(m_cct
, 20) << "reading all extents skipped when no flatten"
766 ldout(m_cct
, 20) << dendl
;
// parent data is attributed to the oldest snapshot in the snap map
768 auto src_snap_seq
= m_snap_map
.begin()->first
;
770 uint64_t parent_overlap
;
771 int r
= m_src_image_ctx
->get_parent_overlap(src_snap_seq
, &parent_overlap
);
773 ldout(m_cct
, 5) << "failed getting parent overlap for snap_id: "
774 << src_snap_seq
<< ": " << cpp_strerror(r
) << dendl
;
777 if (parent_overlap
== 0) {
778 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
782 for (auto &it
: m_src_object_extents
) {
783 auto dst_object_offset
= it
.first
;
790 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
// source object extent -> image extents
791 Striper::extent_to_file(m_cct
, &m_src_image_ctx
->layout
, e
.object_no
,
792 e
.offset
, e
.length
, image_extents
);
// trim the image extents to the part covered by the parent
794 uint64_t overlap
= m_src_image_ctx
->prune_parent_extents(image_extents
,
797 ldout(m_cct
, 20) << "no parent overlap for object_no " << e
.object_no
798 << " extent " << e
.offset
<< "~" << e
.length
<< dendl
;
802 ldout(m_cct
, 20) << "object_no " << e
.object_no
<< " extent " << e
.offset
803 << "~" << e
.length
<< " overlap " << parent_overlap
804 << " parent extents " << image_extents
<< dendl
;
806 ceph_assert(image_extents
.size() == 1);
808 auto src_image_offset
= image_extents
.begin()->first
;
809 auto length
= image_extents
.begin()->second
;
// queue the copy op and pre-populate its extent map with the same range
810 m_read_ops
[{src_snap_seq
, 0}].emplace_back(COPY_OP_TYPE_WRITE
, e
.offset
,
811 dst_object_offset
, length
);
812 m_read_ops
[{src_snap_seq
, 0}].rbegin()->src_extent_map
[e
.offset
] = length
;
813 parent_image_extents
->emplace_back(src_image_offset
, length
);
816 if (!parent_image_extents
->empty()) {
817 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS
;
// Converts the completed read ops into write ops and accumulates the zero
// intervals that the destination must reflect.
//
// First, every per-snapshot interval in m_zero_interval is merged into
// m_dst_zero_interval. Then, for each copy op, the sparse-read extent map
// is walked in order: gaps between the running source offset and the next
// data extent, and any tail after the last data extent, are recorded as
// zero intervals in destination offsets, while each data extent is recorded
// in the op's dst_extent_map. The copy op is finally moved into
// m_write_ops under its source snapshot id.
// NOTE(review): the guard around the gap handling (presumably
// `zero_len > 0`) and some closing braces are not present in this listing.
// NOTE(review): '©_op' / '©_ops' look like mis-decoded '&copy_op' /
// '&copy_ops'.
821 template <typename I
>
822 void ObjectCopyRequest
<I
>::merge_write_ops() {
823 ldout(m_cct
, 20) << dendl
;
825 for (auto &it
: m_zero_interval
) {
826 m_dst_zero_interval
[it
.first
].insert(it
.second
);
829 for (auto &it
: m_read_ops
) {
// write ops are keyed by the first element of the read key only
830 auto src_snap_seq
= it
.first
.first
;
831 auto ©_ops
= it
.second
;
832 for (auto ©_op
: copy_ops
) {
// running cursors through the source and destination ranges of this op
833 uint64_t src_offset
= copy_op
.src_offset
;
834 uint64_t dst_offset
= copy_op
.dst_offset
;
835 for (auto &e
: copy_op
.src_extent_map
) {
// hole between the cursor and the next data extent
836 uint64_t zero_len
= e
.first
- src_offset
;
838 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
839 << ", inserting zero " << dst_offset
<< "~"
840 << zero_len
<< dendl
;
841 m_dst_zero_interval
[src_snap_seq
].insert(dst_offset
, zero_len
);
842 src_offset
+= zero_len
;
843 dst_offset
+= zero_len
;
// data extent: remember where it goes in the destination object
845 copy_op
.dst_extent_map
[dst_offset
] = e
.second
;
846 src_offset
+= e
.second
;
847 dst_offset
+= e
.second
;
// trailing hole after the last data extent
849 if (dst_offset
< copy_op
.dst_offset
+ copy_op
.length
) {
850 uint64_t zero_len
= copy_op
.dst_offset
+ copy_op
.length
- dst_offset
;
851 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
852 << ", inserting zero " << dst_offset
<< "~"
853 << zero_len
<< dendl
;
854 m_dst_zero_interval
[src_snap_seq
].insert(dst_offset
, zero_len
);
856 ceph_assert(dst_offset
== copy_op
.dst_offset
+ copy_op
.length
);
858 m_write_ops
[src_snap_seq
].emplace_back(std::move(copy_op
));
// Turns the accumulated destination zero intervals into write ops and
// derives the object-map state for each snapshot.
//
// For each snapshot in m_dst_zero_interval (walked in key order), tracking
// the object's end size from the previous snapshot:
//   - if the destination object cannot exist at that snapshot and it
//     previously had data, a COPY_OP_TYPE_REMOVE is queued;
//   - for the first snapshot the destination's parent overlap for this
//     object seeds prev_end_size;
//   - end_size is raised to cover any queued writes;
//   - a zero interval reaching the object end becomes REMOVE_TRUNC (starts
//     at 0 with a parent), REMOVE (starts at 0), or TRUNC, and lowers
//     end_size; any other zero interval becomes a ZERO op;
//   - if the object ends up non-empty (or a parent exists) its state is
//     OBJECT_EXISTS, downgraded to OBJECT_EXISTS_CLEAN when fast-diff is
//     enabled, the size is unchanged and no write ops were queued.
// NOTE(review): several conditions, `continue`/`else` lines and closing
// braces are not present in this listing.
// NOTE(review): '©_op' looks like a mis-decoded '&copy_op'.
863 template <typename I
>
864 void ObjectCopyRequest
<I
>::compute_zero_ops() {
865 ldout(m_cct
, 20) << dendl
;
867 bool fast_diff
= m_dst_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
);
868 uint64_t prev_end_size
= 0;
// only the parent pointer is sampled under the lock
870 m_src_image_ctx
->image_lock
.lock_shared();
871 bool hide_parent
= (m_src_image_ctx
->parent
!= nullptr);
872 m_src_image_ctx
->image_lock
.unlock_shared();
874 for (auto &it
: m_dst_zero_interval
) {
875 auto src_snap_seq
= it
.first
;
876 auto &zero_interval
= it
.second
;
878 auto snap_map_it
= m_snap_map
.find(src_snap_seq
);
879 ceph_assert(snap_map_it
!= m_snap_map
.end());
880 auto dst_snap_seq
= snap_map_it
->second
.front();
882 auto dst_may_exist_it
= m_dst_object_may_exist
.find(dst_snap_seq
);
883 ceph_assert(dst_may_exist_it
!= m_dst_object_may_exist
.end());
// object is beyond the image size at this snapshot but had data before
884 if (!dst_may_exist_it
->second
&& prev_end_size
> 0) {
885 ldout(m_cct
, 5) << "object DNE for snap_id: " << dst_snap_seq
<< dendl
;
886 m_write_ops
[src_snap_seq
].emplace_back(COPY_OP_TYPE_REMOVE
, 0, 0, 0);
892 std::shared_lock image_locker
{m_dst_image_ctx
->image_lock
};
893 uint64_t parent_overlap
= 0;
894 int r
= m_dst_image_ctx
->get_parent_overlap(dst_snap_seq
,
897 ldout(m_cct
, 5) << "failed getting parent overlap for snap_id: "
898 << dst_snap_seq
<< ": " << cpp_strerror(r
) << dendl
;
900 if (parent_overlap
== 0) {
901 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
904 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
// whole destination object -> image extents, then prune to parent overlap
905 Striper::extent_to_file(m_cct
, &m_dst_image_ctx
->layout
,
906 m_dst_object_number
, 0,
907 m_dst_image_ctx
->layout
.object_size
,
909 uint64_t overlap
= m_dst_image_ctx
->prune_parent_extents(image_extents
,
912 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
// first snapshot only: the parent-backed bytes count as pre-existing data
914 } else if (src_snap_seq
== m_dst_zero_interval
.begin()->first
) {
// NOTE(review): `e` is taken by value here; a const reference would avoid
// copying each pair
915 for (auto e
: image_extents
) {
916 prev_end_size
+= e
.second
;
918 ceph_assert(prev_end_size
<= m_dst_image_ctx
->layout
.object_size
);
923 uint64_t end_size
= prev_end_size
;
925 // update end_size if there are writes into higher offsets
926 auto iter
= m_write_ops
.find(src_snap_seq
);
927 if (iter
!= m_write_ops
.end()) {
928 for (auto ©_op
: iter
->second
) {
929 for (auto &e
: copy_op
.dst_extent_map
) {
930 end_size
= std::max(end_size
, e
.first
+ e
.second
);
// NOTE(review): post-increment on the iterator; pre-increment is the usual
// form for non-trivial iterators
935 for (auto z
= zero_interval
.begin(); z
!= zero_interval
.end(); z
++) {
936 if (z
.get_start() + z
.get_len() >= end_size
) {
937 // zero interval at the object end
938 if (z
.get_start() == 0 && hide_parent
) {
939 m_write_ops
[src_snap_seq
]
940 .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC
, 0, 0, 0);
941 ldout(m_cct
, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl
;
// only shrink if the object actually extended into the zeroed range
942 } else if (z
.get_start() < prev_end_size
) {
943 if (z
.get_start() == 0) {
944 m_write_ops
[src_snap_seq
]
945 .emplace_back(COPY_OP_TYPE_REMOVE
, 0, 0, 0);
946 ldout(m_cct
, 20) << "COPY_OP_TYPE_REMOVE" << dendl
;
948 m_write_ops
[src_snap_seq
]
949 .emplace_back(COPY_OP_TYPE_TRUNC
, 0, z
.get_start(), 0);
950 ldout(m_cct
, 20) << "COPY_OP_TYPE_TRUNC " << z
.get_start() << dendl
;
// the object now ends where the zeroed tail begins
953 end_size
= std::min(end_size
, z
.get_start());
955 // zero interval inside the object
956 m_write_ops
[src_snap_seq
]
957 .emplace_back(COPY_OP_TYPE_ZERO
, 0, z
.get_start(), z
.get_len());
958 ldout(m_cct
, 20) << "COPY_OP_TYPE_ZERO " << z
.get_start() << "~"
959 << z
.get_len() << dendl
;
962 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
<< ", end_size="
963 << end_size
<< dendl
;
964 if (end_size
> 0 || hide_parent
) {
965 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS
;
// unchanged since the previous snapshot: mark clean for fast-diff
966 if (fast_diff
&& end_size
== prev_end_size
&&
967 m_write_ops
[src_snap_seq
].empty()) {
968 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS_CLEAN
;
971 prev_end_size
= end_size
;
// Completes the request: finishes and frees the source AsyncOperation that
// was started in the constructor, then invokes the user's completion
// context with `r`.
// NOTE(review): m_on_finish is copied to a local before teardown; the
// statement between the teardown and the callback (presumably destroying
// this request, which is what the existing "ensure IoCtxs are closed"
// comment refers to) is not present in this listing.
975 template <typename I
>
976 void ObjectCopyRequest
<I
>::finish(int r
) {
977 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
979 // ensure IoCtxs are closed prior to proceeding
980 auto on_finish
= m_on_finish
;
982 m_src_async_op
->finish_op();
983 delete m_src_async_op
;
// uses the local copy rather than the member
986 on_finish
->complete(r
);
// Fills m_dst_object_may_exist with one entry per destination snapshot
// (plus CEPH_NOSNAP for the image head): the entry is true when this
// request's object number is below the image's object count at that
// snapshot. The destination image_lock is held shared while the snapshot
// list and object counts are read.
989 template <typename I
>
990 void ObjectCopyRequest
<I
>::compute_dst_object_may_exist() {
991 std::shared_lock image_locker
{m_dst_image_ctx
->image_lock
};
// work on a copy so the head pseudo-snapshot can be appended
993 auto snap_ids
= m_dst_image_ctx
->snaps
;
994 snap_ids
.push_back(CEPH_NOSNAP
);
996 for (auto snap_id
: snap_ids
) {
997 m_dst_object_may_exist
[snap_id
] =
998 (m_dst_object_number
< m_dst_image_ctx
->get_object_count(snap_id
));
1002 } // namespace deep_copy
1003 } // namespace librbd
1005 template class librbd::deep_copy::ObjectCopyRequest
<librbd::ImageCtx
>;