1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "ObjectCopyRequest.h"
5 #include "common/errno.h"
6 #include "librados/snap_set_diff.h"
7 #include "librbd/ExclusiveLock.h"
8 #include "librbd/ObjectMap.h"
9 #include "librbd/Utils.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/AsyncOperation.h"
12 #include "librbd/io/ImageRequest.h"
13 #include "librbd/io/ReadResult.h"
14 #include "osdc/Striper.h"
#define dout_subsys ceph_subsys_rbd
// NOTE(review): the #undef was dropped from the listing this file was
// recovered from; restored so the redefinition below is well-formed.
#undef dout_prefix
// Prefix every log line emitted from this file with the class name, the
// request instance pointer and the name of the emitting function.
#define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \
                           << this << " " << __func__ << ": "
23 inline bool operator==(const clone_info_t
& rhs
, const clone_info_t
& lhs
) {
24 return (rhs
.cloneid
== lhs
.cloneid
&&
25 rhs
.snaps
== lhs
.snaps
&&
26 rhs
.overlap
== lhs
.overlap
&&
27 rhs
.size
== lhs
.size
);
30 inline bool operator==(const snap_set_t
& rhs
, const snap_set_t
& lhs
) {
31 return (rhs
.clones
== lhs
.clones
&&
35 } // namespace librados
40 using librbd::util::create_context_callback
;
41 using librbd::util::create_rados_callback
;
44 ObjectCopyRequest
<I
>::ObjectCopyRequest(I
*src_image_ctx
,
46 const SnapMap
&snap_map
,
47 uint64_t dst_object_number
,
48 bool flatten
, Context
*on_finish
)
49 : m_src_image_ctx(src_image_ctx
),
50 m_dst_image_ctx(dst_image_ctx
), m_cct(dst_image_ctx
->cct
),
51 m_snap_map(snap_map
), m_dst_object_number(dst_object_number
),
52 m_flatten(flatten
), m_on_finish(on_finish
) {
53 ceph_assert(!m_snap_map
.empty());
55 m_src_async_op
= new io::AsyncOperation();
56 m_src_async_op
->start_op(*util::get_image_ctx(m_src_image_ctx
));
58 m_src_io_ctx
.dup(m_src_image_ctx
->data_ctx
);
59 m_dst_io_ctx
.dup(m_dst_image_ctx
->data_ctx
);
61 m_dst_oid
= m_dst_image_ctx
->get_object_name(dst_object_number
);
63 ldout(m_cct
, 20) << "dst_oid=" << m_dst_oid
<< dendl
;
65 compute_src_object_extents();
69 void ObjectCopyRequest
<I
>::send() {
74 void ObjectCopyRequest
<I
>::send_list_snaps() {
75 ceph_assert(!m_src_objects
.empty());
76 m_src_ono
= *m_src_objects
.begin();
77 m_src_oid
= m_src_image_ctx
->get_object_name(m_src_ono
);
79 ldout(m_cct
, 20) << "src_oid=" << m_src_oid
<< dendl
;
81 librados::AioCompletion
*rados_completion
= create_rados_callback
<
82 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_list_snaps
>(this);
84 librados::ObjectReadOperation op
;
87 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
89 m_src_io_ctx
.snap_set_read(CEPH_SNAPDIR
);
90 int r
= m_src_io_ctx
.aio_operate(m_src_oid
, rados_completion
, &op
,
93 rados_completion
->release();
97 void ObjectCopyRequest
<I
>::handle_list_snaps(int r
) {
98 if (r
== 0 && m_snap_ret
< 0) {
102 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
104 if (r
< 0 && r
!= -ENOENT
) {
105 lderr(m_cct
) << "failed to list snaps: " << cpp_strerror(r
) << dendl
;
110 if (m_retry_missing_read
) {
111 if (m_snap_set
== m_retry_snap_set
) {
112 lderr(m_cct
) << "read encountered missing object using up-to-date snap set"
118 ldout(m_cct
, 20) << "retrying using updated snap set" << dendl
;
119 m_retry_missing_read
= false;
120 m_retry_snap_set
= {};
124 for (auto &it
: m_src_object_extents
) {
126 if (e
.object_no
== m_src_ono
) {
132 m_zero_interval
= {};
139 template <typename I
>
140 void ObjectCopyRequest
<I
>::send_read_object() {
142 if (m_read_snaps
.empty()) {
143 // all snapshots have been read
146 ceph_assert(!m_src_objects
.empty());
147 m_src_objects
.erase(m_src_objects
.begin());
149 if (!m_src_objects
.empty()) {
154 // all objects have been read
155 send_read_from_parent();
159 auto index
= *m_read_snaps
.begin();
160 auto src_snap_seq
= index
.second
;
162 bool read_required
= false;
163 librados::ObjectReadOperation op
;
165 for (auto ©_op
: m_read_ops
[index
]) {
166 if (!read_required
) {
167 // map the copy op start snap id back to the necessary read snap id
168 m_src_io_ctx
.snap_set_read(src_snap_seq
);
170 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
<< dendl
;
171 read_required
= true;
173 ldout(m_cct
, 20) << "read op: " << copy_op
.src_offset
<< "~"
174 << copy_op
.length
<< dendl
;
175 op
.sparse_read(copy_op
.src_offset
, copy_op
.length
, ©_op
.src_extent_map
,
176 ©_op
.out_bl
, nullptr);
177 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
178 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
181 if (!read_required
) {
182 // nothing written to this object for this snapshot (must be trunc/remove)
183 handle_read_object(0);
187 auto ctx
= create_context_callback
<
188 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_read_object
>(this);
189 auto comp
= create_rados_callback(ctx
);
191 ldout(m_cct
, 20) << "read " << m_src_oid
<< dendl
;
193 int r
= m_src_io_ctx
.aio_operate(m_src_oid
, comp
, &op
, nullptr);
198 template <typename I
>
199 void ObjectCopyRequest
<I
>::handle_read_object(int r
) {
200 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
203 m_retry_snap_set
= m_snap_set
;
204 m_retry_missing_read
= true;
206 ldout(m_cct
, 5) << "object missing potentially due to removed snapshot"
213 lderr(m_cct
) << "failed to read from source object: " << cpp_strerror(r
)
219 ceph_assert(!m_read_snaps
.empty());
220 m_read_snaps
.erase(m_read_snaps
.begin());
225 template <typename I
>
226 void ObjectCopyRequest
<I
>::send_read_from_parent() {
227 m_src_image_ctx
->snap_lock
.get_read();
228 m_src_image_ctx
->parent_lock
.get_read();
229 io::Extents image_extents
;
230 compute_read_from_parent_ops(&image_extents
);
231 m_src_image_ctx
->snap_lock
.put_read();
233 if (image_extents
.empty()) {
234 m_src_image_ctx
->parent_lock
.put_read();
235 handle_read_from_parent(0);
239 ldout(m_cct
, 20) << dendl
;
241 ceph_assert(m_src_image_ctx
->parent
!= nullptr);
243 auto ctx
= create_context_callback
<
244 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_read_from_parent
>(this);
245 auto comp
= io::AioCompletion::create_and_start(
246 ctx
, util::get_image_ctx(m_src_image_ctx
->parent
), io::AIO_TYPE_READ
);
247 ldout(m_cct
, 20) << "completion " << comp
<< ", extents " << image_extents
250 auto src_image_ctx
= m_src_image_ctx
;
251 io::ImageRequest
<I
>::aio_read(src_image_ctx
->parent
, comp
,
252 std::move(image_extents
),
253 io::ReadResult
{&m_read_from_parent_data
}, 0,
255 src_image_ctx
->parent_lock
.put_read();
258 template <typename I
>
259 void ObjectCopyRequest
<I
>::handle_read_from_parent(int r
) {
260 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
263 lderr(m_cct
) << "failed to read from parent: " << cpp_strerror(r
) << dendl
;
268 if (!m_read_ops
.empty()) {
269 ceph_assert(m_read_ops
.size() == 1);
270 auto src_snap_seq
= m_read_ops
.begin()->first
.first
;
271 auto ©_ops
= m_read_ops
.begin()->second
;
273 for (auto it
= copy_ops
.begin(); it
!= copy_ops
.end(); ) {
274 it
->out_bl
.substr_of(m_read_from_parent_data
, offset
, it
->length
);
275 offset
+= it
->length
;
276 if (it
->out_bl
.is_zero()) {
277 m_zero_interval
[src_snap_seq
].insert(it
->dst_offset
, it
->length
);
278 it
= copy_ops
.erase(it
);
286 compute_dst_object_may_exist();
289 if (m_write_ops
.empty()) {
299 template <typename I
>
300 void ObjectCopyRequest
<I
>::send_write_object() {
301 ceph_assert(!m_write_ops
.empty());
302 auto& copy_ops
= m_write_ops
.begin()->second
;
304 // retrieve the destination snap context for the op
305 SnapIds dst_snap_ids
;
306 librados::snap_t dst_snap_seq
= 0;
307 librados::snap_t src_snap_seq
= m_write_ops
.begin()->first
;
308 if (src_snap_seq
!= 0) {
309 auto snap_map_it
= m_snap_map
.find(src_snap_seq
);
310 ceph_assert(snap_map_it
!= m_snap_map
.end());
312 auto dst_snap_id
= snap_map_it
->second
.front();
313 auto dst_may_exist_it
= m_dst_object_may_exist
.find(dst_snap_id
);
314 ceph_assert(dst_may_exist_it
!= m_dst_object_may_exist
.end());
315 if (!dst_may_exist_it
->second
&& !copy_ops
.empty()) {
316 // if the object cannot exist, the only valid op is to remove it
317 ceph_assert(copy_ops
.size() == 1U);
318 ceph_assert(copy_ops
.begin()->type
== COPY_OP_TYPE_REMOVE
);
321 // write snapshot context should be before actual snapshot
322 if (snap_map_it
!= m_snap_map
.begin()) {
324 ceph_assert(!snap_map_it
->second
.empty());
325 dst_snap_seq
= snap_map_it
->second
.front();
326 dst_snap_ids
= snap_map_it
->second
;
330 ldout(m_cct
, 20) << "dst_snap_seq=" << dst_snap_seq
<< ", "
331 << "dst_snaps=" << dst_snap_ids
<< dendl
;
333 librados::ObjectWriteOperation op
;
334 uint64_t buffer_offset
;
336 if (!m_dst_image_ctx
->migration_info
.empty()) {
337 cls_client::assert_snapc_seq(&op
, dst_snap_seq
,
338 cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ
);
341 for (auto ©_op
: copy_ops
) {
342 switch (copy_op
.type
) {
343 case COPY_OP_TYPE_WRITE
:
345 for (auto &e
: copy_op
.dst_extent_map
) {
346 ldout(m_cct
, 20) << "write op: " << e
.first
<< "~" << e
.second
349 tmpbl
.substr_of(copy_op
.out_bl
, buffer_offset
, e
.second
);
350 op
.write(e
.first
, tmpbl
);
351 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
352 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
353 buffer_offset
+= e
.second
;
356 case COPY_OP_TYPE_ZERO
:
357 ldout(m_cct
, 20) << "zero op: " << copy_op
.dst_offset
<< "~"
358 << copy_op
.length
<< dendl
;
359 op
.zero(copy_op
.dst_offset
, copy_op
.length
);
361 case COPY_OP_TYPE_REMOVE_TRUNC
:
362 ldout(m_cct
, 20) << "create op" << dendl
;
365 case COPY_OP_TYPE_TRUNC
:
366 ldout(m_cct
, 20) << "trunc op: " << copy_op
.dst_offset
<< dendl
;
367 op
.truncate(copy_op
.dst_offset
);
369 case COPY_OP_TYPE_REMOVE
:
370 ldout(m_cct
, 20) << "remove op" << dendl
;
378 if (op
.size() == (m_dst_image_ctx
->migration_info
.empty() ? 0 : 1)) {
379 handle_write_object(0);
384 Context
*finish_op_ctx
;
386 RWLock::RLocker
owner_locker(m_dst_image_ctx
->owner_lock
);
387 finish_op_ctx
= start_lock_op(m_dst_image_ctx
->owner_lock
, &r
);
389 if (finish_op_ctx
== nullptr) {
390 lderr(m_cct
) << "lost exclusive lock" << dendl
;
395 auto ctx
= new FunctionContext([this, finish_op_ctx
](int r
) {
396 handle_write_object(r
);
397 finish_op_ctx
->complete(0);
399 librados::AioCompletion
*comp
= create_rados_callback(ctx
);
400 r
= m_dst_io_ctx
.aio_operate(m_dst_oid
, comp
, &op
, dst_snap_seq
, dst_snap_ids
,
406 template <typename I
>
407 void ObjectCopyRequest
<I
>::handle_write_object(int r
) {
408 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
412 } else if (r
== -ERANGE
) {
413 ldout(m_cct
, 10) << "concurrent deep copy" << dendl
;
417 lderr(m_cct
) << "failed to write to destination object: " << cpp_strerror(r
)
423 m_write_ops
.erase(m_write_ops
.begin());
424 if (!m_write_ops
.empty()) {
429 send_update_object_map();
432 template <typename I
>
433 void ObjectCopyRequest
<I
>::send_update_object_map() {
434 if (!m_dst_image_ctx
->test_features(RBD_FEATURE_OBJECT_MAP
) ||
435 m_dst_object_state
.empty()) {
440 m_dst_image_ctx
->owner_lock
.get_read();
441 m_dst_image_ctx
->snap_lock
.get_read();
442 if (m_dst_image_ctx
->object_map
== nullptr) {
443 // possible that exclusive lock was lost in background
444 lderr(m_cct
) << "object map is not initialized" << dendl
;
446 m_dst_image_ctx
->snap_lock
.put_read();
447 m_dst_image_ctx
->owner_lock
.put_read();
452 auto &dst_object_state
= *m_dst_object_state
.begin();
453 auto it
= m_snap_map
.find(dst_object_state
.first
);
454 ceph_assert(it
!= m_snap_map
.end());
455 auto dst_snap_id
= it
->second
.front();
456 auto object_state
= dst_object_state
.second
;
457 m_dst_object_state
.erase(m_dst_object_state
.begin());
459 ldout(m_cct
, 20) << "dst_snap_id=" << dst_snap_id
<< ", object_state="
460 << static_cast<uint32_t>(object_state
) << dendl
;
463 auto finish_op_ctx
= start_lock_op(m_dst_image_ctx
->owner_lock
, &r
);
464 if (finish_op_ctx
== nullptr) {
465 lderr(m_cct
) << "lost exclusive lock" << dendl
;
466 m_dst_image_ctx
->snap_lock
.put_read();
467 m_dst_image_ctx
->owner_lock
.put_read();
472 auto ctx
= new FunctionContext([this, finish_op_ctx
](int r
) {
473 handle_update_object_map(r
);
474 finish_op_ctx
->complete(0);
477 auto dst_image_ctx
= m_dst_image_ctx
;
478 dst_image_ctx
->object_map_lock
.get_write();
479 bool sent
= dst_image_ctx
->object_map
->template aio_update
<
480 Context
, &Context::complete
>(dst_snap_id
, m_dst_object_number
, object_state
,
483 // NOTE: state machine might complete before we reach here
484 dst_image_ctx
->object_map_lock
.put_write();
485 dst_image_ctx
->snap_lock
.put_read();
486 dst_image_ctx
->owner_lock
.put_read();
488 ceph_assert(dst_snap_id
== CEPH_NOSNAP
);
493 template <typename I
>
494 void ObjectCopyRequest
<I
>::handle_update_object_map(int r
) {
495 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
498 lderr(m_cct
) << "failed to update object map: " << cpp_strerror(r
) << dendl
;
503 if (!m_dst_object_state
.empty()) {
504 send_update_object_map();
510 template <typename I
>
511 Context
*ObjectCopyRequest
<I
>::start_lock_op(RWLock
&owner_lock
, int* r
) {
512 ceph_assert(m_dst_image_ctx
->owner_lock
.is_locked());
513 if (m_dst_image_ctx
->exclusive_lock
== nullptr) {
514 return new FunctionContext([](int r
) {});
516 return m_dst_image_ctx
->exclusive_lock
->start_op(r
);
519 template <typename I
>
520 uint64_t ObjectCopyRequest
<I
>::src_to_dst_object_offset(uint64_t objectno
,
522 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
523 Striper::extent_to_file(m_cct
, &m_src_image_ctx
->layout
, objectno
, offset
, 1,
525 ceph_assert(image_extents
.size() == 1);
526 auto dst_object_offset
= image_extents
.begin()->first
;
528 std::map
<object_t
, std::vector
<ObjectExtent
>> dst_object_extents
;
529 Striper::file_to_extents(m_cct
, m_dst_image_ctx
->format_string
,
530 &m_dst_image_ctx
->layout
, dst_object_offset
, 1, 0,
532 ceph_assert(dst_object_extents
.size() == 1);
533 ceph_assert(dst_object_extents
.begin()->second
.size() == 1);
534 auto &e
= *dst_object_extents
.begin()->second
.begin();
535 ceph_assert(e
.objectno
== m_dst_object_number
);
540 template <typename I
>
541 void ObjectCopyRequest
<I
>::compute_src_object_extents() {
542 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
543 Striper::extent_to_file(m_cct
, &m_dst_image_ctx
->layout
, m_dst_object_number
,
544 0, m_dst_image_ctx
->layout
.object_size
, image_extents
);
547 for (auto &e
: image_extents
) {
548 std::map
<object_t
, std::vector
<ObjectExtent
>> src_object_extents
;
549 Striper::file_to_extents(m_cct
, m_src_image_ctx
->format_string
,
550 &m_src_image_ctx
->layout
, e
.first
, e
.second
, 0,
552 auto stripe_unit
= std::min(m_src_image_ctx
->layout
.stripe_unit
,
553 m_dst_image_ctx
->layout
.stripe_unit
);
554 for (auto &p
: src_object_extents
) {
555 for (auto &s
: p
.second
) {
556 m_src_objects
.insert(s
.objectno
);
558 while (s
.length
> 0) {
559 ceph_assert(s
.length
>= stripe_unit
);
560 auto dst_object_offset
= src_to_dst_object_offset(s
.objectno
, s
.offset
);
561 m_src_object_extents
[dst_object_offset
] = {s
.objectno
, s
.offset
,
563 s
.offset
+= stripe_unit
;
564 s
.length
-= stripe_unit
;
570 ceph_assert(total
== m_dst_image_ctx
->layout
.object_size
);
572 ldout(m_cct
, 20) << m_src_object_extents
.size() << " src extents" << dendl
;
575 template <typename I
>
576 void ObjectCopyRequest
<I
>::compute_read_ops() {
579 m_zero_interval
= {};
581 m_src_image_ctx
->parent_lock
.get_read();
582 bool hide_parent
= (m_src_image_ctx
->parent
!= nullptr);
583 m_src_image_ctx
->parent_lock
.put_read();
585 librados::snap_t src_copy_point_snap_id
= m_snap_map
.rbegin()->first
;
586 bool prev_exists
= hide_parent
;
587 uint64_t prev_end_size
= prev_exists
?
588 m_src_image_ctx
->layout
.object_size
: 0;
589 librados::snap_t start_src_snap_id
= 0;
591 for (auto &pair
: m_snap_map
) {
592 ceph_assert(!pair
.second
.empty());
593 librados::snap_t end_src_snap_id
= pair
.first
;
594 librados::snap_t end_dst_snap_id
= pair
.second
.front();
596 interval_set
<uint64_t> diff
;
599 librados::snap_t clone_end_snap_id
;
600 calc_snap_set_diff(m_cct
, m_snap_set
, start_src_snap_id
,
601 end_src_snap_id
, &diff
, &end_size
, &exists
,
602 &clone_end_snap_id
, &m_read_whole_object
);
604 if (m_read_whole_object
) {
605 ldout(m_cct
, 1) << "need to read full object" << dendl
;
606 diff
.insert(0, m_src_image_ctx
->layout
.object_size
);
608 end_size
= m_src_image_ctx
->layout
.object_size
;
609 clone_end_snap_id
= end_src_snap_id
;
610 } else if (!exists
) {
612 if (hide_parent
&& end_src_snap_id
== m_snap_map
.begin()->first
&&
613 m_snap_set
.clones
.empty()) {
614 ldout(m_cct
, 20) << "no clones for existing object" << dendl
;
616 diff
.insert(0, m_src_image_ctx
->layout
.object_size
);
617 clone_end_snap_id
= end_src_snap_id
;
621 ldout(m_cct
, 20) << "start_src_snap_id=" << start_src_snap_id
<< ", "
622 << "end_src_snap_id=" << end_src_snap_id
<< ", "
623 << "clone_end_snap_id=" << clone_end_snap_id
<< ", "
624 << "end_dst_snap_id=" << end_dst_snap_id
<< ", "
625 << "diff=" << diff
<< ", "
626 << "end_size=" << end_size
<< ", "
627 << "exists=" << exists
<< dendl
;
629 m_zero_interval
[end_src_snap_id
] = {};
631 if (exists
|| prev_exists
) {
632 // clip diff to size of object (in case it was truncated)
633 if (end_size
< prev_end_size
) {
634 interval_set
<uint64_t> trunc
;
635 trunc
.insert(end_size
, prev_end_size
);
636 trunc
.intersection_of(diff
);
637 diff
.subtract(trunc
);
638 ldout(m_cct
, 20) << "clearing truncate diff: " << trunc
<< dendl
;
642 // reads should be issued against the newest (existing) snapshot within
643 // the associated snapshot object clone. writes should be issued
644 // against the oldest snapshot in the snap_map.
645 ceph_assert(clone_end_snap_id
>= end_src_snap_id
);
646 if (clone_end_snap_id
> src_copy_point_snap_id
) {
647 // do not read past the copy point snapshot
648 clone_end_snap_id
= src_copy_point_snap_id
;
652 for (auto &it
: m_src_object_extents
) {
653 auto dst_object_offset
= it
.first
;
656 if (e
.object_no
!= m_src_ono
) {
660 interval_set
<uint64_t> read_interval
;
661 read_interval
.insert(e
.offset
, e
.length
);
663 if (end_size
< prev_end_size
) {
664 interval_set
<uint64_t> zero_interval
;
665 zero_interval
.insert(end_size
, prev_end_size
- end_size
);
666 zero_interval
.intersection_of(read_interval
);
667 if (!zero_interval
.empty()) {
668 auto it
= zero_interval
.begin();
669 auto offset
= it
.get_start() - e
.offset
;
670 m_zero_interval
[end_src_snap_id
].insert(dst_object_offset
+ offset
,
672 ldout(m_cct
, 20) << "extent " << e
.offset
<< "~" << e
.length
673 << " intersects truncation " << end_size
<< "~"
674 << prev_end_size
- end_size
<< ", inserting zero "
675 << dst_object_offset
+ offset
<< "~"
676 << it
.get_len() << dendl
;
680 // limit read interval to diff
681 read_interval
.intersection_of(diff
);
683 ldout(m_cct
, 20) << "src_object_extent: " << e
.offset
<< "~" << e
.length
684 << ", dst_object_offset=" << dst_object_offset
685 << ", read: " << read_interval
<< dendl
;
687 ceph_assert(exists
|| read_interval
.empty());
689 for (auto it
= read_interval
.begin(); it
!= read_interval
.end();
691 ceph_assert(it
.get_start() >= e
.offset
);
692 auto offset
= it
.get_start() - e
.offset
;
693 ldout(m_cct
, 20) << "read/write op: " << it
.get_start() << "~"
694 << it
.get_len() << " dst: "
695 << dst_object_offset
+ offset
<< dendl
;
696 m_read_ops
[{end_src_snap_id
, clone_end_snap_id
}]
697 .emplace_back(COPY_OP_TYPE_WRITE
, it
.get_start(),
698 dst_object_offset
+ offset
, it
.get_len());
703 prev_end_size
= end_size
;
704 prev_exists
= exists
;
705 if (hide_parent
&& prev_exists
&& prev_end_size
== 0) {
707 prev_end_size
= m_src_image_ctx
->layout
.object_size
;
709 start_src_snap_id
= end_src_snap_id
;
712 for (auto &it
: m_read_ops
) {
713 m_read_snaps
.push_back(it
.first
);
717 template <typename I
>
718 void ObjectCopyRequest
<I
>::compute_read_from_parent_ops(
719 io::Extents
*parent_image_extents
) {
720 assert(m_src_image_ctx
->snap_lock
.is_locked());
721 assert(m_src_image_ctx
->parent_lock
.is_locked());
724 m_zero_interval
= {};
725 parent_image_extents
->clear();
727 if (m_src_image_ctx
->parent
== nullptr) {
728 ldout(m_cct
, 20) << "no parent" << dendl
;
732 size_t noent_count
= 0;
733 for (auto &it
: m_src_object_extents
) {
734 if (it
.second
.noent
) {
739 if (noent_count
== 0) {
740 ldout(m_cct
, 20) << "no extents need read from parent" << dendl
;
744 if (noent_count
== m_src_object_extents
.size() && !m_flatten
) {
745 ldout(m_cct
, 20) << "reading all extents skipped when no flatten"
750 ldout(m_cct
, 20) << dendl
;
752 auto src_snap_seq
= m_snap_map
.begin()->first
;
754 uint64_t parent_overlap
;
755 int r
= m_src_image_ctx
->get_parent_overlap(src_snap_seq
, &parent_overlap
);
757 ldout(m_cct
, 5) << "failed getting parent overlap for snap_id: "
758 << src_snap_seq
<< ": " << cpp_strerror(r
) << dendl
;
761 if (parent_overlap
== 0) {
762 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
766 for (auto &it
: m_src_object_extents
) {
767 auto dst_object_offset
= it
.first
;
774 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
775 Striper::extent_to_file(m_cct
, &m_src_image_ctx
->layout
, e
.object_no
,
776 e
.offset
, e
.length
, image_extents
);
778 uint64_t overlap
= m_src_image_ctx
->prune_parent_extents(image_extents
,
781 ldout(m_cct
, 20) << "no parent overlap for object_no " << e
.object_no
782 << " extent " << e
.offset
<< "~" << e
.length
<< dendl
;
786 ldout(m_cct
, 20) << "object_no " << e
.object_no
<< " extent " << e
.offset
787 << "~" << e
.length
<< " overlap " << parent_overlap
788 << " parent extents " << image_extents
<< dendl
;
790 ceph_assert(image_extents
.size() == 1);
792 auto src_image_offset
= image_extents
.begin()->first
;
793 auto length
= image_extents
.begin()->second
;
794 m_read_ops
[{src_snap_seq
, 0}].emplace_back(COPY_OP_TYPE_WRITE
, e
.offset
,
795 dst_object_offset
, length
);
796 m_read_ops
[{src_snap_seq
, 0}].rbegin()->src_extent_map
[e
.offset
] = length
;
797 parent_image_extents
->emplace_back(src_image_offset
, length
);
800 if (!parent_image_extents
->empty()) {
801 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS
;
805 template <typename I
>
806 void ObjectCopyRequest
<I
>::merge_write_ops() {
807 ldout(m_cct
, 20) << dendl
;
809 for (auto &it
: m_zero_interval
) {
810 m_dst_zero_interval
[it
.first
].insert(it
.second
);
813 for (auto &it
: m_read_ops
) {
814 auto src_snap_seq
= it
.first
.first
;
815 auto ©_ops
= it
.second
;
816 for (auto ©_op
: copy_ops
) {
817 uint64_t src_offset
= copy_op
.src_offset
;
818 uint64_t dst_offset
= copy_op
.dst_offset
;
819 for (auto &e
: copy_op
.src_extent_map
) {
820 uint64_t zero_len
= e
.first
- src_offset
;
822 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
823 << ", inserting zero " << dst_offset
<< "~"
824 << zero_len
<< dendl
;
825 m_dst_zero_interval
[src_snap_seq
].insert(dst_offset
, zero_len
);
826 src_offset
+= zero_len
;
827 dst_offset
+= zero_len
;
829 copy_op
.dst_extent_map
[dst_offset
] = e
.second
;
830 src_offset
+= e
.second
;
831 dst_offset
+= e
.second
;
833 if (dst_offset
< copy_op
.dst_offset
+ copy_op
.length
) {
834 uint64_t zero_len
= copy_op
.dst_offset
+ copy_op
.length
- dst_offset
;
835 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
836 << ", inserting zero " << dst_offset
<< "~"
837 << zero_len
<< dendl
;
838 m_dst_zero_interval
[src_snap_seq
].insert(dst_offset
, zero_len
);
840 ceph_assert(dst_offset
== copy_op
.dst_offset
+ copy_op
.length
);
842 m_write_ops
[src_snap_seq
].emplace_back(std::move(copy_op
));
847 template <typename I
>
848 void ObjectCopyRequest
<I
>::compute_zero_ops() {
849 ldout(m_cct
, 20) << dendl
;
851 bool fast_diff
= m_dst_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
);
852 uint64_t prev_end_size
= 0;
854 m_src_image_ctx
->parent_lock
.get_read();
855 bool hide_parent
= (m_src_image_ctx
->parent
!= nullptr);
856 m_src_image_ctx
->parent_lock
.put_read();
858 for (auto &it
: m_dst_zero_interval
) {
859 auto src_snap_seq
= it
.first
;
860 auto &zero_interval
= it
.second
;
862 auto snap_map_it
= m_snap_map
.find(src_snap_seq
);
863 ceph_assert(snap_map_it
!= m_snap_map
.end());
864 auto dst_snap_seq
= snap_map_it
->second
.front();
866 auto dst_may_exist_it
= m_dst_object_may_exist
.find(dst_snap_seq
);
867 ceph_assert(dst_may_exist_it
!= m_dst_object_may_exist
.end());
868 if (!dst_may_exist_it
->second
&& prev_end_size
> 0) {
869 ldout(m_cct
, 5) << "object DNE for snap_id: " << dst_snap_seq
<< dendl
;
870 m_write_ops
[src_snap_seq
].emplace_back(COPY_OP_TYPE_REMOVE
, 0, 0, 0);
876 RWLock::RLocker
snap_locker(m_dst_image_ctx
->snap_lock
);
877 RWLock::RLocker
parent_locker(m_dst_image_ctx
->parent_lock
);
878 uint64_t parent_overlap
= 0;
879 int r
= m_dst_image_ctx
->get_parent_overlap(dst_snap_seq
, &parent_overlap
);
881 ldout(m_cct
, 5) << "failed getting parent overlap for snap_id: "
882 << dst_snap_seq
<< ": " << cpp_strerror(r
) << dendl
;
884 if (parent_overlap
== 0) {
885 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
888 std::vector
<std::pair
<uint64_t, uint64_t>> image_extents
;
889 Striper::extent_to_file(m_cct
, &m_dst_image_ctx
->layout
,
890 m_dst_object_number
, 0,
891 m_dst_image_ctx
->layout
.object_size
,
893 uint64_t overlap
= m_dst_image_ctx
->prune_parent_extents(image_extents
,
896 ldout(m_cct
, 20) << "no parent overlap" << dendl
;
898 } else if (src_snap_seq
== m_dst_zero_interval
.begin()->first
) {
899 for (auto e
: image_extents
) {
900 prev_end_size
+= e
.second
;
902 ceph_assert(prev_end_size
<= m_dst_image_ctx
->layout
.object_size
);
907 uint64_t end_size
= prev_end_size
;
909 // update end_size if there are writes into higher offsets
910 auto iter
= m_write_ops
.find(src_snap_seq
);
911 if (iter
!= m_write_ops
.end()) {
912 for (auto ©_op
: iter
->second
) {
913 for (auto &e
: copy_op
.dst_extent_map
) {
914 end_size
= std::max(end_size
, e
.first
+ e
.second
);
919 for (auto z
= zero_interval
.begin(); z
!= zero_interval
.end(); z
++) {
920 if (z
.get_start() + z
.get_len() >= end_size
) {
921 // zero interval at the object end
922 if (z
.get_start() == 0 && hide_parent
) {
923 m_write_ops
[src_snap_seq
]
924 .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC
, 0, 0, 0);
925 ldout(m_cct
, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl
;
926 } else if (z
.get_start() < prev_end_size
) {
927 if (z
.get_start() == 0) {
928 m_write_ops
[src_snap_seq
]
929 .emplace_back(COPY_OP_TYPE_REMOVE
, 0, 0, 0);
930 ldout(m_cct
, 20) << "COPY_OP_TYPE_REMOVE" << dendl
;
932 m_write_ops
[src_snap_seq
]
933 .emplace_back(COPY_OP_TYPE_TRUNC
, 0, z
.get_start(), 0);
934 ldout(m_cct
, 20) << "COPY_OP_TYPE_TRUNC " << z
.get_start() << dendl
;
937 end_size
= std::min(end_size
, z
.get_start());
939 // zero interval inside the object
940 m_write_ops
[src_snap_seq
]
941 .emplace_back(COPY_OP_TYPE_ZERO
, 0, z
.get_start(), z
.get_len());
942 ldout(m_cct
, 20) << "COPY_OP_TYPE_ZERO " << z
.get_start() << "~"
943 << z
.get_len() << dendl
;
946 ldout(m_cct
, 20) << "src_snap_seq=" << src_snap_seq
<< ", end_size="
947 << end_size
<< dendl
;
948 if (end_size
> 0 || hide_parent
) {
949 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS
;
950 if (fast_diff
&& end_size
== prev_end_size
&&
951 m_write_ops
[src_snap_seq
].empty()) {
952 m_dst_object_state
[src_snap_seq
] = OBJECT_EXISTS_CLEAN
;
955 prev_end_size
= end_size
;
959 template <typename I
>
960 void ObjectCopyRequest
<I
>::finish(int r
) {
961 ldout(m_cct
, 20) << "r=" << r
<< dendl
;
963 // ensure IoCtxs are closed prior to proceeding
964 auto on_finish
= m_on_finish
;
966 m_src_async_op
->finish_op();
967 delete m_src_async_op
;
970 on_finish
->complete(r
);
973 template <typename I
>
974 void ObjectCopyRequest
<I
>::compute_dst_object_may_exist() {
975 RWLock::RLocker
snap_locker(m_dst_image_ctx
->snap_lock
);
977 auto snap_ids
= m_dst_image_ctx
->snaps
;
978 snap_ids
.push_back(CEPH_NOSNAP
);
980 for (auto snap_id
: snap_ids
) {
981 m_dst_object_may_exist
[snap_id
] =
982 (m_dst_object_number
< m_dst_image_ctx
->get_object_count(snap_id
));
986 } // namespace deep_copy
987 } // namespace librbd
989 template class librbd::deep_copy::ObjectCopyRequest
<librbd::ImageCtx
>;