1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "ObjectCopyRequest.h"
5 #include "librados/snap_set_diff.h"
6 #include "librbd/ExclusiveLock.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "common/errno.h"
11 #define dout_context g_ceph_context
12 #define dout_subsys ceph_subsys_rbd_mirror
14 #define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \
15 << this << " " << __func__
19 bool operator==(const clone_info_t
& rhs
, const clone_info_t
& lhs
) {
20 return (rhs
.cloneid
== lhs
.cloneid
&&
21 rhs
.snaps
== lhs
.snaps
&&
22 rhs
.overlap
== lhs
.overlap
&&
23 rhs
.size
== lhs
.size
);
// Equality for snap_set_t. The part of the expression visible here compares
// the `clones` members of both operands.
// NOTE(review): the conjunction continues past the last line shown (it ends on
// `&&`); the remaining operand(s) and the closing brace are not visible in this
// listing -- confirm against the full file.
// NOTE(review): presumably the element comparison of `clones` relies on the
// clone_info_t operator== defined just above -- confirm.
26 bool operator==(const snap_set_t
& rhs
, const snap_set_t
& lhs
) {
27 return (rhs
.clones
== lhs
.clones
&&
31 } // namespace librados
35 namespace image_sync
{
37 using librbd::util::create_context_callback
;
38 using librbd::util::create_rados_callback
;
// Constructs a copy request for a single object.
//
// The initializer list stores the local and remote image contexts, the snap
// map, the object number and the completion context. The body then asserts the
// snap map is non-empty, duplicates each image's data IoCtx, resolves the
// object name on both images and logs the two names.
// NOTE(review): the `template` header, the `on_finish` parameter declaration
// and the start of the log statement are not visible in this listing.
41 ObjectCopyRequest
<I
>::ObjectCopyRequest(I
*local_image_ctx
, I
*remote_image_ctx
,
42 const SnapMap
*snap_map
,
43 uint64_t object_number
,
45 : m_local_image_ctx(local_image_ctx
), m_remote_image_ctx(remote_image_ctx
),
46 m_snap_map(snap_map
), m_object_number(object_number
),
47 m_on_finish(on_finish
) {
// callers must supply at least one snapshot mapping
48 assert(!snap_map
->empty());
// private copy of the local image's data IoCtx plus the local object name
50 m_local_io_ctx
.dup(m_local_image_ctx
->data_ctx
);
51 m_local_oid
= m_local_image_ctx
->get_object_name(object_number
);
// same for the remote image
53 m_remote_io_ctx
.dup(m_remote_image_ctx
->data_ctx
);
54 m_remote_oid
= m_remote_image_ctx
->get_object_name(object_number
);
57 << "remote_oid=" << m_remote_oid
<< ", "
58 << "local_oid=" << m_local_oid
<< dendl
;
// Starts the request.
// NOTE(review): only the signature is visible in this listing; presumably the
// body kicks off send_list_snaps() -- confirm against the full file.
62 void ObjectCopyRequest
<I
>::send() {
// Issues an asynchronous list-snaps read against the remote object.
//
// The results are delivered into m_snap_set / m_snap_ret and the completion is
// routed to handle_list_snaps().
// NOTE(review): some lines (e.g. the trailing aio_operate argument and any
// reset of m_snap_set / m_snap_ret) are not visible in this listing.
67 void ObjectCopyRequest
<I
>::send_list_snaps() {
// completion that dispatches to handle_list_snaps(r) on this request
70 librados::AioCompletion
*rados_completion
= create_rados_callback
<
71 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_list_snaps
>(this);
73 librados::ObjectReadOperation op
;
76 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
// the read is directed at CEPH_SNAPDIR rather than a specific snapshot
78 m_remote_io_ctx
.snap_set_read(CEPH_SNAPDIR
);
79 int r
= m_remote_io_ctx
.aio_operate(m_remote_oid
, rados_completion
, &op
,
// drop our reference to the completion once the op has been submitted
82 rados_completion
->release();
// Completion handler for the list-snaps read.
//
// Visible behavior: treats a negative m_snap_ret as significant when the
// overall result is 0, logs the result, and reports list failures via derr.
// When a retry was requested after a missing-object read
// (m_retry_missing_read), it compares the fresh snap set with the one saved at
// the time of the failed read: an unchanged snap set is reported as an error,
// otherwise the retry state is cleared.
// NOTE(review): the bodies of several branches (error propagation, early
// returns, the follow-up calls at the end) are not visible in this listing.
86 void ObjectCopyRequest
<I
>::handle_list_snaps(int r
) {
// the per-op result of list_snaps is reported separately in m_snap_ret
87 if (r
== 0 && m_snap_ret
< 0) {
91 dout(20) << ": r=" << r
<< dendl
;
99 derr
<< ": failed to list snaps: " << cpp_strerror(r
) << dendl
;
104 if (m_retry_missing_read
) {
// same snap set as before the failed read: re-listing produced nothing new
105 if (m_snap_set
== m_retry_snap_set
) {
106 derr
<< ": read encountered missing object using up-to-date snap set"
112 dout(20) << ": retrying using updated snap set" << dendl
;
// clear the retry bookkeeping now that a different snap set was obtained
113 m_retry_missing_read
= false;
114 m_retry_snap_set
= {};
// Builds and submits the remote read for the first pending entry of
// m_snap_sync_ops.
//
// For every SYNC_OP_TYPE_WRITE op in that entry a sparse read of
// [offset, offset+length) is queued; its results land in the op's own
// extent_map and out_bl. The read snapshot is taken from the second element of
// the map key and is set when the first WRITE op is encountered. If the entry
// holds no WRITE op, no read is needed. Otherwise the operation is submitted
// with handle_read_object() as its completion.
// NOTE(review): several lines (the early-exit bodies, the other switch cases,
// the code after aio_operate) are not visible in this listing.
121 template <typename I
>
122 void ObjectCopyRequest
<I
>::send_read_object() {
123 if (m_snap_sync_ops
.empty()) {
124 // no more snapshot diffs to read from remote
129 // build the read request
130 auto &sync_ops
= m_snap_sync_ops
.begin()->second
;
131 assert(!sync_ops
.empty());
// becomes true once at least one WRITE op has been queued for reading
133 bool read_required
= false;
134 librados::ObjectReadOperation op
;
135 for (auto &sync_op
: sync_ops
) {
136 switch (sync_op
.type
) {
137 case SYNC_OP_TYPE_WRITE
:
138 if (!read_required
) {
139 // map the sync op start snap id back to the necessary read snap id
140 librados::snap_t remote_snap_seq
=
141 m_snap_sync_ops
.begin()->first
.second
;
142 m_remote_io_ctx
.snap_set_read(remote_snap_seq
);
144 dout(20) << ": remote_snap_seq=" << remote_snap_seq
<< dendl
;
145 read_required
= true;
147 dout(20) << ": read op: " << sync_op
.offset
<< "~" << sync_op
.length
// results are stored on the sync op itself so the write phase can use them
149 op
.sparse_read(sync_op
.offset
, sync_op
.length
, &sync_op
.extent_map
,
150 &sync_op
.out_bl
, nullptr);
151 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
152 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
159 if (!read_required
) {
160 // nothing written to this object for this snapshot (must be trunc/remove)
// completion that dispatches to handle_read_object(r) on this request
165 librados::AioCompletion
*comp
= create_rados_callback
<
166 ObjectCopyRequest
<I
>, &ObjectCopyRequest
<I
>::handle_read_object
>(this);
167 int r
= m_remote_io_ctx
.aio_operate(m_remote_oid
, comp
, &op
, nullptr);
// Completion handler for the remote object read.
//
// Visible behavior: logs the result; on one path it snapshots the current
// m_snap_set into m_retry_snap_set, sets m_retry_missing_read and logs that
// the object may be missing because a snapshot was removed; on another path it
// reports a read failure via derr.
// NOTE(review): the conditions selecting these paths and the follow-up calls
// are not visible in this listing -- presumably the retry path is taken on
// -ENOENT and re-lists snapshots; confirm against the full file.
172 template <typename I
>
173 void ObjectCopyRequest
<I
>::handle_read_object(int r
) {
174 dout(20) << ": r=" << r
<< dendl
;
// remember the snap set in use so handle_list_snaps() can detect whether a
// re-list actually changed anything
177 m_retry_snap_set
= m_snap_set
;
178 m_retry_missing_read
= true;
180 dout(5) << ": object missing potentially due to removed snapshot" << dendl
;
186 derr
<< ": failed to read from remote object: " << cpp_strerror(r
)
// Translates the first pending entry of m_snap_sync_ops into a single write
// operation against the local object and submits it.
//
// Steps visible here:
//  * derive the local snapshot context (local_snap_seq / local_snap_ids) from
//    the snap map entry looked up by the first element of the map key;
//  * obtain an in-flight op context via start_local_op() while holding
//    owner_lock for read; a null result is logged as a lost exclusive lock;
//  * WRITE ops: walk the extent map returned by the read, zeroing gaps between
//    extents, writing each extent from out_bl, and finishing with either a
//    truncate (when the op ends at the recorded object size) or a zero of the
//    remaining tail;
//  * TRUNC ops: truncate unless the offset exceeds the recorded object size;
//  * REMOVE ops: logged here (the operation call itself is not visible);
//  * completion runs handle_write_object(r) and then completes the in-flight
//    op context.
// NOTE(review): several lines are not visible in this listing (iterator
// adjustment before reading the snap map entry, error-path bodies, `break`s,
// the declaration of tmpbl, the tail of the aio_operate call).
195 template <typename I
>
196 void ObjectCopyRequest
<I
>::send_write_object() {
197 // retrieve the local snap context for the op
198 SnapIds local_snap_ids
;
199 librados::snap_t local_snap_seq
= 0;
// the first element of the map key selects the snap map entry to write under
200 librados::snap_t remote_snap_seq
= m_snap_sync_ops
.begin()->first
.first
;
201 if (remote_snap_seq
!= 0) {
202 auto snap_map_it
= m_snap_map
->find(remote_snap_seq
);
203 assert(snap_map_it
!= m_snap_map
->end());
205 // write snapshot context should be before actual snapshot
206 if (snap_map_it
!= m_snap_map
->begin()) {
208 assert(!snap_map_it
->second
.empty());
209 local_snap_seq
= snap_map_it
->second
.front();
210 local_snap_ids
= snap_map_it
->second
;
214 Context
*finish_op_ctx
;
// owner_lock is held (read) only while the in-flight op is registered
216 RWLock::RLocker
owner_locker(m_local_image_ctx
->owner_lock
);
217 finish_op_ctx
= start_local_op(m_local_image_ctx
->owner_lock
);
219 if (finish_op_ctx
== nullptr) {
220 derr
<< ": lost exclusive lock" << dendl
;
226 << "local_snap_seq=" << local_snap_seq
<< ", "
227 << "local_snaps=" << local_snap_ids
<< dendl
;
229 auto &sync_ops
= m_snap_sync_ops
.begin()->second
;
230 assert(!sync_ops
.empty());
// object_offset: next object offset not yet covered by a zero/write
// buffer_offset: read position inside the op's out_bl
// NOTE(review): both are declared without initializers; the assignment of
// buffer_offset before its first use is not visible in this listing -- confirm
// it is reset for every WRITE op.
231 uint64_t object_offset
;
232 uint64_t buffer_offset
;
233 librados::ObjectWriteOperation op
;
234 for (auto &sync_op
: sync_ops
) {
235 switch (sync_op
.type
) {
236 case SYNC_OP_TYPE_WRITE
:
237 object_offset
= sync_op
.offset
;
// NOTE(review): `auto it` copies each extent map element; a const reference
// would avoid the copy.
239 for (auto it
: sync_op
.extent_map
) {
// hole between the previous extent and this one: zero it locally
240 if (object_offset
< it
.first
) {
241 dout(20) << ": zero op: " << object_offset
<< "~"
242 << it
.first
- object_offset
<< dendl
;
243 op
.zero(object_offset
, it
.first
- object_offset
);
245 dout(20) << ": write op: " << it
.first
<< "~" << it
.second
<< dendl
;
// slice this extent's bytes out of the read buffer
247 tmpbl
.substr_of(sync_op
.out_bl
, buffer_offset
, it
.second
);
248 op
.write(it
.first
, tmpbl
);
249 op
.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
250 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
);
251 buffer_offset
+= it
.second
;
252 object_offset
= it
.first
+ it
.second
;
// the read returned less than the requested range: handle the remaining tail
254 if (object_offset
< sync_op
.offset
+ sync_op
.length
) {
255 uint64_t sync_op_end
= sync_op
.offset
+ sync_op
.length
;
256 assert(sync_op_end
<= m_snap_object_sizes
[remote_snap_seq
]);
// tail reaches the recorded end of the object: shrink instead of zeroing
257 if (sync_op_end
== m_snap_object_sizes
[remote_snap_seq
]) {
258 dout(20) << ": trunc op: " << object_offset
<< dendl
;
259 op
.truncate(object_offset
);
260 m_snap_object_sizes
[remote_snap_seq
] = object_offset
;
262 dout(20) << ": zero op: " << object_offset
<< "~"
263 << sync_op_end
- object_offset
<< dendl
;
264 op
.zero(object_offset
, sync_op_end
- object_offset
);
268 case SYNC_OP_TYPE_TRUNC
:
269 if (sync_op
.offset
> m_snap_object_sizes
[remote_snap_seq
]) {
270 // skip (must have been updated in WRITE op case issuing trunc op)
273 dout(20) << ": trunc op: " << sync_op
.offset
<< dendl
;
274 op
.truncate(sync_op
.offset
);
276 case SYNC_OP_TYPE_REMOVE
:
277 dout(20) << ": remove op" << dendl
;
// run the write handler first, then complete the in-flight op context
285 auto ctx
= new FunctionContext([this, finish_op_ctx
](int r
) {
286 handle_write_object(r
);
287 finish_op_ctx
->complete(0);
289 librados::AioCompletion
*comp
= create_rados_callback(ctx
);
290 int r
= m_local_io_ctx
.aio_operate(m_local_oid
, comp
, &op
, local_snap_seq
,
// Completion handler for the local object write.
//
// Visible behavior: logs the result, reports write failures via derr, removes
// the entry of m_snap_sync_ops that was just processed, and either continues
// with the remaining entries or proceeds to send_update_object_map().
// NOTE(review): the error conditions, the body of the "more entries remain"
// branch and any early returns are not visible in this listing.
296 template <typename I
>
297 void ObjectCopyRequest
<I
>::handle_write_object(int r
) {
298 dout(20) << ": r=" << r
<< dendl
;
304 derr
<< ": failed to write to local object: " << cpp_strerror(r
)
// the first entry has been handled; drop it before checking for more work
310 m_snap_sync_ops
.erase(m_snap_sync_ops
.begin());
311 if (!m_snap_sync_ops
.empty()) {
316 send_update_object_map();
// Pushes the next pending object-map state (first entry of
// m_snap_object_states) to the local image's object map.
//
// owner_lock and snap_lock are taken for read manually and released on every
// visible exit path. The update is skipped when the object-map feature is
// disabled or no states are pending; a missing object map is reported as an
// error. Otherwise the first pending (snap id, state) pair is removed from
// m_snap_object_states, an in-flight op is registered via start_local_op(),
// and aio_update is invoked under a write lock on object_map_lock with a
// completion that runs handle_update_object_map(r) and then completes the
// in-flight op context.
// NOTE(review): the statements following each early unlock (finish/return),
// the tail of the aio_update argument list and the handling of `sent` are not
// visible in this listing.
319 template <typename I
>
320 void ObjectCopyRequest
<I
>::send_update_object_map() {
// lock order used here: owner_lock first, then snap_lock
321 m_local_image_ctx
->owner_lock
.get_read();
322 m_local_image_ctx
->snap_lock
.get_read();
323 if (!m_local_image_ctx
->test_features(RBD_FEATURE_OBJECT_MAP
,
324 m_local_image_ctx
->snap_lock
) ||
325 m_snap_object_states
.empty()) {
// nothing to update: release in reverse acquisition order
326 m_local_image_ctx
->snap_lock
.put_read();
327 m_local_image_ctx
->owner_lock
.put_read();
330 } else if (m_local_image_ctx
->object_map
== nullptr) {
331 // possible that exclusive lock was lost in background
332 derr
<< ": object map is not initialized" << dendl
;
334 m_local_image_ctx
->snap_lock
.put_read();
335 m_local_image_ctx
->owner_lock
.put_read();
340 assert(m_local_image_ctx
->object_map
!= nullptr);
// take a copy of the first pending state, then remove it from the queue
342 auto snap_object_state
= *m_snap_object_states
.begin();
343 m_snap_object_states
.erase(m_snap_object_states
.begin());
346 << "local_snap_id=" << snap_object_state
.first
<< ", "
347 << "object_state=" << static_cast<uint32_t>(snap_object_state
.second
)
350 auto finish_op_ctx
= start_local_op(m_local_image_ctx
->owner_lock
);
351 if (finish_op_ctx
== nullptr) {
352 derr
<< ": lost exclusive lock" << dendl
;
353 m_local_image_ctx
->snap_lock
.put_read();
354 m_local_image_ctx
->owner_lock
.put_read();
// run the update handler first, then complete the in-flight op context
359 auto ctx
= new FunctionContext([this, finish_op_ctx
](int r
) {
360 handle_update_object_map(r
);
361 finish_op_ctx
->complete(0);
364 RWLock::WLocker
object_map_locker(m_local_image_ctx
->object_map_lock
);
365 bool sent
= m_local_image_ctx
->object_map
->template aio_update
<
366 Context
, &Context::complete
>(
367 snap_object_state
.first
, m_object_number
, snap_object_state
.second
, {},
370 m_local_image_ctx
->snap_lock
.put_read();
371 m_local_image_ctx
->owner_lock
.put_read();
// Completion handler for an object-map update.
//
// Visible behavior: logs the result and, while m_snap_object_states still has
// pending entries, issues the next update via send_update_object_map().
// NOTE(review): result checking and what happens once no states remain are not
// visible in this listing.
374 template <typename I
>
375 void ObjectCopyRequest
<I
>::handle_update_object_map(int r
) {
376 dout(20) << ": r=" << r
<< dendl
;
379 if (!m_snap_object_states
.empty()) {
380 send_update_object_map();
// Registers an in-flight operation with the local image's exclusive lock.
//
// Requires the local image's owner_lock to be held (asserted). Returns the
// context produced by exclusive_lock->start_op(); callers in this file treat a
// null return as "lost exclusive lock" and complete the returned context once
// the operation is done.
// NOTE(review): the body of the `exclusive_lock == nullptr` branch is not
// visible in this listing -- presumably it returns nullptr; confirm.
// NOTE(review): the `owner_lock` parameter is not used in the visible code; the
// assertion reads m_local_image_ctx->owner_lock directly.
386 template <typename I
>
387 Context
*ObjectCopyRequest
<I
>::start_local_op(RWLock
&owner_lock
) {
388 assert(m_local_image_ctx
->owner_lock
.is_locked());
389 if (m_local_image_ctx
->exclusive_lock
== nullptr) {
392 return m_local_image_ctx
->exclusive_lock
->start_op();
// Derives, from the listed remote snap set (m_snap_set) and the snap map, the
// work needed to bring the local object in sync.
//
// The three result containers are reset and then filled while walking the snap
// map in order, diffing each snapshot against the previous one via
// calc_snap_set_diff():
//  * m_snap_sync_ops      -- keyed by {end_remote_snap_id, clone_end_snap_id};
//                            WRITE ops per diff interval, a TRUNC op when the
//                            object shrank, or a REMOVE op;
//  * m_snap_object_states -- keyed by local snap id; OBJECT_EXISTS, or
//                            OBJECT_EXISTS_CLEAN when fast-diff is enabled and
//                            nothing changed since the previous snapshot;
//  * m_snap_object_sizes  -- keyed by remote snap id; object size at that
//                            snapshot.
// NOTE(review): the declarations of end_size / exists, the tail of the
// calc_snap_set_diff() call and the conditions guarding the "exists" and
// "remove" paths are not visible in this listing.
395 template <typename I
>
396 void ObjectCopyRequest
<I
>::compute_diffs() {
397 CephContext
*cct
= m_local_image_ctx
->cct
;
// start from a clean slate (this may run again after a snap-set re-list)
399 m_snap_sync_ops
= {};
400 m_snap_object_states
= {};
401 m_snap_object_sizes
= {};
// last key of the snap map; used below as the upper bound for read snapshots
// ("pont" in the variable name is presumably a typo for "point")
403 librados::snap_t remote_sync_pont_snap_id
= m_snap_map
->rbegin()->first
;
// state carried from one snapshot iteration to the next
404 uint64_t prev_end_size
= 0;
405 bool prev_exists
= false;
406 librados::snap_t start_remote_snap_id
= 0;
407 for (auto &pair
: *m_snap_map
) {
408 assert(!pair
.second
.empty());
409 librados::snap_t end_remote_snap_id
= pair
.first
;
410 librados::snap_t end_local_snap_id
= pair
.second
.front();
412 interval_set
<uint64_t> diff
;
415 librados::snap_t clone_end_snap_id
;
416 calc_snap_set_diff(cct
, m_snap_set
, start_remote_snap_id
,
417 end_remote_snap_id
, &diff
, &end_size
, &exists
,
421 << "start_remote_snap=" << start_remote_snap_id
<< ", "
422 << "end_remote_snap_id=" << end_remote_snap_id
<< ", "
423 << "clone_end_snap_id=" << clone_end_snap_id
<< ", "
424 << "end_local_snap_id=" << end_local_snap_id
<< ", "
425 << "diff=" << diff
<< ", "
426 << "end_size=" << end_size
<< ", "
427 << "exists=" << exists
<< dendl
;
429 // clip diff to size of object (in case it was truncated)
430 if (end_size
< prev_end_size
) {
431 interval_set
<uint64_t> trunc
;
// NOTE(review): if interval_set::insert takes (start, length), the second
// argument here is an absolute offset rather than a length -- confirm intent.
432 trunc
.insert(end_size
, prev_end_size
);
433 trunc
.intersection_of(diff
);
434 diff
.subtract(trunc
);
435 dout(20) << ": clearing truncate diff: " << trunc
<< dendl
;
438 // prepare the object map state
440 RWLock::RLocker
snap_locker(m_local_image_ctx
->snap_lock
);
441 uint8_t object_state
= OBJECT_EXISTS
;
// unchanged since the previous snapshot and fast-diff enabled: mark as clean
442 if (m_local_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
,
443 m_local_image_ctx
->snap_lock
) &&
444 prev_exists
&& diff
.empty() && end_size
== prev_end_size
) {
445 object_state
= OBJECT_EXISTS_CLEAN
;
447 m_snap_object_states
[end_local_snap_id
] = object_state
;
450 // reads should be issued against the newest (existing) snapshot within
451 // the associated snapshot object clone. writes should be issued
452 // against the oldest snapshot in the snap_map.
453 assert(clone_end_snap_id
>= end_remote_snap_id
);
454 if (clone_end_snap_id
> remote_sync_pont_snap_id
) {
455 // do not read past the sync point snapshot
456 clone_end_snap_id
= remote_sync_pont_snap_id
;
459 // object write/zero, or truncate
460 // NOTE: a single snapshot clone might represent multiple snapshots, but
461 // the write/zero and truncate ops will only be associated with the first
462 // snapshot encountered within the clone since the diff will be empty for
463 // subsequent snapshots and the size will remain constant for a clone.
464 for (auto it
= diff
.begin(); it
!= diff
.end(); ++it
) {
465 dout(20) << ": read/write op: " << it
.get_start() << "~"
466 << it
.get_len() << dendl
;
// key = {snapshot to write under, snapshot to read from}
467 m_snap_sync_ops
[{end_remote_snap_id
, clone_end_snap_id
}].emplace_back(
468 SYNC_OP_TYPE_WRITE
, it
.get_start(), it
.get_len());
// the object shrank relative to the previous snapshot
470 if (end_size
< prev_end_size
) {
471 dout(20) << ": trunc op: " << end_size
<< dendl
;
472 m_snap_sync_ops
[{end_remote_snap_id
, clone_end_snap_id
}].emplace_back(
473 SYNC_OP_TYPE_TRUNC
, end_size
, 0U);
475 m_snap_object_sizes
[end_remote_snap_id
] = end_size
;
479 dout(20) << ": remove op" << dendl
;
// a remove needs no read, so both key elements are the same snapshot id
480 m_snap_sync_ops
[{end_remote_snap_id
, end_remote_snap_id
}].emplace_back(
481 SYNC_OP_TYPE_REMOVE
, 0U, 0U);
// carry this snapshot's results into the next iteration
485 prev_end_size
= end_size
;
486 prev_exists
= exists
;
487 start_remote_snap_id
= end_remote_snap_id
;
// Completes the request with result `r`.
//
// Visible behavior: logs the result, copies m_on_finish into a local variable
// and invokes complete(r) on that copy.
// NOTE(review): lines between the copy and the completion are not visible in
// this listing; presumably the request destroys itself there (which would be
// why the callback is copied first and the comment mentions closing the
// IoCtxs) -- confirm against the full file.
491 template <typename I
>
492 void ObjectCopyRequest
<I
>::finish(int r
) {
493 dout(20) << ": r=" << r
<< dendl
;
495 // ensure IoCtxs are closed prior to proceeding
496 auto on_finish
= m_on_finish
;
499 on_finish
->complete(r
);
502 } // namespace image_sync
503 } // namespace mirror
506 template class rbd::mirror::image_sync::ObjectCopyRequest
<librbd::ImageCtx
>;