1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
5 #include "common/debug.h"
6 #include "common/errno.h"
7 #include "include/stringify.h"
8 #include "common/Timer.h"
9 #include "common/WorkQueue.h"
10 #include "cls/rbd/cls_rbd_client.h"
11 #include "librbd/ImageCtx.h"
12 #include "librbd/ImageState.h"
13 #include "librbd/Utils.h"
14 #include "librbd/deep_copy/ImageCopyRequest.h"
15 #include "librbd/deep_copy/SnapshotCopyRequest.h"
16 #include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h"
17 #include "librbd/mirror/snapshot/GetImageStateRequest.h"
18 #include "librbd/mirror/snapshot/ImageMeta.h"
19 #include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
20 #include "tools/rbd_mirror/InstanceWatcher.h"
21 #include "tools/rbd_mirror/PoolMetaCache.h"
22 #include "tools/rbd_mirror/Threads.h"
23 #include "tools/rbd_mirror/Types.h"
24 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
25 #include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
26 #include "tools/rbd_mirror/image_replayer/Utils.h"
27 #include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h"
28 #include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
29 #include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
31 #define dout_context g_ceph_context
32 #define dout_subsys ceph_subsys_rbd_mirror
34 #define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
35 << "Replayer: " << this << " " << __func__ << ": "
37 extern PerfCounters
*g_perf_counters
;
41 namespace image_replayer
{
47 std::pair
<uint64_t, librbd::SnapInfo
*> get_newest_mirror_snapshot(
49 for (auto snap_info_it
= image_ctx
->snap_info
.rbegin();
50 snap_info_it
!= image_ctx
->snap_info
.rend(); ++snap_info_it
) {
51 const auto& snap_ns
= snap_info_it
->second
.snap_namespace
;
52 auto mirror_ns
= boost::get
<
53 cls::rbd::MirrorSnapshotNamespace
>(&snap_ns
);
54 if (mirror_ns
== nullptr || !mirror_ns
->complete
) {
58 return {snap_info_it
->first
, &snap_info_it
->second
};
61 return {CEPH_NOSNAP
, nullptr};
64 } // anonymous namespace
66 using librbd::util::create_async_context_callback
;
67 using librbd::util::create_context_callback
;
68 using librbd::util::create_rados_callback
;
71 struct Replayer
<I
>::C_UpdateWatchCtx
: public librbd::UpdateWatchCtx
{
72 Replayer
<I
>* replayer
;
74 C_UpdateWatchCtx(Replayer
<I
>* replayer
) : replayer(replayer
) {
77 void handle_notify() override
{
78 replayer
->handle_image_update_notify();
83 struct Replayer
<I
>::C_TrackedOp
: public Context
{
87 C_TrackedOp(Replayer
* replayer
, Context
* ctx
)
88 : replayer(replayer
), ctx(ctx
) {
89 replayer
->m_in_flight_op_tracker
.start_op();
92 void finish(int r
) override
{
94 replayer
->m_in_flight_op_tracker
.finish_op();
99 struct Replayer
<I
>::ProgressContext
: public librbd::ProgressContext
{
102 ProgressContext(Replayer
* replayer
) : replayer(replayer
) {
105 int update_progress(uint64_t object_number
, uint64_t object_count
) override
{
106 replayer
->handle_copy_image_progress(object_number
, object_count
);
111 template <typename I
>
112 Replayer
<I
>::Replayer(
114 InstanceWatcher
<I
>* instance_watcher
,
115 const std::string
& local_mirror_uuid
,
116 PoolMetaCache
* pool_meta_cache
,
117 StateBuilder
<I
>* state_builder
,
118 ReplayerListener
* replayer_listener
)
119 : m_threads(threads
),
120 m_instance_watcher(instance_watcher
),
121 m_local_mirror_uuid(local_mirror_uuid
),
122 m_pool_meta_cache(pool_meta_cache
),
123 m_state_builder(state_builder
),
124 m_replayer_listener(replayer_listener
),
125 m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
126 "rbd::mirror::image_replayer::snapshot::Replayer", this))) {
130 template <typename I
>
131 Replayer
<I
>::~Replayer() {
133 ceph_assert(m_state
== STATE_COMPLETE
);
134 ceph_assert(m_update_watch_ctx
== nullptr);
135 ceph_assert(m_progress_ctx
== nullptr);
138 template <typename I
>
139 void Replayer
<I
>::init(Context
* on_finish
) {
142 ceph_assert(m_state
== STATE_INIT
);
144 RemotePoolMeta remote_pool_meta
;
145 int r
= m_pool_meta_cache
->get_remote_pool_meta(
146 m_state_builder
->remote_image_ctx
->md_ctx
.get_id(), &remote_pool_meta
);
147 if (r
< 0 || remote_pool_meta
.mirror_peer_uuid
.empty()) {
148 derr
<< "failed to retrieve mirror peer uuid from remote pool" << dendl
;
149 m_state
= STATE_COMPLETE
;
150 m_threads
->work_queue
->queue(on_finish
, r
);
154 m_remote_mirror_peer_uuid
= remote_pool_meta
.mirror_peer_uuid
;
155 dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid
<< dendl
;
157 ceph_assert(m_on_init_shutdown
== nullptr);
158 m_on_init_shutdown
= on_finish
;
160 register_local_update_watcher();
163 template <typename I
>
164 void Replayer
<I
>::shut_down(Context
* on_finish
) {
167 std::unique_lock locker
{m_lock
};
168 ceph_assert(m_on_init_shutdown
== nullptr);
169 m_on_init_shutdown
= on_finish
;
171 m_error_description
= "";
173 ceph_assert(m_state
!= STATE_INIT
);
174 auto state
= STATE_COMPLETE
;
175 std::swap(m_state
, state
);
177 if (state
== STATE_REPLAYING
) {
178 // if a sync request was pending, request a cancelation
179 m_instance_watcher
->cancel_sync_request(
180 m_state_builder
->local_image_ctx
->id
);
182 // TODO interrupt snapshot copy and image copy state machines even if remote
183 // cluster is unreachable
184 dout(10) << "shut down pending on completion of snapshot replay" << dendl
;
189 unregister_remote_update_watcher();
192 template <typename I
>
193 void Replayer
<I
>::flush(Context
* on_finish
) {
197 m_threads
->work_queue
->queue(on_finish
, 0);
200 template <typename I
>
201 bool Replayer
<I
>::get_replay_status(std::string
* description
,
202 Context
* on_finish
) {
205 std::unique_lock locker
{m_lock
};
206 if (m_state
!= STATE_REPLAYING
&& m_state
!= STATE_IDLE
) {
209 derr
<< "replay not running" << dendl
;
210 on_finish
->complete(-EAGAIN
);
214 std::shared_lock local_image_locker
{
215 m_state_builder
->local_image_ctx
->image_lock
};
216 auto [local_snap_id
, local_snap_info
] = get_newest_mirror_snapshot(
217 m_state_builder
->local_image_ctx
);
219 std::shared_lock remote_image_locker
{
220 m_state_builder
->remote_image_ctx
->image_lock
};
221 auto [remote_snap_id
, remote_snap_info
] = get_newest_mirror_snapshot(
222 m_state_builder
->remote_image_ctx
);
224 if (remote_snap_info
== nullptr) {
225 remote_image_locker
.unlock();
226 local_image_locker
.unlock();
229 derr
<< "remote image does not contain mirror snapshots" << dendl
;
230 on_finish
->complete(-EAGAIN
);
234 std::string replay_state
= "idle";
235 if (m_remote_snap_id_end
!= CEPH_NOSNAP
) {
236 replay_state
= "syncing";
241 "\"replay_state\": \"" + replay_state
+ "\", " +
242 "\"remote_snapshot_timestamp\": " +
243 stringify(remote_snap_info
->timestamp
.sec());
245 auto matching_remote_snap_id
= util::compute_remote_snap_id(
246 m_state_builder
->local_image_ctx
->image_lock
,
247 m_state_builder
->local_image_ctx
->snap_info
,
248 local_snap_id
, m_state_builder
->remote_mirror_uuid
);
249 auto matching_remote_snap_it
=
250 m_state_builder
->remote_image_ctx
->snap_info
.find(matching_remote_snap_id
);
251 if (matching_remote_snap_id
!= CEPH_NOSNAP
&&
252 matching_remote_snap_it
!=
253 m_state_builder
->remote_image_ctx
->snap_info
.end()) {
254 // use the timestamp from the matching remote image since
255 // the local snapshot would just be the time the snapshot was
256 // synced and not the consistency point in time.
258 "\"local_snapshot_timestamp\": " +
259 stringify(matching_remote_snap_it
->second
.timestamp
.sec());
262 matching_remote_snap_it
= m_state_builder
->remote_image_ctx
->snap_info
.find(
263 m_remote_snap_id_end
);
264 if (m_remote_snap_id_end
!= CEPH_NOSNAP
&&
265 matching_remote_snap_it
!=
266 m_state_builder
->remote_image_ctx
->snap_info
.end()) {
268 "\"syncing_snapshot_timestamp\": " +
269 stringify(remote_snap_info
->timestamp
.sec()) + ", " +
270 "\"syncing_percent\": " + stringify(static_cast<uint32_t>(
271 100 * m_local_mirror_snap_ns
.last_copied_object_number
/
272 static_cast<float>(std::max
<uint64_t>(1U, m_local_object_count
))));
278 local_image_locker
.unlock();
279 remote_image_locker
.unlock();
281 on_finish
->complete(-EEXIST
);
285 template <typename I
>
286 void Replayer
<I
>::load_local_image_meta() {
290 // reset state in case new snapshot is added while we are scanning
291 std::unique_lock locker
{m_lock
};
292 m_image_updated
= false;
295 ceph_assert(m_state_builder
->local_image_meta
!= nullptr);
296 auto ctx
= create_context_callback
<
297 Replayer
<I
>, &Replayer
<I
>::handle_load_local_image_meta
>(this);
298 m_state_builder
->local_image_meta
->load(ctx
);
301 template <typename I
>
302 void Replayer
<I
>::handle_load_local_image_meta(int r
) {
303 dout(10) << "r=" << r
<< dendl
;
305 if (r
< 0 && r
!= -ENOENT
) {
306 derr
<< "failed to load local image-meta: " << cpp_strerror(r
) << dendl
;
307 handle_replay_complete(r
, "failed to load local image-meta");
311 if (r
>= 0 && m_state_builder
->local_image_meta
->resync_requested
) {
312 m_resync_requested
= true;
314 dout(10) << "local image resync requested" << dendl
;
315 handle_replay_complete(0, "resync requested");
319 refresh_local_image();
322 template <typename I
>
323 void Replayer
<I
>::refresh_local_image() {
324 if (!m_state_builder
->local_image_ctx
->state
->is_refresh_required()) {
325 refresh_remote_image();
330 auto ctx
= create_context_callback
<
331 Replayer
<I
>, &Replayer
<I
>::handle_refresh_local_image
>(this);
332 m_state_builder
->local_image_ctx
->state
->refresh(ctx
);
335 template <typename I
>
336 void Replayer
<I
>::handle_refresh_local_image(int r
) {
337 dout(10) << "r=" << r
<< dendl
;
340 derr
<< "failed to refresh local image: " << cpp_strerror(r
) << dendl
;
341 handle_replay_complete(r
, "failed to refresh local image");
345 refresh_remote_image();
348 template <typename I
>
349 void Replayer
<I
>::refresh_remote_image() {
350 if (!m_state_builder
->remote_image_ctx
->state
->is_refresh_required()) {
351 std::unique_lock locker
{m_lock
};
352 scan_local_mirror_snapshots(&locker
);
357 auto ctx
= create_context_callback
<
358 Replayer
<I
>, &Replayer
<I
>::handle_refresh_remote_image
>(this);
359 m_state_builder
->remote_image_ctx
->state
->refresh(ctx
);
362 template <typename I
>
363 void Replayer
<I
>::handle_refresh_remote_image(int r
) {
364 dout(10) << "r=" << r
<< dendl
;
367 derr
<< "failed to refresh remote image: " << cpp_strerror(r
) << dendl
;
368 handle_replay_complete(r
, "failed to refresh remote image");
372 std::unique_lock locker
{m_lock
};
373 scan_local_mirror_snapshots(&locker
);
376 template <typename I
>
377 void Replayer
<I
>::scan_local_mirror_snapshots(
378 std::unique_lock
<ceph::mutex
>* locker
) {
379 if (is_replay_interrupted(locker
)) {
385 m_local_snap_id_start
= 0;
386 m_local_snap_id_end
= CEPH_NOSNAP
;
387 m_local_mirror_snap_ns
= {};
388 m_local_object_count
= 0;
390 m_remote_snap_id_start
= 0;
391 m_remote_snap_id_end
= CEPH_NOSNAP
;
392 m_remote_mirror_snap_ns
= {};
394 auto local_image_ctx
= m_state_builder
->local_image_ctx
;
395 std::shared_lock image_locker
{local_image_ctx
->image_lock
};
396 for (auto snap_info_it
= local_image_ctx
->snap_info
.begin();
397 snap_info_it
!= local_image_ctx
->snap_info
.end(); ++snap_info_it
) {
398 const auto& snap_ns
= snap_info_it
->second
.snap_namespace
;
399 auto mirror_ns
= boost::get
<
400 cls::rbd::MirrorSnapshotNamespace
>(&snap_ns
);
401 if (mirror_ns
== nullptr) {
405 dout(15) << "local mirror snapshot: id=" << snap_info_it
->first
<< ", "
406 << "mirror_ns=" << *mirror_ns
<< dendl
;
407 m_local_mirror_snap_ns
= *mirror_ns
;
409 auto local_snap_id
= snap_info_it
->first
;
410 if (mirror_ns
->is_non_primary()) {
411 if (mirror_ns
->complete
) {
412 // if remote has new snapshots, we would sync from here
413 m_local_snap_id_start
= local_snap_id
;
414 m_local_snap_id_end
= CEPH_NOSNAP
;
416 // start snap will be last complete mirror snapshot or initial
418 m_local_snap_id_end
= local_snap_id
;
420 } else if (mirror_ns
->is_primary()) {
421 if (mirror_ns
->complete
) {
422 m_local_snap_id_start
= local_snap_id
;
423 m_local_snap_id_end
= CEPH_NOSNAP
;
425 derr
<< "incomplete local primary snapshot" << dendl
;
426 handle_replay_complete(locker
, -EINVAL
,
427 "incomplete local primary snapshot");
431 derr
<< "unknown local mirror snapshot state" << dendl
;
432 handle_replay_complete(locker
, -EINVAL
,
433 "invalid local mirror snapshot state");
437 image_locker
.unlock();
439 if (m_local_snap_id_start
> 0 || m_local_snap_id_end
!= CEPH_NOSNAP
) {
440 if (m_local_mirror_snap_ns
.is_non_primary() &&
441 m_local_mirror_snap_ns
.primary_mirror_uuid
!=
442 m_state_builder
->remote_mirror_uuid
) {
443 // TODO support multiple peers
444 derr
<< "local image linked to unknown peer: "
445 << m_local_mirror_snap_ns
.primary_mirror_uuid
<< dendl
;
446 handle_replay_complete(locker
, -EEXIST
,
447 "local image linked to unknown peer");
449 } else if (m_local_mirror_snap_ns
.state
==
450 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY
) {
451 dout(5) << "local image promoted" << dendl
;
452 handle_replay_complete(locker
, 0, "force promoted");
456 dout(10) << "found local mirror snapshot: "
457 << "local_snap_id_start=" << m_local_snap_id_start
<< ", "
458 << "local_snap_id_end=" << m_local_snap_id_end
<< ", "
459 << "local_snap_ns=" << m_local_mirror_snap_ns
<< dendl
;
460 if (!m_local_mirror_snap_ns
.is_primary() &&
461 m_local_mirror_snap_ns
.complete
) {
462 // our remote sync should start after this completed snapshot
463 m_remote_snap_id_start
= m_local_mirror_snap_ns
.primary_snap_id
;
467 // we don't have any mirror snapshots or only completed non-primary
469 scan_remote_mirror_snapshots(locker
);
472 template <typename I
>
473 void Replayer
<I
>::scan_remote_mirror_snapshots(
474 std::unique_lock
<ceph::mutex
>* locker
) {
477 bool split_brain
= false;
478 bool remote_demoted
= false;
479 auto remote_image_ctx
= m_state_builder
->remote_image_ctx
;
480 std::shared_lock image_locker
{remote_image_ctx
->image_lock
};
481 for (auto snap_info_it
= remote_image_ctx
->snap_info
.begin();
482 snap_info_it
!= remote_image_ctx
->snap_info
.end(); ++snap_info_it
) {
483 const auto& snap_ns
= snap_info_it
->second
.snap_namespace
;
484 auto mirror_ns
= boost::get
<
485 cls::rbd::MirrorSnapshotNamespace
>(&snap_ns
);
486 if (mirror_ns
== nullptr) {
490 dout(15) << "remote mirror snapshot: id=" << snap_info_it
->first
<< ", "
491 << "mirror_ns=" << *mirror_ns
<< dendl
;
492 if (!mirror_ns
->is_primary() && !mirror_ns
->is_non_primary()) {
493 derr
<< "unknown remote mirror snapshot state" << dendl
;
494 handle_replay_complete(locker
, -EINVAL
,
495 "invalid remote mirror snapshot state");
498 remote_demoted
= mirror_ns
->is_demoted();
501 auto remote_snap_id
= snap_info_it
->first
;
502 if (m_local_snap_id_start
> 0 || m_local_snap_id_end
!= CEPH_NOSNAP
) {
503 // we have a local mirror snapshot
504 if (m_local_mirror_snap_ns
.is_non_primary()) {
505 // previously validated that it was linked to remote
506 ceph_assert(m_local_mirror_snap_ns
.primary_mirror_uuid
==
507 m_state_builder
->remote_mirror_uuid
);
509 if (m_local_mirror_snap_ns
.complete
&&
510 m_local_mirror_snap_ns
.primary_snap_id
>= remote_snap_id
) {
511 // skip past completed remote snapshot
512 m_remote_snap_id_start
= remote_snap_id
;
513 dout(15) << "skipping synced remote snapshot " << remote_snap_id
516 } else if (!m_local_mirror_snap_ns
.complete
&&
517 m_local_mirror_snap_ns
.primary_snap_id
> remote_snap_id
) {
518 // skip until we get to the in-progress remote snapshot
519 dout(15) << "skipping synced remote snapshot " << remote_snap_id
520 << " while search for in-progress sync" << dendl
;
521 m_remote_snap_id_start
= remote_snap_id
;
524 } else if (m_local_mirror_snap_ns
.state
==
525 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED
) {
526 // find the matching demotion snapshot in remote image
527 ceph_assert(m_local_snap_id_start
> 0);
528 if (mirror_ns
->state
==
529 cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED
&&
530 mirror_ns
->primary_mirror_uuid
== m_local_mirror_uuid
&&
531 mirror_ns
->primary_snap_id
== m_local_snap_id_start
) {
532 dout(10) << "located matching demotion snapshot: "
533 << "remote_snap_id=" << remote_snap_id
<< ", "
534 << "local_snap_id=" << m_local_snap_id_start
<< dendl
;
535 m_remote_snap_id_start
= remote_snap_id
;
538 } else if (m_remote_snap_id_start
== 0) {
539 // still looking for our matching demotion snapshot
540 dout(15) << "skipping remote snapshot " << remote_snap_id
<< " "
541 << "while searching for demotion" << dendl
;
546 // should not have been able to reach this
551 // find first snapshot where were are listed as a peer
552 if (!mirror_ns
->is_primary()) {
553 dout(15) << "skipping non-primary remote snapshot" << dendl
;
555 } else if (mirror_ns
->mirror_peer_uuids
.count(m_remote_mirror_peer_uuid
) ==
557 dout(15) << "skipping remote snapshot due to missing mirror peer"
562 m_remote_snap_id_end
= remote_snap_id
;
563 m_remote_mirror_snap_ns
= *mirror_ns
;
566 image_locker
.unlock();
568 if (m_remote_snap_id_end
!= CEPH_NOSNAP
) {
569 dout(10) << "found remote mirror snapshot: "
570 << "remote_snap_id_start=" << m_remote_snap_id_start
<< ", "
571 << "remote_snap_id_end=" << m_remote_snap_id_end
<< ", "
572 << "remote_snap_ns=" << m_remote_mirror_snap_ns
<< dendl
;
573 if (m_remote_mirror_snap_ns
.complete
) {
576 if (m_local_snap_id_end
!= CEPH_NOSNAP
&&
577 !m_local_mirror_snap_ns
.complete
) {
578 // attempt to resume image-sync
579 dout(10) << "local image contains in-progress mirror snapshot"
581 get_local_image_state();
587 // might have raced with the creation of a remote mirror snapshot
588 // so we will need to refresh and rescan once it completes
589 dout(15) << "remote mirror snapshot not complete" << dendl
;
593 if (m_image_updated
) {
594 // received update notification while scanning image, restart ...
595 m_image_updated
= false;
598 dout(10) << "restarting snapshot scan due to remote update notification"
600 load_local_image_meta();
604 if (is_replay_interrupted(locker
)) {
606 } else if (split_brain
) {
607 derr
<< "split-brain detected: failed to find matching non-primary "
608 << "snapshot in remote image: "
609 << "local_snap_id_start=" << m_local_snap_id_start
<< ", "
610 << "local_snap_ns=" << m_local_mirror_snap_ns
<< dendl
;
611 handle_replay_complete(locker
, -EEXIST
, "split-brain");
613 } else if (remote_demoted
) {
614 dout(10) << "remote image demoted" << dendl
;
615 handle_replay_complete(locker
, -EREMOTEIO
, "remote image demoted");
619 dout(10) << "all remote snapshots synced: idling waiting for new snapshot"
621 ceph_assert(m_state
== STATE_REPLAYING
);
622 m_state
= STATE_IDLE
;
624 notify_status_updated();
627 template <typename I
>
628 void Replayer
<I
>::copy_snapshots() {
629 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start
<< ", "
630 << "remote_snap_id_end=" << m_remote_snap_id_end
<< ", "
631 << "local_snap_id_start=" << m_local_snap_id_start
<< dendl
;
633 ceph_assert(m_remote_snap_id_start
!= CEPH_NOSNAP
);
634 ceph_assert(m_remote_snap_id_end
> 0 &&
635 m_remote_snap_id_end
!= CEPH_NOSNAP
);
636 ceph_assert(m_local_snap_id_start
!= CEPH_NOSNAP
);
638 m_local_mirror_snap_ns
= {};
639 auto ctx
= create_context_callback
<
640 Replayer
<I
>, &Replayer
<I
>::handle_copy_snapshots
>(this);
641 auto req
= librbd::deep_copy::SnapshotCopyRequest
<I
>::create(
642 m_state_builder
->remote_image_ctx
, m_state_builder
->local_image_ctx
,
643 m_remote_snap_id_start
, m_remote_snap_id_end
, m_local_snap_id_start
,
644 false, m_threads
->work_queue
, &m_local_mirror_snap_ns
.snap_seqs
,
649 template <typename I
>
650 void Replayer
<I
>::handle_copy_snapshots(int r
) {
651 dout(10) << "r=" << r
<< dendl
;
654 derr
<< "failed to copy snapshots from remote to local image: "
655 << cpp_strerror(r
) << dendl
;
656 handle_replay_complete(
657 r
, "failed to copy snapshots from remote to local image");
661 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start
<< ", "
662 << "remote_snap_id_end=" << m_remote_snap_id_end
<< ", "
663 << "local_snap_id_start=" << m_local_snap_id_start
<< ", "
664 << "snap_seqs=" << m_local_mirror_snap_ns
.snap_seqs
<< dendl
;
665 get_remote_image_state();
668 template <typename I
>
669 void Replayer
<I
>::get_remote_image_state() {
672 auto ctx
= create_context_callback
<
673 Replayer
<I
>, &Replayer
<I
>::handle_get_remote_image_state
>(this);
674 auto req
= librbd::mirror::snapshot::GetImageStateRequest
<I
>::create(
675 m_state_builder
->remote_image_ctx
, m_remote_snap_id_end
,
676 &m_image_state
, ctx
);
680 template <typename I
>
681 void Replayer
<I
>::handle_get_remote_image_state(int r
) {
682 dout(10) << "r=" << r
<< dendl
;
685 derr
<< "failed to retrieve remote snapshot image state: "
686 << cpp_strerror(r
) << dendl
;
687 handle_replay_complete(r
, "failed to retrieve remote snapshot image state");
691 create_non_primary_snapshot();
694 template <typename I
>
695 void Replayer
<I
>::get_local_image_state() {
698 ceph_assert(m_local_snap_id_end
!= CEPH_NOSNAP
);
699 auto ctx
= create_context_callback
<
700 Replayer
<I
>, &Replayer
<I
>::handle_get_local_image_state
>(this);
701 auto req
= librbd::mirror::snapshot::GetImageStateRequest
<I
>::create(
702 m_state_builder
->local_image_ctx
, m_local_snap_id_end
,
703 &m_image_state
, ctx
);
707 template <typename I
>
708 void Replayer
<I
>::handle_get_local_image_state(int r
) {
709 dout(10) << "r=" << r
<< dendl
;
712 derr
<< "failed to retrieve local snapshot image state: "
713 << cpp_strerror(r
) << dendl
;
714 handle_replay_complete(r
, "failed to retrieve local snapshot image state");
721 template <typename I
>
722 void Replayer
<I
>::create_non_primary_snapshot() {
725 auto ctx
= create_context_callback
<
726 Replayer
<I
>, &Replayer
<I
>::handle_create_non_primary_snapshot
>(this);
727 auto req
= librbd::mirror::snapshot::CreateNonPrimaryRequest
<I
>::create(
728 m_state_builder
->local_image_ctx
, m_remote_mirror_snap_ns
.is_demoted(),
729 m_state_builder
->remote_mirror_uuid
, m_remote_snap_id_end
,
730 m_local_mirror_snap_ns
.snap_seqs
, m_image_state
, &m_local_snap_id_end
, ctx
);
734 template <typename I
>
735 void Replayer
<I
>::handle_create_non_primary_snapshot(int r
) {
736 dout(10) << "r=" << r
<< dendl
;
739 derr
<< "failed to create local mirror snapshot: " << cpp_strerror(r
)
741 handle_replay_complete(r
, "failed to create local mirror snapshot");
745 dout(15) << "local_snap_id_end=" << m_local_snap_id_end
<< dendl
;
750 template <typename I
>
751 void Replayer
<I
>::request_sync() {
754 std::unique_lock locker
{m_lock
};
755 auto ctx
= create_async_context_callback(
756 m_threads
->work_queue
, create_context_callback
<
757 Replayer
<I
>, &Replayer
<I
>::handle_request_sync
>(this));
758 m_instance_watcher
->notify_sync_request(m_state_builder
->local_image_ctx
->id
,
762 template <typename I
>
763 void Replayer
<I
>::handle_request_sync(int r
) {
764 dout(10) << "r=" << r
<< dendl
;
766 std::unique_lock locker
{m_lock
};
767 if (is_replay_interrupted(&locker
)) {
769 } else if (r
== -ECANCELED
) {
770 dout(5) << "image-sync canceled" << dendl
;
771 handle_replay_complete(&locker
, r
, "image-sync canceled");
774 derr
<< "failed to request image-sync: " << cpp_strerror(r
) << dendl
;
775 handle_replay_complete(&locker
, r
, "failed to request image-sync");
779 m_sync_in_progress
= true;
785 template <typename I
>
786 void Replayer
<I
>::copy_image() {
787 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start
<< ", "
788 << "remote_snap_id_end=" << m_remote_snap_id_end
<< ", "
789 << "local_snap_id_start=" << m_local_snap_id_start
<< ", "
790 << "last_copied_object_number="
791 << m_local_mirror_snap_ns
.last_copied_object_number
<< ", "
792 << "snap_seqs=" << m_local_mirror_snap_ns
.snap_seqs
<< dendl
;
794 m_progress_ctx
= new ProgressContext(this);
795 auto ctx
= create_context_callback
<
796 Replayer
<I
>, &Replayer
<I
>::handle_copy_image
>(this);
797 auto req
= librbd::deep_copy::ImageCopyRequest
<I
>::create(
798 m_state_builder
->remote_image_ctx
, m_state_builder
->local_image_ctx
,
799 m_remote_snap_id_start
, m_remote_snap_id_end
, m_local_snap_id_start
, false,
800 (m_local_mirror_snap_ns
.last_copied_object_number
> 0 ?
801 librbd::deep_copy::ObjectNumber
{
802 m_local_mirror_snap_ns
.last_copied_object_number
} :
803 librbd::deep_copy::ObjectNumber
{}),
804 m_local_mirror_snap_ns
.snap_seqs
, m_progress_ctx
, ctx
);
808 template <typename I
>
809 void Replayer
<I
>::handle_copy_image(int r
) {
810 dout(10) << "r=" << r
<< dendl
;
812 delete m_progress_ctx
;
813 m_progress_ctx
= nullptr;
816 derr
<< "failed to copy remote image to local image: " << cpp_strerror(r
)
818 handle_replay_complete(r
, "failed to copy remote image");
825 template <typename I
>
826 void Replayer
<I
>::handle_copy_image_progress(uint64_t object_number
,
827 uint64_t object_count
) {
828 dout(10) << "object_number=" << object_number
<< ", "
829 << "object_count=" << object_count
<< dendl
;
831 std::unique_lock locker
{m_lock
};
832 m_local_mirror_snap_ns
.last_copied_object_number
= std::min(
833 object_number
, object_count
);
834 m_local_object_count
= object_count
;
836 update_non_primary_snapshot(false);
839 template <typename I
>
840 void Replayer
<I
>::apply_image_state() {
843 auto ctx
= create_context_callback
<
844 Replayer
<I
>, &Replayer
<I
>::handle_apply_image_state
>(this);
845 auto req
= ApplyImageStateRequest
<I
>::create(
847 m_state_builder
->remote_mirror_uuid
,
848 m_state_builder
->local_image_ctx
,
849 m_state_builder
->remote_image_ctx
,
854 template <typename I
>
855 void Replayer
<I
>::handle_apply_image_state(int r
) {
856 dout(10) << "r=" << r
<< dendl
;
858 if (r
< 0 && r
!= -ENOENT
) {
859 derr
<< "failed to apply remote image state to local image: "
860 << cpp_strerror(r
) << dendl
;
861 handle_replay_complete(r
, "failed to apply remote image state");
865 std::unique_lock locker
{m_lock
};
866 update_non_primary_snapshot(true);
869 template <typename I
>
870 void Replayer
<I
>::update_non_primary_snapshot(bool complete
) {
871 ceph_assert(ceph_mutex_is_locked_by_me(m_lock
));
873 // disallow two in-flight updates if this isn't the completion of the sync
874 if (m_updating_sync_point
) {
877 m_updating_sync_point
= true;
879 m_local_mirror_snap_ns
.complete
= true;
884 librados::ObjectWriteOperation op
;
885 librbd::cls_client::mirror_image_snapshot_set_copy_progress(
886 &op
, m_local_snap_id_end
, m_local_mirror_snap_ns
.complete
,
887 m_local_mirror_snap_ns
.last_copied_object_number
);
889 auto ctx
= new C_TrackedOp(this, new LambdaContext([this, complete
](int r
) {
890 handle_update_non_primary_snapshot(complete
, r
);
892 auto aio_comp
= create_rados_callback(ctx
);
893 int r
= m_state_builder
->local_image_ctx
->md_ctx
.aio_operate(
894 m_state_builder
->local_image_ctx
->header_oid
, aio_comp
, &op
);
899 template <typename I
>
900 void Replayer
<I
>::handle_update_non_primary_snapshot(bool complete
, int r
) {
901 dout(10) << "r=" << r
<< dendl
;
904 derr
<< "failed to update local snapshot progress: " << cpp_strerror(r
)
907 // only fail if this was the final update
908 handle_replay_complete(r
, "failed to update local snapshot progress");
914 // periodic sync-point update -- do not advance state machine
915 std::unique_lock locker
{m_lock
};
917 ceph_assert(m_updating_sync_point
);
918 m_updating_sync_point
= false;
922 notify_image_update();
925 template <typename I
>
926 void Replayer
<I
>::notify_image_update() {
929 auto ctx
= create_context_callback
<
930 Replayer
<I
>, &Replayer
<I
>::handle_notify_image_update
>(this);
931 m_state_builder
->local_image_ctx
->notify_update(ctx
);
934 template <typename I
>
935 void Replayer
<I
>::handle_notify_image_update(int r
) {
936 dout(10) << "r=" << r
<< dendl
;
939 derr
<< "failed to notify local image update: " << cpp_strerror(r
) << dendl
;
945 template <typename I
>
946 void Replayer
<I
>::unlink_peer() {
947 if (m_remote_snap_id_start
== 0) {
952 // local snapshot fully synced -- we no longer depend on the sync
953 // start snapshot in the remote image
954 dout(10) << "remote_snap_id=" << m_remote_snap_id_start
<< dendl
;
956 auto ctx
= create_context_callback
<
957 Replayer
<I
>, &Replayer
<I
>::handle_unlink_peer
>(this);
958 auto req
= librbd::mirror::snapshot::UnlinkPeerRequest
<I
>::create(
959 m_state_builder
->remote_image_ctx
, m_remote_snap_id_start
,
960 m_remote_mirror_peer_uuid
, ctx
);
964 template <typename I
>
965 void Replayer
<I
>::handle_unlink_peer(int r
) {
966 dout(10) << "r=" << r
<< dendl
;
968 if (r
< 0 && r
!= -ENOENT
) {
969 derr
<< "failed to unlink local peer from remote image: " << cpp_strerror(r
)
971 handle_replay_complete(r
, "failed to unlink local peer from remote image");
978 template <typename I
>
979 void Replayer
<I
>::finish_sync() {
983 std::unique_lock locker
{m_lock
};
984 notify_status_updated();
986 m_sync_in_progress
= false;
987 m_instance_watcher
->notify_sync_complete(
988 m_state_builder
->local_image_ctx
->id
);
991 if (is_replay_interrupted()) {
995 load_local_image_meta();
998 template <typename I
>
999 void Replayer
<I
>::register_local_update_watcher() {
1002 m_update_watch_ctx
= new C_UpdateWatchCtx(this);
1004 int r
= m_state_builder
->local_image_ctx
->state
->register_update_watcher(
1005 m_update_watch_ctx
, &m_local_update_watcher_handle
);
1006 auto ctx
= create_context_callback
<
1007 Replayer
<I
>, &Replayer
<I
>::handle_register_local_update_watcher
>(this);
1008 m_threads
->work_queue
->queue(ctx
, r
);
1011 template <typename I
>
1012 void Replayer
<I
>::handle_register_local_update_watcher(int r
) {
1013 dout(10) << "r=" << r
<< dendl
;
1016 derr
<< "failed to register local update watcher: " << cpp_strerror(r
)
1018 handle_replay_complete(r
, "failed to register local image update watcher");
1019 m_state
= STATE_COMPLETE
;
1021 delete m_update_watch_ctx
;
1022 m_update_watch_ctx
= nullptr;
1024 Context
* on_init
= nullptr;
1025 std::swap(on_init
, m_on_init_shutdown
);
1026 on_init
->complete(r
);
1030 register_remote_update_watcher();
1033 template <typename I
>
1034 void Replayer
<I
>::register_remote_update_watcher() {
1037 int r
= m_state_builder
->remote_image_ctx
->state
->register_update_watcher(
1038 m_update_watch_ctx
, &m_remote_update_watcher_handle
);
1039 auto ctx
= create_context_callback
<
1040 Replayer
<I
>, &Replayer
<I
>::handle_register_remote_update_watcher
>(this);
1041 m_threads
->work_queue
->queue(ctx
, r
);
1044 template <typename I
>
1045 void Replayer
<I
>::handle_register_remote_update_watcher(int r
) {
1046 dout(10) << "r=" << r
<< dendl
;
1049 derr
<< "failed to register remote update watcher: " << cpp_strerror(r
)
1051 handle_replay_complete(r
, "failed to register remote image update watcher");
1052 m_state
= STATE_COMPLETE
;
1054 unregister_local_update_watcher();
1058 m_state
= STATE_REPLAYING
;
1060 Context
* on_init
= nullptr;
1061 std::swap(on_init
, m_on_init_shutdown
);
1062 on_init
->complete(0);
1064 // delay initial snapshot scan until after we have alerted
1065 // image replayer that we have initialized in case an error
1068 std::unique_lock locker
{m_lock
};
1069 notify_status_updated();
1072 load_local_image_meta();
1075 template <typename I
>
1076 void Replayer
<I
>::unregister_remote_update_watcher() {
1079 auto ctx
= create_context_callback
<
1081 &Replayer
<I
>::handle_unregister_remote_update_watcher
>(this);
1082 m_state_builder
->remote_image_ctx
->state
->unregister_update_watcher(
1083 m_remote_update_watcher_handle
, ctx
);
1086 template <typename I
>
1087 void Replayer
<I
>::handle_unregister_remote_update_watcher(int r
) {
1088 dout(10) << "r=" << r
<< dendl
;
1091 derr
<< "failed to unregister remote update watcher: " << cpp_strerror(r
)
1093 handle_replay_complete(
1094 r
, "failed to unregister remote image update watcher");
1097 unregister_local_update_watcher();
1100 template <typename I
>
1101 void Replayer
<I
>::unregister_local_update_watcher() {
1104 auto ctx
= create_context_callback
<
1106 &Replayer
<I
>::handle_unregister_local_update_watcher
>(this);
1107 m_state_builder
->local_image_ctx
->state
->unregister_update_watcher(
1108 m_local_update_watcher_handle
, ctx
);
1111 template <typename I
>
1112 void Replayer
<I
>::handle_unregister_local_update_watcher(int r
) {
1113 dout(10) << "r=" << r
<< dendl
;
1116 derr
<< "failed to unregister local update watcher: " << cpp_strerror(r
)
1118 handle_replay_complete(
1119 r
, "failed to unregister local image update watcher");
1122 delete m_update_watch_ctx
;
1123 m_update_watch_ctx
= nullptr;
1125 wait_for_in_flight_ops();
1128 template <typename I
>
1129 void Replayer
<I
>::wait_for_in_flight_ops() {
1132 auto ctx
= create_async_context_callback(
1133 m_threads
->work_queue
, create_context_callback
<
1134 Replayer
<I
>, &Replayer
<I
>::handle_wait_for_in_flight_ops
>(this));
1135 m_in_flight_op_tracker
.wait_for_ops(ctx
);
1138 template <typename I
>
1139 void Replayer
<I
>::handle_wait_for_in_flight_ops(int r
) {
1140 dout(10) << "r=" << r
<< dendl
;
1142 Context
* on_shutdown
= nullptr;
1144 std::unique_lock locker
{m_lock
};
1145 ceph_assert(m_on_init_shutdown
!= nullptr);
1146 std::swap(on_shutdown
, m_on_init_shutdown
);
1148 on_shutdown
->complete(m_error_code
);
1151 template <typename I
>
1152 void Replayer
<I
>::handle_image_update_notify() {
1155 std::unique_lock locker
{m_lock
};
1156 if (m_state
== STATE_REPLAYING
) {
1157 dout(15) << "flagging snapshot rescan required" << dendl
;
1158 m_image_updated
= true;
1159 } else if (m_state
== STATE_IDLE
) {
1160 m_state
= STATE_REPLAYING
;
1163 dout(15) << "restarting idle replayer" << dendl
;
1164 load_local_image_meta();
1168 template <typename I
>
1169 void Replayer
<I
>::handle_replay_complete(int r
,
1170 const std::string
& description
) {
1171 std::unique_lock locker
{m_lock
};
1172 handle_replay_complete(&locker
, r
, description
);
1175 template <typename I
>
1176 void Replayer
<I
>::handle_replay_complete(std::unique_lock
<ceph::mutex
>* locker
,
1178 const std::string
& description
) {
1179 ceph_assert(ceph_mutex_is_locked_by_me(m_lock
));
1181 if (m_error_code
== 0) {
1183 m_error_description
= description
;
1186 if (m_sync_in_progress
) {
1187 m_sync_in_progress
= false;
1188 m_instance_watcher
->notify_sync_complete(
1189 m_state_builder
->local_image_ctx
->id
);
1192 if (m_state
!= STATE_REPLAYING
&& m_state
!= STATE_IDLE
) {
1196 m_state
= STATE_COMPLETE
;
1197 notify_status_updated();
1200 template <typename I
>
1201 void Replayer
<I
>::notify_status_updated() {
1202 ceph_assert(ceph_mutex_is_locked_by_me(m_lock
));
1205 auto ctx
= new C_TrackedOp(this, new LambdaContext(
1207 m_replayer_listener
->handle_notification();
1209 m_threads
->work_queue
->queue(ctx
, 0);
1212 template <typename I
>
1213 bool Replayer
<I
>::is_replay_interrupted() {
1214 std::unique_lock locker
{m_lock
};
1215 return is_replay_interrupted(&locker
);
1218 template <typename I
>
1219 bool Replayer
<I
>::is_replay_interrupted(std::unique_lock
<ceph::mutex
>* locker
) {
1220 if (m_state
== STATE_COMPLETE
) {
1223 dout(10) << "resuming pending shut down" << dendl
;
1224 unregister_remote_update_watcher();
1230 } // namespace snapshot
1231 } // namespace image_replayer
1232 } // namespace mirror
1235 template class rbd::mirror::image_replayer::snapshot::Replayer
<librbd::ImageCtx
>;