]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / tools / rbd_mirror / image_replayer / snapshot / Replayer.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "Replayer.h"
5 #include "common/debug.h"
6 #include "common/errno.h"
7 #include "include/stringify.h"
8 #include "common/Timer.h"
9 #include "common/WorkQueue.h"
10 #include "cls/rbd/cls_rbd_client.h"
11 #include "librbd/ImageCtx.h"
12 #include "librbd/ImageState.h"
13 #include "librbd/Utils.h"
14 #include "librbd/deep_copy/ImageCopyRequest.h"
15 #include "librbd/deep_copy/SnapshotCopyRequest.h"
16 #include "librbd/mirror/snapshot/CreateNonPrimaryRequest.h"
17 #include "librbd/mirror/snapshot/GetImageStateRequest.h"
18 #include "librbd/mirror/snapshot/ImageMeta.h"
19 #include "librbd/mirror/snapshot/UnlinkPeerRequest.h"
20 #include "tools/rbd_mirror/InstanceWatcher.h"
21 #include "tools/rbd_mirror/PoolMetaCache.h"
22 #include "tools/rbd_mirror/Threads.h"
23 #include "tools/rbd_mirror/Types.h"
24 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
25 #include "tools/rbd_mirror/image_replayer/ReplayerListener.h"
26 #include "tools/rbd_mirror/image_replayer/Utils.h"
27 #include "tools/rbd_mirror/image_replayer/snapshot/ApplyImageStateRequest.h"
28 #include "tools/rbd_mirror/image_replayer/snapshot/StateBuilder.h"
29 #include "tools/rbd_mirror/image_replayer/snapshot/Utils.h"
30
31 #define dout_context g_ceph_context
32 #define dout_subsys ceph_subsys_rbd_mirror
33 #undef dout_prefix
34 #define dout_prefix *_dout << "rbd::mirror::image_replayer::snapshot::" \
35 << "Replayer: " << this << " " << __func__ << ": "
36
37 extern PerfCounters *g_perf_counters;
38
39 namespace rbd {
40 namespace mirror {
41 namespace image_replayer {
42 namespace snapshot {
43
44 namespace {
45
46 template<typename I>
47 std::pair<uint64_t, librbd::SnapInfo*> get_newest_mirror_snapshot(
48 I* image_ctx) {
49 for (auto snap_info_it = image_ctx->snap_info.rbegin();
50 snap_info_it != image_ctx->snap_info.rend(); ++snap_info_it) {
51 const auto& snap_ns = snap_info_it->second.snap_namespace;
52 auto mirror_ns = boost::get<
53 cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
54 if (mirror_ns == nullptr || !mirror_ns->complete) {
55 continue;
56 }
57
58 return {snap_info_it->first, &snap_info_it->second};
59 }
60
61 return {CEPH_NOSNAP, nullptr};
62 }
63
64 } // anonymous namespace
65
66 using librbd::util::create_async_context_callback;
67 using librbd::util::create_context_callback;
68 using librbd::util::create_rados_callback;
69
70 template <typename I>
71 struct Replayer<I>::C_UpdateWatchCtx : public librbd::UpdateWatchCtx {
72 Replayer<I>* replayer;
73
74 C_UpdateWatchCtx(Replayer<I>* replayer) : replayer(replayer) {
75 }
76
77 void handle_notify() override {
78 replayer->handle_image_update_notify();
79 }
80 };
81
82 template <typename I>
83 struct Replayer<I>::C_TrackedOp : public Context {
84 Replayer *replayer;
85 Context* ctx;
86
87 C_TrackedOp(Replayer* replayer, Context* ctx)
88 : replayer(replayer), ctx(ctx) {
89 replayer->m_in_flight_op_tracker.start_op();
90 }
91
92 void finish(int r) override {
93 ctx->complete(r);
94 replayer->m_in_flight_op_tracker.finish_op();
95 }
96 };
97
98 template <typename I>
99 struct Replayer<I>::ProgressContext : public librbd::ProgressContext {
100 Replayer *replayer;
101
102 ProgressContext(Replayer* replayer) : replayer(replayer) {
103 }
104
105 int update_progress(uint64_t object_number, uint64_t object_count) override {
106 replayer->handle_copy_image_progress(object_number, object_count);
107 return 0;
108 }
109 };
110
111 template <typename I>
112 Replayer<I>::Replayer(
113 Threads<I>* threads,
114 InstanceWatcher<I>* instance_watcher,
115 const std::string& local_mirror_uuid,
116 PoolMetaCache* pool_meta_cache,
117 StateBuilder<I>* state_builder,
118 ReplayerListener* replayer_listener)
119 : m_threads(threads),
120 m_instance_watcher(instance_watcher),
121 m_local_mirror_uuid(local_mirror_uuid),
122 m_pool_meta_cache(pool_meta_cache),
123 m_state_builder(state_builder),
124 m_replayer_listener(replayer_listener),
125 m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
126 "rbd::mirror::image_replayer::snapshot::Replayer", this))) {
127 dout(10) << dendl;
128 }
129
130 template <typename I>
131 Replayer<I>::~Replayer() {
132 dout(10) << dendl;
133 ceph_assert(m_state == STATE_COMPLETE);
134 ceph_assert(m_update_watch_ctx == nullptr);
135 ceph_assert(m_progress_ctx == nullptr);
136 }
137
138 template <typename I>
139 void Replayer<I>::init(Context* on_finish) {
140 dout(10) << dendl;
141
142 ceph_assert(m_state == STATE_INIT);
143
144 RemotePoolMeta remote_pool_meta;
145 int r = m_pool_meta_cache->get_remote_pool_meta(
146 m_state_builder->remote_image_ctx->md_ctx.get_id(), &remote_pool_meta);
147 if (r < 0 || remote_pool_meta.mirror_peer_uuid.empty()) {
148 derr << "failed to retrieve mirror peer uuid from remote pool" << dendl;
149 m_state = STATE_COMPLETE;
150 m_threads->work_queue->queue(on_finish, r);
151 return;
152 }
153
154 m_remote_mirror_peer_uuid = remote_pool_meta.mirror_peer_uuid;
155 dout(10) << "remote_mirror_peer_uuid=" << m_remote_mirror_peer_uuid << dendl;
156
157 ceph_assert(m_on_init_shutdown == nullptr);
158 m_on_init_shutdown = on_finish;
159
160 register_local_update_watcher();
161 }
162
163 template <typename I>
164 void Replayer<I>::shut_down(Context* on_finish) {
165 dout(10) << dendl;
166
167 std::unique_lock locker{m_lock};
168 ceph_assert(m_on_init_shutdown == nullptr);
169 m_on_init_shutdown = on_finish;
170 m_error_code = 0;
171 m_error_description = "";
172
173 ceph_assert(m_state != STATE_INIT);
174 auto state = STATE_COMPLETE;
175 std::swap(m_state, state);
176
177 if (state == STATE_REPLAYING) {
178 // if a sync request was pending, request a cancelation
179 m_instance_watcher->cancel_sync_request(
180 m_state_builder->local_image_ctx->id);
181
182 // TODO interrupt snapshot copy and image copy state machines even if remote
183 // cluster is unreachable
184 dout(10) << "shut down pending on completion of snapshot replay" << dendl;
185 return;
186 }
187 locker.unlock();
188
189 unregister_remote_update_watcher();
190 }
191
192 template <typename I>
193 void Replayer<I>::flush(Context* on_finish) {
194 dout(10) << dendl;
195
196 // TODO
197 m_threads->work_queue->queue(on_finish, 0);
198 }
199
200 template <typename I>
201 bool Replayer<I>::get_replay_status(std::string* description,
202 Context* on_finish) {
203 dout(10) << dendl;
204
205 std::unique_lock locker{m_lock};
206 if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
207 locker.unlock();
208
209 derr << "replay not running" << dendl;
210 on_finish->complete(-EAGAIN);
211 return false;
212 }
213
214 std::shared_lock local_image_locker{
215 m_state_builder->local_image_ctx->image_lock};
216 auto [local_snap_id, local_snap_info] = get_newest_mirror_snapshot(
217 m_state_builder->local_image_ctx);
218
219 std::shared_lock remote_image_locker{
220 m_state_builder->remote_image_ctx->image_lock};
221 auto [remote_snap_id, remote_snap_info] = get_newest_mirror_snapshot(
222 m_state_builder->remote_image_ctx);
223
224 if (remote_snap_info == nullptr) {
225 remote_image_locker.unlock();
226 local_image_locker.unlock();
227 locker.unlock();
228
229 derr << "remote image does not contain mirror snapshots" << dendl;
230 on_finish->complete(-EAGAIN);
231 return false;
232 }
233
234 std::string replay_state = "idle";
235 if (m_remote_snap_id_end != CEPH_NOSNAP) {
236 replay_state = "syncing";
237 }
238
239 *description =
240 "{"
241 "\"replay_state\": \"" + replay_state + "\", " +
242 "\"remote_snapshot_timestamp\": " +
243 stringify(remote_snap_info->timestamp.sec());
244
245 auto matching_remote_snap_id = util::compute_remote_snap_id(
246 m_state_builder->local_image_ctx->image_lock,
247 m_state_builder->local_image_ctx->snap_info,
248 local_snap_id, m_state_builder->remote_mirror_uuid);
249 auto matching_remote_snap_it =
250 m_state_builder->remote_image_ctx->snap_info.find(matching_remote_snap_id);
251 if (matching_remote_snap_id != CEPH_NOSNAP &&
252 matching_remote_snap_it !=
253 m_state_builder->remote_image_ctx->snap_info.end()) {
254 // use the timestamp from the matching remote image since
255 // the local snapshot would just be the time the snapshot was
256 // synced and not the consistency point in time.
257 *description += ", "
258 "\"local_snapshot_timestamp\": " +
259 stringify(matching_remote_snap_it->second.timestamp.sec());
260 }
261
262 matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
263 m_remote_snap_id_end);
264 if (m_remote_snap_id_end != CEPH_NOSNAP &&
265 matching_remote_snap_it !=
266 m_state_builder->remote_image_ctx->snap_info.end()) {
267 *description += ", "
268 "\"syncing_snapshot_timestamp\": " +
269 stringify(remote_snap_info->timestamp.sec()) + ", " +
270 "\"syncing_percent\": " + stringify(static_cast<uint32_t>(
271 100 * m_local_mirror_snap_ns.last_copied_object_number /
272 static_cast<float>(std::max<uint64_t>(1U, m_local_object_count))));
273 }
274
275 *description +=
276 "}";
277
278 local_image_locker.unlock();
279 remote_image_locker.unlock();
280 locker.unlock();
281 on_finish->complete(-EEXIST);
282 return true;
283 }
284
285 template <typename I>
286 void Replayer<I>::load_local_image_meta() {
287 dout(10) << dendl;
288
289 {
290 // reset state in case new snapshot is added while we are scanning
291 std::unique_lock locker{m_lock};
292 m_image_updated = false;
293 }
294
295 ceph_assert(m_state_builder->local_image_meta != nullptr);
296 auto ctx = create_context_callback<
297 Replayer<I>, &Replayer<I>::handle_load_local_image_meta>(this);
298 m_state_builder->local_image_meta->load(ctx);
299 }
300
301 template <typename I>
302 void Replayer<I>::handle_load_local_image_meta(int r) {
303 dout(10) << "r=" << r << dendl;
304
305 if (r < 0 && r != -ENOENT) {
306 derr << "failed to load local image-meta: " << cpp_strerror(r) << dendl;
307 handle_replay_complete(r, "failed to load local image-meta");
308 return;
309 }
310
311 if (r >= 0 && m_state_builder->local_image_meta->resync_requested) {
312 m_resync_requested = true;
313
314 dout(10) << "local image resync requested" << dendl;
315 handle_replay_complete(0, "resync requested");
316 return;
317 }
318
319 refresh_local_image();
320 }
321
322 template <typename I>
323 void Replayer<I>::refresh_local_image() {
324 if (!m_state_builder->local_image_ctx->state->is_refresh_required()) {
325 refresh_remote_image();
326 return;
327 }
328
329 dout(10) << dendl;
330 auto ctx = create_context_callback<
331 Replayer<I>, &Replayer<I>::handle_refresh_local_image>(this);
332 m_state_builder->local_image_ctx->state->refresh(ctx);
333 }
334
335 template <typename I>
336 void Replayer<I>::handle_refresh_local_image(int r) {
337 dout(10) << "r=" << r << dendl;
338
339 if (r < 0) {
340 derr << "failed to refresh local image: " << cpp_strerror(r) << dendl;
341 handle_replay_complete(r, "failed to refresh local image");
342 return;
343 }
344
345 refresh_remote_image();
346 }
347
348 template <typename I>
349 void Replayer<I>::refresh_remote_image() {
350 if (!m_state_builder->remote_image_ctx->state->is_refresh_required()) {
351 std::unique_lock locker{m_lock};
352 scan_local_mirror_snapshots(&locker);
353 return;
354 }
355
356 dout(10) << dendl;
357 auto ctx = create_context_callback<
358 Replayer<I>, &Replayer<I>::handle_refresh_remote_image>(this);
359 m_state_builder->remote_image_ctx->state->refresh(ctx);
360 }
361
362 template <typename I>
363 void Replayer<I>::handle_refresh_remote_image(int r) {
364 dout(10) << "r=" << r << dendl;
365
366 if (r < 0) {
367 derr << "failed to refresh remote image: " << cpp_strerror(r) << dendl;
368 handle_replay_complete(r, "failed to refresh remote image");
369 return;
370 }
371
372 std::unique_lock locker{m_lock};
373 scan_local_mirror_snapshots(&locker);
374 }
375
376 template <typename I>
377 void Replayer<I>::scan_local_mirror_snapshots(
378 std::unique_lock<ceph::mutex>* locker) {
379 if (is_replay_interrupted(locker)) {
380 return;
381 }
382
383 dout(10) << dendl;
384
385 m_local_snap_id_start = 0;
386 m_local_snap_id_end = CEPH_NOSNAP;
387 m_local_mirror_snap_ns = {};
388 m_local_object_count = 0;
389
390 m_remote_snap_id_start = 0;
391 m_remote_snap_id_end = CEPH_NOSNAP;
392 m_remote_mirror_snap_ns = {};
393
394 auto local_image_ctx = m_state_builder->local_image_ctx;
395 std::shared_lock image_locker{local_image_ctx->image_lock};
396 for (auto snap_info_it = local_image_ctx->snap_info.begin();
397 snap_info_it != local_image_ctx->snap_info.end(); ++snap_info_it) {
398 const auto& snap_ns = snap_info_it->second.snap_namespace;
399 auto mirror_ns = boost::get<
400 cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
401 if (mirror_ns == nullptr) {
402 continue;
403 }
404
405 dout(15) << "local mirror snapshot: id=" << snap_info_it->first << ", "
406 << "mirror_ns=" << *mirror_ns << dendl;
407 m_local_mirror_snap_ns = *mirror_ns;
408
409 auto local_snap_id = snap_info_it->first;
410 if (mirror_ns->is_non_primary()) {
411 if (mirror_ns->complete) {
412 // if remote has new snapshots, we would sync from here
413 m_local_snap_id_start = local_snap_id;
414 m_local_snap_id_end = CEPH_NOSNAP;
415 } else {
416 // start snap will be last complete mirror snapshot or initial
417 // image revision
418 m_local_snap_id_end = local_snap_id;
419 }
420 } else if (mirror_ns->is_primary()) {
421 if (mirror_ns->complete) {
422 m_local_snap_id_start = local_snap_id;
423 m_local_snap_id_end = CEPH_NOSNAP;
424 } else {
425 derr << "incomplete local primary snapshot" << dendl;
426 handle_replay_complete(locker, -EINVAL,
427 "incomplete local primary snapshot");
428 return;
429 }
430 } else {
431 derr << "unknown local mirror snapshot state" << dendl;
432 handle_replay_complete(locker, -EINVAL,
433 "invalid local mirror snapshot state");
434 return;
435 }
436 }
437 image_locker.unlock();
438
439 if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
440 if (m_local_mirror_snap_ns.is_non_primary() &&
441 m_local_mirror_snap_ns.primary_mirror_uuid !=
442 m_state_builder->remote_mirror_uuid) {
443 // TODO support multiple peers
444 derr << "local image linked to unknown peer: "
445 << m_local_mirror_snap_ns.primary_mirror_uuid << dendl;
446 handle_replay_complete(locker, -EEXIST,
447 "local image linked to unknown peer");
448 return;
449 } else if (m_local_mirror_snap_ns.state ==
450 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) {
451 dout(5) << "local image promoted" << dendl;
452 handle_replay_complete(locker, 0, "force promoted");
453 return;
454 }
455
456 dout(10) << "found local mirror snapshot: "
457 << "local_snap_id_start=" << m_local_snap_id_start << ", "
458 << "local_snap_id_end=" << m_local_snap_id_end << ", "
459 << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
460 if (!m_local_mirror_snap_ns.is_primary() &&
461 m_local_mirror_snap_ns.complete) {
462 // our remote sync should start after this completed snapshot
463 m_remote_snap_id_start = m_local_mirror_snap_ns.primary_snap_id;
464 }
465 }
466
467 // we don't have any mirror snapshots or only completed non-primary
468 // mirror snapshots
469 scan_remote_mirror_snapshots(locker);
470 }
471
472 template <typename I>
473 void Replayer<I>::scan_remote_mirror_snapshots(
474 std::unique_lock<ceph::mutex>* locker) {
475 dout(10) << dendl;
476
477 bool split_brain = false;
478 bool remote_demoted = false;
479 auto remote_image_ctx = m_state_builder->remote_image_ctx;
480 std::shared_lock image_locker{remote_image_ctx->image_lock};
481 for (auto snap_info_it = remote_image_ctx->snap_info.begin();
482 snap_info_it != remote_image_ctx->snap_info.end(); ++snap_info_it) {
483 const auto& snap_ns = snap_info_it->second.snap_namespace;
484 auto mirror_ns = boost::get<
485 cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
486 if (mirror_ns == nullptr) {
487 continue;
488 }
489
490 dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", "
491 << "mirror_ns=" << *mirror_ns << dendl;
492 if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) {
493 derr << "unknown remote mirror snapshot state" << dendl;
494 handle_replay_complete(locker, -EINVAL,
495 "invalid remote mirror snapshot state");
496 return;
497 } else {
498 remote_demoted = mirror_ns->is_demoted();
499 }
500
501 auto remote_snap_id = snap_info_it->first;
502 if (m_local_snap_id_start > 0 || m_local_snap_id_end != CEPH_NOSNAP) {
503 // we have a local mirror snapshot
504 if (m_local_mirror_snap_ns.is_non_primary()) {
505 // previously validated that it was linked to remote
506 ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
507 m_state_builder->remote_mirror_uuid);
508
509 if (m_local_mirror_snap_ns.complete &&
510 m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) {
511 // skip past completed remote snapshot
512 m_remote_snap_id_start = remote_snap_id;
513 dout(15) << "skipping synced remote snapshot " << remote_snap_id
514 << dendl;
515 continue;
516 } else if (!m_local_mirror_snap_ns.complete &&
517 m_local_mirror_snap_ns.primary_snap_id > remote_snap_id) {
518 // skip until we get to the in-progress remote snapshot
519 dout(15) << "skipping synced remote snapshot " << remote_snap_id
520 << " while search for in-progress sync" << dendl;
521 m_remote_snap_id_start = remote_snap_id;
522 continue;
523 }
524 } else if (m_local_mirror_snap_ns.state ==
525 cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
526 // find the matching demotion snapshot in remote image
527 ceph_assert(m_local_snap_id_start > 0);
528 if (mirror_ns->state ==
529 cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED &&
530 mirror_ns->primary_mirror_uuid == m_local_mirror_uuid &&
531 mirror_ns->primary_snap_id == m_local_snap_id_start) {
532 dout(10) << "located matching demotion snapshot: "
533 << "remote_snap_id=" << remote_snap_id << ", "
534 << "local_snap_id=" << m_local_snap_id_start << dendl;
535 m_remote_snap_id_start = remote_snap_id;
536 split_brain = false;
537 continue;
538 } else if (m_remote_snap_id_start == 0) {
539 // still looking for our matching demotion snapshot
540 dout(15) << "skipping remote snapshot " << remote_snap_id << " "
541 << "while searching for demotion" << dendl;
542 split_brain = true;
543 continue;
544 }
545 } else {
546 // should not have been able to reach this
547 ceph_assert(false);
548 }
549 }
550
551 // find first snapshot where were are listed as a peer
552 if (!mirror_ns->is_primary()) {
553 dout(15) << "skipping non-primary remote snapshot" << dendl;
554 continue;
555 } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
556 0) {
557 dout(15) << "skipping remote snapshot due to missing mirror peer"
558 << dendl;
559 continue;
560 }
561
562 m_remote_snap_id_end = remote_snap_id;
563 m_remote_mirror_snap_ns = *mirror_ns;
564 break;
565 }
566 image_locker.unlock();
567
568 if (m_remote_snap_id_end != CEPH_NOSNAP) {
569 dout(10) << "found remote mirror snapshot: "
570 << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
571 << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
572 << "remote_snap_ns=" << m_remote_mirror_snap_ns << dendl;
573 if (m_remote_mirror_snap_ns.complete) {
574 locker->unlock();
575
576 if (m_local_snap_id_end != CEPH_NOSNAP &&
577 !m_local_mirror_snap_ns.complete) {
578 // attempt to resume image-sync
579 dout(10) << "local image contains in-progress mirror snapshot"
580 << dendl;
581 get_local_image_state();
582 } else {
583 copy_snapshots();
584 }
585 return;
586 } else {
587 // might have raced with the creation of a remote mirror snapshot
588 // so we will need to refresh and rescan once it completes
589 dout(15) << "remote mirror snapshot not complete" << dendl;
590 }
591 }
592
593 if (m_image_updated) {
594 // received update notification while scanning image, restart ...
595 m_image_updated = false;
596 locker->unlock();
597
598 dout(10) << "restarting snapshot scan due to remote update notification"
599 << dendl;
600 load_local_image_meta();
601 return;
602 }
603
604 if (is_replay_interrupted(locker)) {
605 return;
606 } else if (split_brain) {
607 derr << "split-brain detected: failed to find matching non-primary "
608 << "snapshot in remote image: "
609 << "local_snap_id_start=" << m_local_snap_id_start << ", "
610 << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
611 handle_replay_complete(locker, -EEXIST, "split-brain");
612 return;
613 } else if (remote_demoted) {
614 dout(10) << "remote image demoted" << dendl;
615 handle_replay_complete(locker, -EREMOTEIO, "remote image demoted");
616 return;
617 }
618
619 dout(10) << "all remote snapshots synced: idling waiting for new snapshot"
620 << dendl;
621 ceph_assert(m_state == STATE_REPLAYING);
622 m_state = STATE_IDLE;
623
624 notify_status_updated();
625 }
626
627 template <typename I>
628 void Replayer<I>::copy_snapshots() {
629 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
630 << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
631 << "local_snap_id_start=" << m_local_snap_id_start << dendl;
632
633 ceph_assert(m_remote_snap_id_start != CEPH_NOSNAP);
634 ceph_assert(m_remote_snap_id_end > 0 &&
635 m_remote_snap_id_end != CEPH_NOSNAP);
636 ceph_assert(m_local_snap_id_start != CEPH_NOSNAP);
637
638 m_local_mirror_snap_ns = {};
639 auto ctx = create_context_callback<
640 Replayer<I>, &Replayer<I>::handle_copy_snapshots>(this);
641 auto req = librbd::deep_copy::SnapshotCopyRequest<I>::create(
642 m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
643 m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start,
644 false, m_threads->work_queue, &m_local_mirror_snap_ns.snap_seqs,
645 ctx);
646 req->send();
647 }
648
649 template <typename I>
650 void Replayer<I>::handle_copy_snapshots(int r) {
651 dout(10) << "r=" << r << dendl;
652
653 if (r < 0) {
654 derr << "failed to copy snapshots from remote to local image: "
655 << cpp_strerror(r) << dendl;
656 handle_replay_complete(
657 r, "failed to copy snapshots from remote to local image");
658 return;
659 }
660
661 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
662 << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
663 << "local_snap_id_start=" << m_local_snap_id_start << ", "
664 << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
665 get_remote_image_state();
666 }
667
668 template <typename I>
669 void Replayer<I>::get_remote_image_state() {
670 dout(10) << dendl;
671
672 auto ctx = create_context_callback<
673 Replayer<I>, &Replayer<I>::handle_get_remote_image_state>(this);
674 auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
675 m_state_builder->remote_image_ctx, m_remote_snap_id_end,
676 &m_image_state, ctx);
677 req->send();
678 }
679
680 template <typename I>
681 void Replayer<I>::handle_get_remote_image_state(int r) {
682 dout(10) << "r=" << r << dendl;
683
684 if (r < 0) {
685 derr << "failed to retrieve remote snapshot image state: "
686 << cpp_strerror(r) << dendl;
687 handle_replay_complete(r, "failed to retrieve remote snapshot image state");
688 return;
689 }
690
691 create_non_primary_snapshot();
692 }
693
694 template <typename I>
695 void Replayer<I>::get_local_image_state() {
696 dout(10) << dendl;
697
698 ceph_assert(m_local_snap_id_end != CEPH_NOSNAP);
699 auto ctx = create_context_callback<
700 Replayer<I>, &Replayer<I>::handle_get_local_image_state>(this);
701 auto req = librbd::mirror::snapshot::GetImageStateRequest<I>::create(
702 m_state_builder->local_image_ctx, m_local_snap_id_end,
703 &m_image_state, ctx);
704 req->send();
705 }
706
707 template <typename I>
708 void Replayer<I>::handle_get_local_image_state(int r) {
709 dout(10) << "r=" << r << dendl;
710
711 if (r < 0) {
712 derr << "failed to retrieve local snapshot image state: "
713 << cpp_strerror(r) << dendl;
714 handle_replay_complete(r, "failed to retrieve local snapshot image state");
715 return;
716 }
717
718 request_sync();
719 }
720
721 template <typename I>
722 void Replayer<I>::create_non_primary_snapshot() {
723 dout(10) << dendl;
724
725 auto ctx = create_context_callback<
726 Replayer<I>, &Replayer<I>::handle_create_non_primary_snapshot>(this);
727 auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest<I>::create(
728 m_state_builder->local_image_ctx, m_remote_mirror_snap_ns.is_demoted(),
729 m_state_builder->remote_mirror_uuid, m_remote_snap_id_end,
730 m_local_mirror_snap_ns.snap_seqs, m_image_state, &m_local_snap_id_end, ctx);
731 req->send();
732 }
733
734 template <typename I>
735 void Replayer<I>::handle_create_non_primary_snapshot(int r) {
736 dout(10) << "r=" << r << dendl;
737
738 if (r < 0) {
739 derr << "failed to create local mirror snapshot: " << cpp_strerror(r)
740 << dendl;
741 handle_replay_complete(r, "failed to create local mirror snapshot");
742 return;
743 }
744
745 dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl;
746
747 request_sync();
748 }
749
750 template <typename I>
751 void Replayer<I>::request_sync() {
752 dout(10) << dendl;
753
754 std::unique_lock locker{m_lock};
755 auto ctx = create_async_context_callback(
756 m_threads->work_queue, create_context_callback<
757 Replayer<I>, &Replayer<I>::handle_request_sync>(this));
758 m_instance_watcher->notify_sync_request(m_state_builder->local_image_ctx->id,
759 ctx);
760 }
761
762 template <typename I>
763 void Replayer<I>::handle_request_sync(int r) {
764 dout(10) << "r=" << r << dendl;
765
766 std::unique_lock locker{m_lock};
767 if (is_replay_interrupted(&locker)) {
768 return;
769 } else if (r == -ECANCELED) {
770 dout(5) << "image-sync canceled" << dendl;
771 handle_replay_complete(&locker, r, "image-sync canceled");
772 return;
773 } else if (r < 0) {
774 derr << "failed to request image-sync: " << cpp_strerror(r) << dendl;
775 handle_replay_complete(&locker, r, "failed to request image-sync");
776 return;
777 }
778
779 m_sync_in_progress = true;
780 locker.unlock();
781
782 copy_image();
783 }
784
785 template <typename I>
786 void Replayer<I>::copy_image() {
787 dout(10) << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
788 << "remote_snap_id_end=" << m_remote_snap_id_end << ", "
789 << "local_snap_id_start=" << m_local_snap_id_start << ", "
790 << "last_copied_object_number="
791 << m_local_mirror_snap_ns.last_copied_object_number << ", "
792 << "snap_seqs=" << m_local_mirror_snap_ns.snap_seqs << dendl;
793
794 m_progress_ctx = new ProgressContext(this);
795 auto ctx = create_context_callback<
796 Replayer<I>, &Replayer<I>::handle_copy_image>(this);
797 auto req = librbd::deep_copy::ImageCopyRequest<I>::create(
798 m_state_builder->remote_image_ctx, m_state_builder->local_image_ctx,
799 m_remote_snap_id_start, m_remote_snap_id_end, m_local_snap_id_start, false,
800 (m_local_mirror_snap_ns.last_copied_object_number > 0 ?
801 librbd::deep_copy::ObjectNumber{
802 m_local_mirror_snap_ns.last_copied_object_number} :
803 librbd::deep_copy::ObjectNumber{}),
804 m_local_mirror_snap_ns.snap_seqs, m_progress_ctx, ctx);
805 req->send();
806 }
807
808 template <typename I>
809 void Replayer<I>::handle_copy_image(int r) {
810 dout(10) << "r=" << r << dendl;
811
812 delete m_progress_ctx;
813 m_progress_ctx = nullptr;
814
815 if (r < 0) {
816 derr << "failed to copy remote image to local image: " << cpp_strerror(r)
817 << dendl;
818 handle_replay_complete(r, "failed to copy remote image");
819 return;
820 }
821
822 apply_image_state();
823 }
824
825 template <typename I>
826 void Replayer<I>::handle_copy_image_progress(uint64_t object_number,
827 uint64_t object_count) {
828 dout(10) << "object_number=" << object_number << ", "
829 << "object_count=" << object_count << dendl;
830
831 std::unique_lock locker{m_lock};
832 m_local_mirror_snap_ns.last_copied_object_number = std::min(
833 object_number, object_count);
834 m_local_object_count = object_count;
835
836 update_non_primary_snapshot(false);
837 }
838
839 template <typename I>
840 void Replayer<I>::apply_image_state() {
841 dout(10) << dendl;
842
843 auto ctx = create_context_callback<
844 Replayer<I>, &Replayer<I>::handle_apply_image_state>(this);
845 auto req = ApplyImageStateRequest<I>::create(
846 m_local_mirror_uuid,
847 m_state_builder->remote_mirror_uuid,
848 m_state_builder->local_image_ctx,
849 m_state_builder->remote_image_ctx,
850 m_image_state, ctx);
851 req->send();
852 }
853
854 template <typename I>
855 void Replayer<I>::handle_apply_image_state(int r) {
856 dout(10) << "r=" << r << dendl;
857
858 if (r < 0 && r != -ENOENT) {
859 derr << "failed to apply remote image state to local image: "
860 << cpp_strerror(r) << dendl;
861 handle_replay_complete(r, "failed to apply remote image state");
862 return;
863 }
864
865 std::unique_lock locker{m_lock};
866 update_non_primary_snapshot(true);
867 }
868
869 template <typename I>
870 void Replayer<I>::update_non_primary_snapshot(bool complete) {
871 ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
872 if (!complete) {
873 // disallow two in-flight updates if this isn't the completion of the sync
874 if (m_updating_sync_point) {
875 return;
876 }
877 m_updating_sync_point = true;
878 } else {
879 m_local_mirror_snap_ns.complete = true;
880 }
881
882 dout(10) << dendl;
883
884 librados::ObjectWriteOperation op;
885 librbd::cls_client::mirror_image_snapshot_set_copy_progress(
886 &op, m_local_snap_id_end, m_local_mirror_snap_ns.complete,
887 m_local_mirror_snap_ns.last_copied_object_number);
888
889 auto ctx = new C_TrackedOp(this, new LambdaContext([this, complete](int r) {
890 handle_update_non_primary_snapshot(complete, r);
891 }));
892 auto aio_comp = create_rados_callback(ctx);
893 int r = m_state_builder->local_image_ctx->md_ctx.aio_operate(
894 m_state_builder->local_image_ctx->header_oid, aio_comp, &op);
895 ceph_assert(r == 0);
896 aio_comp->release();
897 }
898
899 template <typename I>
900 void Replayer<I>::handle_update_non_primary_snapshot(bool complete, int r) {
901 dout(10) << "r=" << r << dendl;
902
903 if (r < 0) {
904 derr << "failed to update local snapshot progress: " << cpp_strerror(r)
905 << dendl;
906 if (complete) {
907 // only fail if this was the final update
908 handle_replay_complete(r, "failed to update local snapshot progress");
909 return;
910 }
911 }
912
913 if (!complete) {
914 // periodic sync-point update -- do not advance state machine
915 std::unique_lock locker{m_lock};
916
917 ceph_assert(m_updating_sync_point);
918 m_updating_sync_point = false;
919 return;
920 }
921
922 notify_image_update();
923 }
924
925 template <typename I>
926 void Replayer<I>::notify_image_update() {
927 dout(10) << dendl;
928
929 auto ctx = create_context_callback<
930 Replayer<I>, &Replayer<I>::handle_notify_image_update>(this);
931 m_state_builder->local_image_ctx->notify_update(ctx);
932 }
933
934 template <typename I>
935 void Replayer<I>::handle_notify_image_update(int r) {
936 dout(10) << "r=" << r << dendl;
937
938 if (r < 0) {
939 derr << "failed to notify local image update: " << cpp_strerror(r) << dendl;
940 }
941
942 unlink_peer();
943 }
944
945 template <typename I>
946 void Replayer<I>::unlink_peer() {
947 if (m_remote_snap_id_start == 0) {
948 finish_sync();
949 return;
950 }
951
952 // local snapshot fully synced -- we no longer depend on the sync
953 // start snapshot in the remote image
954 dout(10) << "remote_snap_id=" << m_remote_snap_id_start << dendl;
955
956 auto ctx = create_context_callback<
957 Replayer<I>, &Replayer<I>::handle_unlink_peer>(this);
958 auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create(
959 m_state_builder->remote_image_ctx, m_remote_snap_id_start,
960 m_remote_mirror_peer_uuid, ctx);
961 req->send();
962 }
963
964 template <typename I>
965 void Replayer<I>::handle_unlink_peer(int r) {
966 dout(10) << "r=" << r << dendl;
967
968 if (r < 0 && r != -ENOENT) {
969 derr << "failed to unlink local peer from remote image: " << cpp_strerror(r)
970 << dendl;
971 handle_replay_complete(r, "failed to unlink local peer from remote image");
972 return;
973 }
974
975 finish_sync();
976 }
977
978 template <typename I>
979 void Replayer<I>::finish_sync() {
980 dout(10) << dendl;
981
982 {
983 std::unique_lock locker{m_lock};
984 notify_status_updated();
985
986 m_sync_in_progress = false;
987 m_instance_watcher->notify_sync_complete(
988 m_state_builder->local_image_ctx->id);
989 }
990
991 if (is_replay_interrupted()) {
992 return;
993 }
994
995 load_local_image_meta();
996 }
997
998 template <typename I>
999 void Replayer<I>::register_local_update_watcher() {
1000 dout(10) << dendl;
1001
1002 m_update_watch_ctx = new C_UpdateWatchCtx(this);
1003
1004 int r = m_state_builder->local_image_ctx->state->register_update_watcher(
1005 m_update_watch_ctx, &m_local_update_watcher_handle);
1006 auto ctx = create_context_callback<
1007 Replayer<I>, &Replayer<I>::handle_register_local_update_watcher>(this);
1008 m_threads->work_queue->queue(ctx, r);
1009 }
1010
1011 template <typename I>
1012 void Replayer<I>::handle_register_local_update_watcher(int r) {
1013 dout(10) << "r=" << r << dendl;
1014
1015 if (r < 0) {
1016 derr << "failed to register local update watcher: " << cpp_strerror(r)
1017 << dendl;
1018 handle_replay_complete(r, "failed to register local image update watcher");
1019 m_state = STATE_COMPLETE;
1020
1021 delete m_update_watch_ctx;
1022 m_update_watch_ctx = nullptr;
1023
1024 Context* on_init = nullptr;
1025 std::swap(on_init, m_on_init_shutdown);
1026 on_init->complete(r);
1027 return;
1028 }
1029
1030 register_remote_update_watcher();
1031 }
1032
1033 template <typename I>
1034 void Replayer<I>::register_remote_update_watcher() {
1035 dout(10) << dendl;
1036
1037 int r = m_state_builder->remote_image_ctx->state->register_update_watcher(
1038 m_update_watch_ctx, &m_remote_update_watcher_handle);
1039 auto ctx = create_context_callback<
1040 Replayer<I>, &Replayer<I>::handle_register_remote_update_watcher>(this);
1041 m_threads->work_queue->queue(ctx, r);
1042 }
1043
1044 template <typename I>
1045 void Replayer<I>::handle_register_remote_update_watcher(int r) {
1046 dout(10) << "r=" << r << dendl;
1047
1048 if (r < 0) {
1049 derr << "failed to register remote update watcher: " << cpp_strerror(r)
1050 << dendl;
1051 handle_replay_complete(r, "failed to register remote image update watcher");
1052 m_state = STATE_COMPLETE;
1053
1054 unregister_local_update_watcher();
1055 return;
1056 }
1057
1058 m_state = STATE_REPLAYING;
1059
1060 Context* on_init = nullptr;
1061 std::swap(on_init, m_on_init_shutdown);
1062 on_init->complete(0);
1063
1064 // delay initial snapshot scan until after we have alerted
1065 // image replayer that we have initialized in case an error
1066 // occurs
1067 {
1068 std::unique_lock locker{m_lock};
1069 notify_status_updated();
1070 }
1071
1072 load_local_image_meta();
1073 }
1074
1075 template <typename I>
1076 void Replayer<I>::unregister_remote_update_watcher() {
1077 dout(10) << dendl;
1078
1079 auto ctx = create_context_callback<
1080 Replayer<I>,
1081 &Replayer<I>::handle_unregister_remote_update_watcher>(this);
1082 m_state_builder->remote_image_ctx->state->unregister_update_watcher(
1083 m_remote_update_watcher_handle, ctx);
1084 }
1085
1086 template <typename I>
1087 void Replayer<I>::handle_unregister_remote_update_watcher(int r) {
1088 dout(10) << "r=" << r << dendl;
1089
1090 if (r < 0) {
1091 derr << "failed to unregister remote update watcher: " << cpp_strerror(r)
1092 << dendl;
1093 handle_replay_complete(
1094 r, "failed to unregister remote image update watcher");
1095 }
1096
1097 unregister_local_update_watcher();
1098 }
1099
1100 template <typename I>
1101 void Replayer<I>::unregister_local_update_watcher() {
1102 dout(10) << dendl;
1103
1104 auto ctx = create_context_callback<
1105 Replayer<I>,
1106 &Replayer<I>::handle_unregister_local_update_watcher>(this);
1107 m_state_builder->local_image_ctx->state->unregister_update_watcher(
1108 m_local_update_watcher_handle, ctx);
1109 }
1110
1111 template <typename I>
1112 void Replayer<I>::handle_unregister_local_update_watcher(int r) {
1113 dout(10) << "r=" << r << dendl;
1114
1115 if (r < 0) {
1116 derr << "failed to unregister local update watcher: " << cpp_strerror(r)
1117 << dendl;
1118 handle_replay_complete(
1119 r, "failed to unregister local image update watcher");
1120 }
1121
1122 delete m_update_watch_ctx;
1123 m_update_watch_ctx = nullptr;
1124
1125 wait_for_in_flight_ops();
1126 }
1127
1128 template <typename I>
1129 void Replayer<I>::wait_for_in_flight_ops() {
1130 dout(10) << dendl;
1131
1132 auto ctx = create_async_context_callback(
1133 m_threads->work_queue, create_context_callback<
1134 Replayer<I>, &Replayer<I>::handle_wait_for_in_flight_ops>(this));
1135 m_in_flight_op_tracker.wait_for_ops(ctx);
1136 }
1137
1138 template <typename I>
1139 void Replayer<I>::handle_wait_for_in_flight_ops(int r) {
1140 dout(10) << "r=" << r << dendl;
1141
1142 Context* on_shutdown = nullptr;
1143 {
1144 std::unique_lock locker{m_lock};
1145 ceph_assert(m_on_init_shutdown != nullptr);
1146 std::swap(on_shutdown, m_on_init_shutdown);
1147 }
1148 on_shutdown->complete(m_error_code);
1149 }
1150
1151 template <typename I>
1152 void Replayer<I>::handle_image_update_notify() {
1153 dout(10) << dendl;
1154
1155 std::unique_lock locker{m_lock};
1156 if (m_state == STATE_REPLAYING) {
1157 dout(15) << "flagging snapshot rescan required" << dendl;
1158 m_image_updated = true;
1159 } else if (m_state == STATE_IDLE) {
1160 m_state = STATE_REPLAYING;
1161 locker.unlock();
1162
1163 dout(15) << "restarting idle replayer" << dendl;
1164 load_local_image_meta();
1165 }
1166 }
1167
1168 template <typename I>
1169 void Replayer<I>::handle_replay_complete(int r,
1170 const std::string& description) {
1171 std::unique_lock locker{m_lock};
1172 handle_replay_complete(&locker, r, description);
1173 }
1174
1175 template <typename I>
1176 void Replayer<I>::handle_replay_complete(std::unique_lock<ceph::mutex>* locker,
1177 int r,
1178 const std::string& description) {
1179 ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
1180
1181 if (m_error_code == 0) {
1182 m_error_code = r;
1183 m_error_description = description;
1184 }
1185
1186 if (m_sync_in_progress) {
1187 m_sync_in_progress = false;
1188 m_instance_watcher->notify_sync_complete(
1189 m_state_builder->local_image_ctx->id);
1190 }
1191
1192 if (m_state != STATE_REPLAYING && m_state != STATE_IDLE) {
1193 return;
1194 }
1195
1196 m_state = STATE_COMPLETE;
1197 notify_status_updated();
1198 }
1199
1200 template <typename I>
1201 void Replayer<I>::notify_status_updated() {
1202 ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
1203
1204 dout(10) << dendl;
1205 auto ctx = new C_TrackedOp(this, new LambdaContext(
1206 [this](int) {
1207 m_replayer_listener->handle_notification();
1208 }));
1209 m_threads->work_queue->queue(ctx, 0);
1210 }
1211
1212 template <typename I>
1213 bool Replayer<I>::is_replay_interrupted() {
1214 std::unique_lock locker{m_lock};
1215 return is_replay_interrupted(&locker);
1216 }
1217
1218 template <typename I>
1219 bool Replayer<I>::is_replay_interrupted(std::unique_lock<ceph::mutex>* locker) {
1220 if (m_state == STATE_COMPLETE) {
1221 locker->unlock();
1222
1223 dout(10) << "resuming pending shut down" << dendl;
1224 unregister_remote_update_watcher();
1225 return true;
1226 }
1227 return false;
1228 }
1229
1230 } // namespace snapshot
1231 } // namespace image_replayer
1232 } // namespace mirror
1233 } // namespace rbd
1234
1235 template class rbd::mirror::image_replayer::snapshot::Replayer<librbd::ImageCtx>;