1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "PrepareReplayRequest.h"
5 #include "common/debug.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "journal/Journaler.h"
9 #include "librbd/ImageCtx.h"
10 #include "librbd/Journal.h"
11 #include "librbd/Utils.h"
12 #include "tools/rbd_mirror/ProgressContext.h"
13 #include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
15 #define dout_context g_ceph_context
16 #define dout_subsys ceph_subsys_rbd_mirror
18 #define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
19 << "PrepareReplayRequest: " << this << " " \
24 namespace image_replayer
{
27 using librbd::util::create_context_callback
;
// PrepareReplayRequest<I>::send() -- entry point of the request.
//
// As far as this listing shows, the method:
//   * clears *m_resync_requested;
//   * logs a split-brain error when the local image id differs from the image
//     id stored in the remote client meta;
//   * takes a shared lock on the local image's image_lock, checks that the
//     local image has a journal, asks that journal whether a resync was
//     requested, and copies the journal's tag tid / tag data into
//     m_local_tag_tid / m_local_tag_data;
//   * compares the local tag's mirror uuid and the remote promotion state to
//     detect a non-primary remote that the local image is not linked to;
//   * ends by calling update_client_state().
//
// NOTE(review): the embedded original line numbers skip (e.g. 41 -> 46,
// 50 -> 55, 76 -> 82), so the statements that terminate each error branch, the
// closing braces and the preceding template header are not visible here.
// Confirm how each branch completes against the full file.
30 void PrepareReplayRequest
<I
>::send() {
31 *m_resync_requested
= false;
34 if (m_state_builder
->local_image_id
!=
35 m_state_builder
->remote_client_meta
.image_id
) {
36 // somehow our local image has a different image id than the image id
37 // registered in the remote image
38 derr
<< "split-brain detected: local_image_id="
39 << m_state_builder
->local_image_id
<< ", "
40 << "registered local_image_id="
41 << m_state_builder
->remote_client_meta
.image_id
<< dendl
;
// Shared (reader) lock on the local image's image_lock; it is released by
// hand before each log statement / after the tag snapshot visible below.
46 std::shared_lock
image_locker(m_state_builder
->local_image_ctx
->image_lock
);
47 if (m_state_builder
->local_image_ctx
->journal
== nullptr) {
48 image_locker
.unlock();
50 derr
<< "local image does not support journaling" << dendl
;
// Ask the local journal whether a resync was requested.
// NOTE(review): the argument line of this call is not visible in the listing.
55 int r
= m_state_builder
->local_image_ctx
->journal
->is_resync_requested(
58 image_locker
.unlock();
60 derr
<< "failed to check if a resync was requested" << dendl
;
// Snapshot the journal's current tag while the lock is still held.
65 m_local_tag_tid
= m_state_builder
->local_image_ctx
->journal
->get_tag_tid();
66 m_local_tag_data
= m_state_builder
->local_image_ctx
->journal
->get_tag_data();
67 dout(10) << "local tag=" << m_local_tag_tid
<< ", "
68 << "local tag data=" << m_local_tag_data
<< dendl
;
69 image_locker
.unlock();
// From here on only the copies taken above (m_local_tag_data) are consulted.
71 if (m_local_tag_data
.mirror_uuid
!= m_state_builder
->remote_mirror_uuid
&&
72 m_remote_promotion_state
!= librbd::mirror::PROMOTION_STATE_PRIMARY
) {
73 // if the local mirror is not linked to the (now) non-primary image,
74 // stop the replay. Otherwise, we ignore that the remote is non-primary
75 // so that we can replay the demotion
76 dout(5) << "remote image is not primary -- skipping image replay"
// NOTE(review): the bodies of the two branches below are not visible in this
// listing; only their conditions are.
82 if (*m_resync_requested
) {
85 } else if (m_state_builder
->remote_client_meta
.state
==
86 librbd::journal::MIRROR_PEER_STATE_SYNCING
&&
87 m_local_tag_data
.mirror_uuid
==
88 m_state_builder
->remote_mirror_uuid
) {
89 // if the initial sync hasn't completed, we cannot replay
95 update_client_state();
// PrepareReplayRequest<I>::update_client_state()
//
// Calls get_remote_tag_class() when the remote client meta is not in the
// SYNCING state, or when the local tag's mirror uuid equals the remote mirror
// uuid. The remaining statements report progress "UPDATE_CLIENT_STATE", copy
// the remote client meta, set the copy's state to
// MIRROR_PEER_STATE_REPLAYING, encode it as ClientData into data_bl and hand
// it to remote_journaler->update_client() with handle_update_client_state()
// as the completion callback.
//
// NOTE(review): the end of the first branch, the declaration of data_bl, the
// closing braces and the template header are not visible in this listing;
// presumably the first branch returns before the update path -- confirm
// against the full file.
99 void PrepareReplayRequest
<I
>::update_client_state() {
100 if (m_state_builder
->remote_client_meta
.state
!=
101 librbd::journal::MIRROR_PEER_STATE_SYNCING
||
102 m_local_tag_data
.mirror_uuid
== m_state_builder
->remote_mirror_uuid
) {
103 get_remote_tag_class();
107 // our local image is not primary, is flagged as syncing on the remote side,
108 // but is no longer tied to the remote -- this implies we were forced
109 // promoted and then demoted at some point
111 update_progress("UPDATE_CLIENT_STATE");
// Work on a copy; the member copy is updated by the completion handler.
113 auto client_meta
= m_state_builder
->remote_client_meta
;
114 client_meta
.state
= librbd::journal::MIRROR_PEER_STATE_REPLAYING
;
116 librbd::journal::ClientData
client_data(client_meta
);
118 encode(client_data
, data_bl
);
120 auto ctx
= create_context_callback
<
121 PrepareReplayRequest
<I
>,
122 &PrepareReplayRequest
<I
>::handle_update_client_state
>(this);
123 m_state_builder
->remote_journaler
->update_client(data_bl
, ctx
);
// Completion callback registered by update_client_state() for the
// remote_journaler->update_client() call.
//
// @param r  result code passed to the callback; it is logged at level 15 and
//           rendered with cpp_strerror() in the error message.
//
// Visible behaviour: logs r, contains an error log "failed to update client",
// stores MIRROR_PEER_STATE_REPLAYING into
// m_state_builder->remote_client_meta.state and calls get_remote_tag_class().
//
// NOTE(review): the condition guarding the error log and whatever ends that
// branch are not visible in this listing (original lines 129, 131-134 are
// missing); confirm against the full file.
126 template <typename I
>
127 void PrepareReplayRequest
<I
>::handle_update_client_state(int r
) {
128 dout(15) << "r=" << r
<< dendl
;
130 derr
<< "failed to update client: " << cpp_strerror(r
) << dendl
;
135 m_state_builder
->remote_client_meta
.state
=
136 librbd::journal::MIRROR_PEER_STATE_REPLAYING
;
137 get_remote_tag_class();
// Reports progress "GET_REMOTE_TAG_CLASS", then asks the remote journaler for
// the client registered under librbd::Journal<>::IMAGE_CLIENT_ID. The result
// is written to m_client and handle_get_remote_tag_class() is invoked on
// completion.
//
// NOTE(review): original lines 142 and 150 are not visible in this listing
// (the closing brace among them).
140 template <typename I
>
141 void PrepareReplayRequest
<I
>::get_remote_tag_class() {
143 update_progress("GET_REMOTE_TAG_CLASS");
145 auto ctx
= create_context_callback
<
146 PrepareReplayRequest
<I
>,
147 &PrepareReplayRequest
<I
>::handle_get_remote_tag_class
>(this);
148 m_state_builder
->remote_journaler
->get_client(
149 librbd::Journal
<>::IMAGE_CLIENT_ID
, &m_client
, ctx
);
// Completion callback registered by get_remote_tag_class() for the
// remote_journaler->get_client() call.
//
// @param r  result code passed to the callback; logged at level 10 and
//           rendered with cpp_strerror() in the error message.
//
// Visible behaviour: decodes m_client.data into a ClientData (a buffer::error
// handler logs "failed to decode remote client meta data"), extracts the
// ImageClientMeta alternative from client_data.client_meta, logs "unknown
// remote client registration" when that alternative is absent, and otherwise
// stores its tag_class in m_remote_tag_class.
//
// NOTE(review): the guard around the first error log, the opening of the try
// block, the end of each error branch and the statement(s) following the
// final log are not visible in this listing; confirm against the full file.
152 template <typename I
>
153 void PrepareReplayRequest
<I
>::handle_get_remote_tag_class(int r
) {
154 dout(10) << "r=" << r
<< dendl
;
157 derr
<< "failed to retrieve remote client: " << cpp_strerror(r
) << dendl
;
162 librbd::journal::ClientData client_data
;
163 auto it
= m_client
.data
.cbegin();
165 decode(client_data
, it
);
166 } catch (const buffer::error
&err
) {
167 derr
<< "failed to decode remote client meta data: " << err
.what()
// Pointer form of boost::get: yields nullptr when client_meta does not hold an
// ImageClientMeta, which is what the check below tests for.
173 librbd::journal::ImageClientMeta
*client_meta
=
174 boost::get
<librbd::journal::ImageClientMeta
>(&client_data
.client_meta
);
175 if (client_meta
== nullptr) {
176 derr
<< "unknown remote client registration" << dendl
;
181 m_remote_tag_class
= client_meta
->tag_class
;
182 dout(10) << "remote tag class=" << m_remote_tag_class
<< dendl
;
// Reports progress "GET_REMOTE_TAGS", then asks the remote journaler for the
// tags of class m_remote_tag_class. The tags are written to m_remote_tags and
// handle_get_remote_tags() is invoked on completion.
//
// NOTE(review): original lines 189 and 197 are not visible in this listing
// (the closing brace among them).
187 template <typename I
>
188 void PrepareReplayRequest
<I
>::get_remote_tags() {
190 update_progress("GET_REMOTE_TAGS");
192 auto ctx
= create_context_callback
<
193 PrepareReplayRequest
<I
>,
194 &PrepareReplayRequest
<I
>::handle_get_remote_tags
>(this);
195 m_state_builder
->remote_journaler
->get_tags(m_remote_tag_class
,
196 &m_remote_tags
, ctx
);
// Completion callback registered by get_remote_tags() for the
// remote_journaler->get_tags() call.
//
// @param r  result code passed to the callback; logged at level 10 and
//           rendered with cpp_strerror() in the error message.
//
// Visible behaviour: iterates over m_remote_tags, decoding each tag's data
// into remote_tag_data, and maintains three pieces of state:
//   * remote_tag_data_valid  -- set once a remote tag has been decoded;
//   * remote_orphan_tag_tid  -- tid of a remote tag recognised as matching a
//                               demotion (local or remote), or empty;
//   * reconnect_orphan       -- set when a remote promotion tag is chained to
//                               that demotion tag.
// After the loop it logs "local image is in clean replay state" when a tag was
// decoded and the local tag's mirror uuid equals the remote mirror uuid,
// "remote image was demoted/promoted" when reconnect_orphan is set, and there
// is a "split-brain detected -- skipping image replay" error log.
//
// NOTE(review): this listing has many gaps (original line numbers skip
// throughout), so the loop-control statements (continue/break), the opening of
// the try block, the else keywords, the branch terminators and the final
// completion call are not visible. The nesting of the conditions below cannot
// be established from this listing alone; confirm against the full file
// before changing any of this logic.
199 template <typename I
>
200 void PrepareReplayRequest
<I
>::handle_get_remote_tags(int r
) {
201 dout(10) << "r=" << r
<< dendl
;
204 derr
<< "failed to retrieve remote tags: " << cpp_strerror(r
) << dendl
;
209 // At this point, the local image was existing, non-primary, and replaying;
210 // and the remote image is primary. Attempt to link the local image's most
211 // recent tag to the remote image's tag chain.
212 bool remote_tag_data_valid
= false;
213 librbd::journal::TagData remote_tag_data
;
// make_optional with a false condition yields an empty optional: no candidate
// demotion tag has been seen yet.
214 boost::optional
<uint64_t> remote_orphan_tag_tid
=
215 boost::make_optional
<uint64_t>(false, 0U);
216 bool reconnect_orphan
= false;
218 // decode the remote tags
219 for (auto &remote_tag
: m_remote_tags
) {
// Remote tags older than the local tag's (remote-owned, committed) predecessor
// are reported as already processed.
220 if (m_local_tag_data
.predecessor
.commit_valid
&&
221 m_local_tag_data
.predecessor
.mirror_uuid
==
222 m_state_builder
->remote_mirror_uuid
&&
223 m_local_tag_data
.predecessor
.tag_tid
> remote_tag
.tid
) {
224 dout(10) << "skipping processed predecessor remote tag "
225 << remote_tag
.tid
<< dendl
;
230 auto it
= remote_tag
.data
.cbegin();
231 decode(remote_tag_data
, it
);
232 remote_tag_data_valid
= true;
233 } catch (const buffer::error
&err
) {
234 derr
<< "failed to decode remote tag " << remote_tag
.tid
<< ": "
235 << err
.what() << dendl
;
240 dout(10) << "decoded remote tag " << remote_tag
.tid
<< ": "
241 << remote_tag_data
<< dendl
;
243 if (!m_local_tag_data
.predecessor
.commit_valid
) {
244 // newly synced local image (no predecessor) replays from the first tag
245 if (remote_tag_data
.mirror_uuid
!= librbd::Journal
<>::LOCAL_MIRROR_UUID
) {
246 dout(10) << "skipping non-primary remote tag" << dendl
;
250 dout(10) << "using initial primary remote tag" << dendl
;
254 if (m_local_tag_data
.mirror_uuid
== librbd::Journal
<>::ORPHAN_MIRROR_UUID
) {
255 // demotion last available local epoch
257 if (remote_tag_data
.mirror_uuid
== m_local_tag_data
.mirror_uuid
&&
258 remote_tag_data
.predecessor
.commit_valid
&&
259 remote_tag_data
.predecessor
.tag_tid
==
260 m_local_tag_data
.predecessor
.tag_tid
) {
261 // demotion matches remote epoch
263 if (remote_tag_data
.predecessor
.mirror_uuid
== m_local_mirror_uuid
&&
264 m_local_tag_data
.predecessor
.mirror_uuid
==
265 librbd::Journal
<>::LOCAL_MIRROR_UUID
) {
266 // local demoted and remote has matching event
267 dout(10) << "found matching local demotion tag" << dendl
;
268 remote_orphan_tag_tid
= remote_tag
.tid
;
272 if (m_local_tag_data
.predecessor
.mirror_uuid
==
273 m_state_builder
->remote_mirror_uuid
&&
274 remote_tag_data
.predecessor
.mirror_uuid
==
275 librbd::Journal
<>::LOCAL_MIRROR_UUID
) {
276 // remote demoted and local has matching event
277 dout(10) << "found matching remote demotion tag" << dendl
;
278 remote_orphan_tag_tid
= remote_tag
.tid
;
// A promotion tag only counts when its predecessor is exactly the demotion tag
// recorded in remote_orphan_tag_tid (which must be set).
283 if (remote_tag_data
.mirror_uuid
== librbd::Journal
<>::LOCAL_MIRROR_UUID
&&
284 remote_tag_data
.predecessor
.mirror_uuid
==
285 librbd::Journal
<>::ORPHAN_MIRROR_UUID
&&
286 remote_tag_data
.predecessor
.commit_valid
&& remote_orphan_tag_tid
&&
287 remote_tag_data
.predecessor
.tag_tid
== *remote_orphan_tag_tid
) {
288 // remote promotion tag chained to remote/local demotion tag
289 dout(10) << "found chained remote promotion tag" << dendl
;
290 reconnect_orphan
= true;
294 // promotion must follow demotion
295 remote_orphan_tag_tid
= boost::none
;
// Final classification based on the state gathered above.
299 if (remote_tag_data_valid
&&
300 m_local_tag_data
.mirror_uuid
== m_state_builder
->remote_mirror_uuid
) {
301 dout(10) << "local image is in clean replay state" << dendl
;
302 } else if (reconnect_orphan
) {
303 dout(10) << "remote image was demoted/promoted" << dendl
;
305 derr
<< "split-brain detected -- skipping image replay" << dendl
;
// Logs the given step description at level 10 and, when a progress context was
// supplied (m_progress_ctx is non-null), forwards the description to it.
//
// @param description  text identifying the current step; callers in this file
//                     pass literals such as "GET_REMOTE_TAGS".
//
// NOTE(review): the closing braces (original lines 319-320) are not visible in
// this listing.
313 template <typename I
>
314 void PrepareReplayRequest
<I
>::update_progress(const std::string
&description
) {
315 dout(10) << description
<< dendl
;
317 if (m_progress_ctx
!= nullptr) {
318 m_progress_ctx
->update_progress(description
);
322 } // namespace journal
323 } // namespace image_replayer
324 } // namespace mirror
// Explicit instantiation of PrepareReplayRequest for librbd::ImageCtx, so the
// member definitions in this file are emitted for that image context type.
327 template class rbd::mirror::image_replayer::journal::PrepareReplayRequest
<librbd::ImageCtx
>;