]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd_mirror/image_replayer/journal/PrepareReplayRequest.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / tools / rbd_mirror / image_replayer / journal / PrepareReplayRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "PrepareReplayRequest.h"
5 #include "common/debug.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "journal/Journaler.h"
9 #include "librbd/ImageCtx.h"
10 #include "librbd/Journal.h"
11 #include "librbd/Utils.h"
12 #include "tools/rbd_mirror/ProgressContext.h"
13 #include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h"
14
15 #define dout_context g_ceph_context
16 #define dout_subsys ceph_subsys_rbd_mirror
17 #undef dout_prefix
18 #define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \
19 << "PrepareReplayRequest: " << this << " " \
20 << __func__ << ": "
21
22 namespace rbd {
23 namespace mirror {
24 namespace image_replayer {
25 namespace journal {
26
27 using librbd::util::create_context_callback;
28
29 template <typename I>
30 void PrepareReplayRequest<I>::send() {
31 *m_resync_requested = false;
32 *m_syncing = false;
33
34 if (m_state_builder->local_image_id !=
35 m_state_builder->remote_client_meta.image_id) {
36 // somehow our local image has a different image id than the image id
37 // registered in the remote image
38 derr << "split-brain detected: local_image_id="
39 << m_state_builder->local_image_id << ", "
40 << "registered local_image_id="
41 << m_state_builder->remote_client_meta.image_id << dendl;
42 finish(-EEXIST);
43 return;
44 }
45
46 std::shared_lock image_locker(m_state_builder->local_image_ctx->image_lock);
47 if (m_state_builder->local_image_ctx->journal == nullptr) {
48 image_locker.unlock();
49
50 derr << "local image does not support journaling" << dendl;
51 finish(-EINVAL);
52 return;
53 }
54
55 int r = m_state_builder->local_image_ctx->journal->is_resync_requested(
56 m_resync_requested);
57 if (r < 0) {
58 image_locker.unlock();
59
60 derr << "failed to check if a resync was requested" << dendl;
61 finish(r);
62 return;
63 }
64
65 m_local_tag_tid = m_state_builder->local_image_ctx->journal->get_tag_tid();
66 m_local_tag_data = m_state_builder->local_image_ctx->journal->get_tag_data();
67 dout(10) << "local tag=" << m_local_tag_tid << ", "
68 << "local tag data=" << m_local_tag_data << dendl;
69 image_locker.unlock();
70
71 if (m_local_tag_data.mirror_uuid != m_state_builder->remote_mirror_uuid &&
72 m_remote_promotion_state != librbd::mirror::PROMOTION_STATE_PRIMARY) {
73 // if the local mirror is not linked to the (now) non-primary image,
74 // stop the replay. Otherwise, we ignore that the remote is non-primary
75 // so that we can replay the demotion
76 dout(5) << "remote image is not primary -- skipping image replay"
77 << dendl;
78 finish(-EREMOTEIO);
79 return;
80 }
81
82 if (*m_resync_requested) {
83 finish(0);
84 return;
85 } else if (m_state_builder->remote_client_meta.state ==
86 librbd::journal::MIRROR_PEER_STATE_SYNCING &&
87 m_local_tag_data.mirror_uuid ==
88 m_state_builder->remote_mirror_uuid) {
89 // if the initial sync hasn't completed, we cannot replay
90 *m_syncing = true;
91 finish(0);
92 return;
93 }
94
95 update_client_state();
96 }
97
98 template <typename I>
99 void PrepareReplayRequest<I>::update_client_state() {
100 if (m_state_builder->remote_client_meta.state !=
101 librbd::journal::MIRROR_PEER_STATE_SYNCING ||
102 m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
103 get_remote_tag_class();
104 return;
105 }
106
107 // our local image is not primary, is flagged as syncing on the remote side,
108 // but is no longer tied to the remote -- this implies we were forced
109 // promoted and then demoted at some point
110 dout(15) << dendl;
111 update_progress("UPDATE_CLIENT_STATE");
112
113 auto client_meta = m_state_builder->remote_client_meta;
114 client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
115
116 librbd::journal::ClientData client_data(client_meta);
117 bufferlist data_bl;
118 encode(client_data, data_bl);
119
120 auto ctx = create_context_callback<
121 PrepareReplayRequest<I>,
122 &PrepareReplayRequest<I>::handle_update_client_state>(this);
123 m_state_builder->remote_journaler->update_client(data_bl, ctx);
124 }
125
126 template <typename I>
127 void PrepareReplayRequest<I>::handle_update_client_state(int r) {
128 dout(15) << "r=" << r << dendl;
129 if (r < 0) {
130 derr << "failed to update client: " << cpp_strerror(r) << dendl;
131 finish(r);
132 return;
133 }
134
135 m_state_builder->remote_client_meta.state =
136 librbd::journal::MIRROR_PEER_STATE_REPLAYING;
137 get_remote_tag_class();
138 }
139
140 template <typename I>
141 void PrepareReplayRequest<I>::get_remote_tag_class() {
142 dout(10) << dendl;
143 update_progress("GET_REMOTE_TAG_CLASS");
144
145 auto ctx = create_context_callback<
146 PrepareReplayRequest<I>,
147 &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this);
148 m_state_builder->remote_journaler->get_client(
149 librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx);
150 }
151
152 template <typename I>
153 void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) {
154 dout(10) << "r=" << r << dendl;
155
156 if (r < 0) {
157 derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl;
158 finish(r);
159 return;
160 }
161
162 librbd::journal::ClientData client_data;
163 auto it = m_client.data.cbegin();
164 try {
165 decode(client_data, it);
166 } catch (const buffer::error &err) {
167 derr << "failed to decode remote client meta data: " << err.what()
168 << dendl;
169 finish(-EBADMSG);
170 return;
171 }
172
173 librbd::journal::ImageClientMeta *client_meta =
174 boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta);
175 if (client_meta == nullptr) {
176 derr << "unknown remote client registration" << dendl;
177 finish(-EINVAL);
178 return;
179 }
180
181 m_remote_tag_class = client_meta->tag_class;
182 dout(10) << "remote tag class=" << m_remote_tag_class << dendl;
183
184 get_remote_tags();
185 }
186
187 template <typename I>
188 void PrepareReplayRequest<I>::get_remote_tags() {
189 dout(10) << dendl;
190 update_progress("GET_REMOTE_TAGS");
191
192 auto ctx = create_context_callback<
193 PrepareReplayRequest<I>,
194 &PrepareReplayRequest<I>::handle_get_remote_tags>(this);
195 m_state_builder->remote_journaler->get_tags(m_remote_tag_class,
196 &m_remote_tags, ctx);
197 }
198
199 template <typename I>
200 void PrepareReplayRequest<I>::handle_get_remote_tags(int r) {
201 dout(10) << "r=" << r << dendl;
202
203 if (r < 0) {
204 derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl;
205 finish(r);
206 return;
207 }
208
209 // At this point, the local image was existing, non-primary, and replaying;
210 // and the remote image is primary. Attempt to link the local image's most
211 // recent tag to the remote image's tag chain.
212 bool remote_tag_data_valid = false;
213 librbd::journal::TagData remote_tag_data;
214 boost::optional<uint64_t> remote_orphan_tag_tid =
215 boost::make_optional<uint64_t>(false, 0U);
216 bool reconnect_orphan = false;
217
218 // decode the remote tags
219 for (auto &remote_tag : m_remote_tags) {
220 if (m_local_tag_data.predecessor.commit_valid &&
221 m_local_tag_data.predecessor.mirror_uuid ==
222 m_state_builder->remote_mirror_uuid &&
223 m_local_tag_data.predecessor.tag_tid > remote_tag.tid) {
224 dout(10) << "skipping processed predecessor remote tag "
225 << remote_tag.tid << dendl;
226 continue;
227 }
228
229 try {
230 auto it = remote_tag.data.cbegin();
231 decode(remote_tag_data, it);
232 remote_tag_data_valid = true;
233 } catch (const buffer::error &err) {
234 derr << "failed to decode remote tag " << remote_tag.tid << ": "
235 << err.what() << dendl;
236 finish(-EBADMSG);
237 return;
238 }
239
240 dout(10) << "decoded remote tag " << remote_tag.tid << ": "
241 << remote_tag_data << dendl;
242
243 if (!m_local_tag_data.predecessor.commit_valid) {
244 // newly synced local image (no predecessor) replays from the first tag
245 if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) {
246 dout(10) << "skipping non-primary remote tag" << dendl;
247 continue;
248 }
249
250 dout(10) << "using initial primary remote tag" << dendl;
251 break;
252 }
253
254 if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) {
255 // demotion last available local epoch
256
257 if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid &&
258 remote_tag_data.predecessor.commit_valid &&
259 remote_tag_data.predecessor.tag_tid ==
260 m_local_tag_data.predecessor.tag_tid) {
261 // demotion matches remote epoch
262
263 if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid &&
264 m_local_tag_data.predecessor.mirror_uuid ==
265 librbd::Journal<>::LOCAL_MIRROR_UUID) {
266 // local demoted and remote has matching event
267 dout(10) << "found matching local demotion tag" << dendl;
268 remote_orphan_tag_tid = remote_tag.tid;
269 continue;
270 }
271
272 if (m_local_tag_data.predecessor.mirror_uuid ==
273 m_state_builder->remote_mirror_uuid &&
274 remote_tag_data.predecessor.mirror_uuid ==
275 librbd::Journal<>::LOCAL_MIRROR_UUID) {
276 // remote demoted and local has matching event
277 dout(10) << "found matching remote demotion tag" << dendl;
278 remote_orphan_tag_tid = remote_tag.tid;
279 continue;
280 }
281 }
282
283 if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID &&
284 remote_tag_data.predecessor.mirror_uuid ==
285 librbd::Journal<>::ORPHAN_MIRROR_UUID &&
286 remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid &&
287 remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) {
288 // remote promotion tag chained to remote/local demotion tag
289 dout(10) << "found chained remote promotion tag" << dendl;
290 reconnect_orphan = true;
291 break;
292 }
293
294 // promotion must follow demotion
295 remote_orphan_tag_tid = boost::none;
296 }
297 }
298
299 if (remote_tag_data_valid &&
300 m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) {
301 dout(10) << "local image is in clean replay state" << dendl;
302 } else if (reconnect_orphan) {
303 dout(10) << "remote image was demoted/promoted" << dendl;
304 } else {
305 derr << "split-brain detected -- skipping image replay" << dendl;
306 finish(-EEXIST);
307 return;
308 }
309
310 finish(0);
311 }
312
313 template <typename I>
314 void PrepareReplayRequest<I>::update_progress(const std::string &description) {
315 dout(10) << description << dendl;
316
317 if (m_progress_ctx != nullptr) {
318 m_progress_ctx->update_progress(description);
319 }
320 }
321
322 } // namespace journal
323 } // namespace image_replayer
324 } // namespace mirror
325 } // namespace rbd
326
327 template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>;