]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "PrepareReplayRequest.h" | |
5 | #include "common/debug.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "journal/Journaler.h" | |
9 | #include "librbd/ImageCtx.h" | |
10 | #include "librbd/Journal.h" | |
11 | #include "librbd/Utils.h" | |
12 | #include "tools/rbd_mirror/ProgressContext.h" | |
13 | #include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" | |
14 | ||
15 | #define dout_context g_ceph_context | |
16 | #define dout_subsys ceph_subsys_rbd_mirror | |
17 | #undef dout_prefix | |
18 | #define dout_prefix *_dout << "rbd::mirror::image_replayer::journal::" \ | |
19 | << "PrepareReplayRequest: " << this << " " \ | |
20 | << __func__ << ": " | |
21 | ||
22 | namespace rbd { | |
23 | namespace mirror { | |
24 | namespace image_replayer { | |
25 | namespace journal { | |
26 | ||
27 | using librbd::util::create_context_callback; | |
28 | ||
29 | template <typename I> | |
30 | void PrepareReplayRequest<I>::send() { | |
31 | *m_resync_requested = false; | |
32 | *m_syncing = false; | |
33 | ||
34 | if (m_state_builder->local_image_id != | |
35 | m_state_builder->remote_client_meta.image_id) { | |
36 | // somehow our local image has a different image id than the image id | |
37 | // registered in the remote image | |
38 | derr << "split-brain detected: local_image_id=" | |
39 | << m_state_builder->local_image_id << ", " | |
40 | << "registered local_image_id=" | |
41 | << m_state_builder->remote_client_meta.image_id << dendl; | |
42 | finish(-EEXIST); | |
43 | return; | |
44 | } | |
45 | ||
46 | std::shared_lock image_locker(m_state_builder->local_image_ctx->image_lock); | |
47 | if (m_state_builder->local_image_ctx->journal == nullptr) { | |
48 | image_locker.unlock(); | |
49 | ||
50 | derr << "local image does not support journaling" << dendl; | |
51 | finish(-EINVAL); | |
52 | return; | |
53 | } | |
54 | ||
55 | int r = m_state_builder->local_image_ctx->journal->is_resync_requested( | |
56 | m_resync_requested); | |
57 | if (r < 0) { | |
58 | image_locker.unlock(); | |
59 | ||
60 | derr << "failed to check if a resync was requested" << dendl; | |
61 | finish(r); | |
62 | return; | |
63 | } | |
64 | ||
65 | m_local_tag_tid = m_state_builder->local_image_ctx->journal->get_tag_tid(); | |
66 | m_local_tag_data = m_state_builder->local_image_ctx->journal->get_tag_data(); | |
67 | dout(10) << "local tag=" << m_local_tag_tid << ", " | |
68 | << "local tag data=" << m_local_tag_data << dendl; | |
69 | image_locker.unlock(); | |
70 | ||
9f95a23c TL |
71 | if (*m_resync_requested) { |
72 | finish(0); | |
73 | return; | |
74 | } else if (m_state_builder->remote_client_meta.state == | |
75 | librbd::journal::MIRROR_PEER_STATE_SYNCING && | |
76 | m_local_tag_data.mirror_uuid == | |
77 | m_state_builder->remote_mirror_uuid) { | |
78 | // if the initial sync hasn't completed, we cannot replay | |
79 | *m_syncing = true; | |
80 | finish(0); | |
81 | return; | |
82 | } | |
83 | ||
84 | update_client_state(); | |
85 | } | |
86 | ||
87 | template <typename I> | |
88 | void PrepareReplayRequest<I>::update_client_state() { | |
89 | if (m_state_builder->remote_client_meta.state != | |
90 | librbd::journal::MIRROR_PEER_STATE_SYNCING || | |
91 | m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) { | |
92 | get_remote_tag_class(); | |
93 | return; | |
94 | } | |
95 | ||
96 | // our local image is not primary, is flagged as syncing on the remote side, | |
97 | // but is no longer tied to the remote -- this implies we were forced | |
98 | // promoted and then demoted at some point | |
99 | dout(15) << dendl; | |
100 | update_progress("UPDATE_CLIENT_STATE"); | |
101 | ||
102 | auto client_meta = m_state_builder->remote_client_meta; | |
103 | client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; | |
104 | ||
105 | librbd::journal::ClientData client_data(client_meta); | |
106 | bufferlist data_bl; | |
107 | encode(client_data, data_bl); | |
108 | ||
109 | auto ctx = create_context_callback< | |
110 | PrepareReplayRequest<I>, | |
111 | &PrepareReplayRequest<I>::handle_update_client_state>(this); | |
112 | m_state_builder->remote_journaler->update_client(data_bl, ctx); | |
113 | } | |
114 | ||
115 | template <typename I> | |
116 | void PrepareReplayRequest<I>::handle_update_client_state(int r) { | |
117 | dout(15) << "r=" << r << dendl; | |
118 | if (r < 0) { | |
119 | derr << "failed to update client: " << cpp_strerror(r) << dendl; | |
120 | finish(r); | |
121 | return; | |
122 | } | |
123 | ||
124 | m_state_builder->remote_client_meta.state = | |
125 | librbd::journal::MIRROR_PEER_STATE_REPLAYING; | |
126 | get_remote_tag_class(); | |
127 | } | |
128 | ||
129 | template <typename I> | |
130 | void PrepareReplayRequest<I>::get_remote_tag_class() { | |
131 | dout(10) << dendl; | |
132 | update_progress("GET_REMOTE_TAG_CLASS"); | |
133 | ||
134 | auto ctx = create_context_callback< | |
135 | PrepareReplayRequest<I>, | |
136 | &PrepareReplayRequest<I>::handle_get_remote_tag_class>(this); | |
137 | m_state_builder->remote_journaler->get_client( | |
138 | librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx); | |
139 | } | |
140 | ||
141 | template <typename I> | |
142 | void PrepareReplayRequest<I>::handle_get_remote_tag_class(int r) { | |
143 | dout(10) << "r=" << r << dendl; | |
144 | ||
145 | if (r < 0) { | |
146 | derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl; | |
147 | finish(r); | |
148 | return; | |
149 | } | |
150 | ||
151 | librbd::journal::ClientData client_data; | |
152 | auto it = m_client.data.cbegin(); | |
153 | try { | |
154 | decode(client_data, it); | |
155 | } catch (const buffer::error &err) { | |
156 | derr << "failed to decode remote client meta data: " << err.what() | |
157 | << dendl; | |
158 | finish(-EBADMSG); | |
159 | return; | |
160 | } | |
161 | ||
162 | librbd::journal::ImageClientMeta *client_meta = | |
163 | boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta); | |
164 | if (client_meta == nullptr) { | |
165 | derr << "unknown remote client registration" << dendl; | |
166 | finish(-EINVAL); | |
167 | return; | |
168 | } | |
169 | ||
170 | m_remote_tag_class = client_meta->tag_class; | |
171 | dout(10) << "remote tag class=" << m_remote_tag_class << dendl; | |
172 | ||
173 | get_remote_tags(); | |
174 | } | |
175 | ||
176 | template <typename I> | |
177 | void PrepareReplayRequest<I>::get_remote_tags() { | |
178 | dout(10) << dendl; | |
179 | update_progress("GET_REMOTE_TAGS"); | |
180 | ||
181 | auto ctx = create_context_callback< | |
182 | PrepareReplayRequest<I>, | |
183 | &PrepareReplayRequest<I>::handle_get_remote_tags>(this); | |
184 | m_state_builder->remote_journaler->get_tags(m_remote_tag_class, | |
185 | &m_remote_tags, ctx); | |
186 | } | |
187 | ||
188 | template <typename I> | |
189 | void PrepareReplayRequest<I>::handle_get_remote_tags(int r) { | |
190 | dout(10) << "r=" << r << dendl; | |
191 | ||
192 | if (r < 0) { | |
193 | derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl; | |
194 | finish(r); | |
195 | return; | |
196 | } | |
197 | ||
198 | // At this point, the local image was existing, non-primary, and replaying; | |
199 | // and the remote image is primary. Attempt to link the local image's most | |
200 | // recent tag to the remote image's tag chain. | |
201 | bool remote_tag_data_valid = false; | |
202 | librbd::journal::TagData remote_tag_data; | |
203 | boost::optional<uint64_t> remote_orphan_tag_tid = | |
204 | boost::make_optional<uint64_t>(false, 0U); | |
205 | bool reconnect_orphan = false; | |
206 | ||
207 | // decode the remote tags | |
208 | for (auto &remote_tag : m_remote_tags) { | |
209 | if (m_local_tag_data.predecessor.commit_valid && | |
210 | m_local_tag_data.predecessor.mirror_uuid == | |
211 | m_state_builder->remote_mirror_uuid && | |
212 | m_local_tag_data.predecessor.tag_tid > remote_tag.tid) { | |
213 | dout(10) << "skipping processed predecessor remote tag " | |
214 | << remote_tag.tid << dendl; | |
215 | continue; | |
216 | } | |
217 | ||
218 | try { | |
219 | auto it = remote_tag.data.cbegin(); | |
220 | decode(remote_tag_data, it); | |
221 | remote_tag_data_valid = true; | |
222 | } catch (const buffer::error &err) { | |
223 | derr << "failed to decode remote tag " << remote_tag.tid << ": " | |
224 | << err.what() << dendl; | |
225 | finish(-EBADMSG); | |
226 | return; | |
227 | } | |
228 | ||
229 | dout(10) << "decoded remote tag " << remote_tag.tid << ": " | |
230 | << remote_tag_data << dendl; | |
231 | ||
232 | if (!m_local_tag_data.predecessor.commit_valid) { | |
233 | // newly synced local image (no predecessor) replays from the first tag | |
234 | if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) { | |
235 | dout(10) << "skipping non-primary remote tag" << dendl; | |
236 | continue; | |
237 | } | |
238 | ||
239 | dout(10) << "using initial primary remote tag" << dendl; | |
240 | break; | |
241 | } | |
242 | ||
243 | if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { | |
244 | // demotion last available local epoch | |
245 | ||
246 | if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid && | |
247 | remote_tag_data.predecessor.commit_valid && | |
248 | remote_tag_data.predecessor.tag_tid == | |
249 | m_local_tag_data.predecessor.tag_tid) { | |
250 | // demotion matches remote epoch | |
251 | ||
252 | if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid && | |
253 | m_local_tag_data.predecessor.mirror_uuid == | |
254 | librbd::Journal<>::LOCAL_MIRROR_UUID) { | |
255 | // local demoted and remote has matching event | |
256 | dout(10) << "found matching local demotion tag" << dendl; | |
257 | remote_orphan_tag_tid = remote_tag.tid; | |
258 | continue; | |
259 | } | |
260 | ||
261 | if (m_local_tag_data.predecessor.mirror_uuid == | |
262 | m_state_builder->remote_mirror_uuid && | |
263 | remote_tag_data.predecessor.mirror_uuid == | |
264 | librbd::Journal<>::LOCAL_MIRROR_UUID) { | |
265 | // remote demoted and local has matching event | |
266 | dout(10) << "found matching remote demotion tag" << dendl; | |
267 | remote_orphan_tag_tid = remote_tag.tid; | |
268 | continue; | |
269 | } | |
270 | } | |
271 | ||
272 | if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID && | |
273 | remote_tag_data.predecessor.mirror_uuid == | |
274 | librbd::Journal<>::ORPHAN_MIRROR_UUID && | |
275 | remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid && | |
276 | remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) { | |
277 | // remote promotion tag chained to remote/local demotion tag | |
278 | dout(10) << "found chained remote promotion tag" << dendl; | |
279 | reconnect_orphan = true; | |
280 | break; | |
281 | } | |
282 | ||
283 | // promotion must follow demotion | |
284 | remote_orphan_tag_tid = boost::none; | |
285 | } | |
286 | } | |
287 | ||
288 | if (remote_tag_data_valid && | |
289 | m_local_tag_data.mirror_uuid == m_state_builder->remote_mirror_uuid) { | |
290 | dout(10) << "local image is in clean replay state" << dendl; | |
291 | } else if (reconnect_orphan) { | |
292 | dout(10) << "remote image was demoted/promoted" << dendl; | |
293 | } else { | |
294 | derr << "split-brain detected -- skipping image replay" << dendl; | |
295 | finish(-EEXIST); | |
296 | return; | |
297 | } | |
298 | ||
299 | finish(0); | |
300 | } | |
301 | ||
302 | template <typename I> | |
303 | void PrepareReplayRequest<I>::update_progress(const std::string &description) { | |
304 | dout(10) << description << dendl; | |
305 | ||
306 | if (m_progress_ctx != nullptr) { | |
307 | m_progress_ctx->update_progress(description); | |
308 | } | |
309 | } | |
310 | ||
311 | } // namespace journal | |
312 | } // namespace image_replayer | |
313 | } // namespace mirror | |
314 | } // namespace rbd | |
315 | ||
316 | template class rbd::mirror::image_replayer::journal::PrepareReplayRequest<librbd::ImageCtx>; |