]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
bump version to 12.2.12-pve1
[ceph.git] / ceph / src / tools / rbd_mirror / image_sync / ObjectCopyRequest.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "ObjectCopyRequest.h"
5#include "librados/snap_set_diff.h"
31f18b77 6#include "librbd/ExclusiveLock.h"
7c673cae
FG
7#include "librbd/ObjectMap.h"
8#include "librbd/Utils.h"
9#include "common/errno.h"
10
11#define dout_context g_ceph_context
12#define dout_subsys ceph_subsys_rbd_mirror
13#undef dout_prefix
14#define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \
15 << this << " " << __func__
16
17namespace librados {
18
19bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) {
20 return (rhs.cloneid == lhs.cloneid &&
21 rhs.snaps == lhs.snaps &&
22 rhs.overlap == lhs.overlap &&
23 rhs.size == lhs.size);
24}
25
26bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) {
27 return (rhs.clones == lhs.clones &&
28 rhs.seq == lhs.seq);
29}
30
31} // namespace librados
32
33namespace rbd {
34namespace mirror {
35namespace image_sync {
36
37using librbd::util::create_context_callback;
38using librbd::util::create_rados_callback;
39
40template <typename I>
41ObjectCopyRequest<I>::ObjectCopyRequest(I *local_image_ctx, I *remote_image_ctx,
42 const SnapMap *snap_map,
43 uint64_t object_number,
44 Context *on_finish)
45 : m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx),
46 m_snap_map(snap_map), m_object_number(object_number),
47 m_on_finish(on_finish) {
48 assert(!snap_map->empty());
49
50 m_local_io_ctx.dup(m_local_image_ctx->data_ctx);
51 m_local_oid = m_local_image_ctx->get_object_name(object_number);
52
53 m_remote_io_ctx.dup(m_remote_image_ctx->data_ctx);
54 m_remote_oid = m_remote_image_ctx->get_object_name(object_number);
55
56 dout(20) << ": "
57 << "remote_oid=" << m_remote_oid << ", "
58 << "local_oid=" << m_local_oid << dendl;
59}
60
61template <typename I>
62void ObjectCopyRequest<I>::send() {
63 send_list_snaps();
64}
65
66template <typename I>
67void ObjectCopyRequest<I>::send_list_snaps() {
68 dout(20) << dendl;
69
70 librados::AioCompletion *rados_completion = create_rados_callback<
71 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
72
73 librados::ObjectReadOperation op;
74 m_snap_set = {};
75 m_snap_ret = 0;
76 op.list_snaps(&m_snap_set, &m_snap_ret);
77
78 m_remote_io_ctx.snap_set_read(CEPH_SNAPDIR);
79 int r = m_remote_io_ctx.aio_operate(m_remote_oid, rados_completion, &op,
80 nullptr);
81 assert(r == 0);
82 rados_completion->release();
83}
84
85template <typename I>
86void ObjectCopyRequest<I>::handle_list_snaps(int r) {
87 if (r == 0 && m_snap_ret < 0) {
88 r = m_snap_ret;
89 }
90
91 dout(20) << ": r=" << r << dendl;
92
93 if (r == -ENOENT) {
94 finish(0);
95 return;
96 }
97
98 if (r < 0) {
99 derr << ": failed to list snaps: " << cpp_strerror(r) << dendl;
100 finish(r);
101 return;
102 }
103
104 if (m_retry_missing_read) {
105 if (m_snap_set == m_retry_snap_set) {
106 derr << ": read encountered missing object using up-to-date snap set"
107 << dendl;
108 finish(-ENOENT);
109 return;
110 }
111
112 dout(20) << ": retrying using updated snap set" << dendl;
113 m_retry_missing_read = false;
114 m_retry_snap_set = {};
115 }
116
117 compute_diffs();
118 send_read_object();
119}
120
121template <typename I>
122void ObjectCopyRequest<I>::send_read_object() {
123 if (m_snap_sync_ops.empty()) {
124 // no more snapshot diffs to read from remote
125 finish(0);
126 return;
127 }
128
129 // build the read request
130 auto &sync_ops = m_snap_sync_ops.begin()->second;
131 assert(!sync_ops.empty());
132
133 bool read_required = false;
134 librados::ObjectReadOperation op;
135 for (auto &sync_op : sync_ops) {
136 switch (sync_op.type) {
137 case SYNC_OP_TYPE_WRITE:
138 if (!read_required) {
139 // map the sync op start snap id back to the necessary read snap id
140 librados::snap_t remote_snap_seq =
141 m_snap_sync_ops.begin()->first.second;
142 m_remote_io_ctx.snap_set_read(remote_snap_seq);
143
144 dout(20) << ": remote_snap_seq=" << remote_snap_seq << dendl;
145 read_required = true;
146 }
147 dout(20) << ": read op: " << sync_op.offset << "~" << sync_op.length
148 << dendl;
149 op.sparse_read(sync_op.offset, sync_op.length, &sync_op.extent_map,
150 &sync_op.out_bl, nullptr);
151 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
152 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
153 break;
154 default:
155 break;
156 }
157 }
158
159 if (!read_required) {
160 // nothing written to this object for this snapshot (must be trunc/remove)
161 send_write_object();
162 return;
163 }
164
165 librados::AioCompletion *comp = create_rados_callback<
166 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
167 int r = m_remote_io_ctx.aio_operate(m_remote_oid, comp, &op, nullptr);
168 assert(r == 0);
169 comp->release();
170}
171
172template <typename I>
173void ObjectCopyRequest<I>::handle_read_object(int r) {
174 dout(20) << ": r=" << r << dendl;
175
94b18763
FG
176 auto snap_seq = m_snap_sync_ops.begin()->first.second;
177 if (r == -ENOENT && m_read_whole_object[snap_seq]) {
178 dout(5) << ": object missing when forced to read whole object"
179 << dendl;
180 r = 0;
181 }
182
7c673cae
FG
183 if (r == -ENOENT) {
184 m_retry_snap_set = m_snap_set;
185 m_retry_missing_read = true;
186
187 dout(5) << ": object missing potentially due to removed snapshot" << dendl;
188 send_list_snaps();
189 return;
190 }
191
192 if (r < 0) {
193 derr << ": failed to read from remote object: " << cpp_strerror(r)
194 << dendl;
195 finish(r);
196 return;
197 }
198
199 send_write_object();
200}
201
202template <typename I>
203void ObjectCopyRequest<I>::send_write_object() {
204 // retrieve the local snap context for the op
205 SnapIds local_snap_ids;
206 librados::snap_t local_snap_seq = 0;
207 librados::snap_t remote_snap_seq = m_snap_sync_ops.begin()->first.first;
208 if (remote_snap_seq != 0) {
209 auto snap_map_it = m_snap_map->find(remote_snap_seq);
210 assert(snap_map_it != m_snap_map->end());
211
212 // write snapshot context should be before actual snapshot
213 if (snap_map_it != m_snap_map->begin()) {
214 --snap_map_it;
215 assert(!snap_map_it->second.empty());
216 local_snap_seq = snap_map_it->second.front();
217 local_snap_ids = snap_map_it->second;
218 }
219 }
220
91327a77 221 int r;
31f18b77
FG
222 Context *finish_op_ctx;
223 {
224 RWLock::RLocker owner_locker(m_local_image_ctx->owner_lock);
91327a77 225 finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock, &r);
31f18b77
FG
226 }
227 if (finish_op_ctx == nullptr) {
228 derr << ": lost exclusive lock" << dendl;
91327a77 229 finish(r);
31f18b77
FG
230 return;
231 }
232
7c673cae
FG
233 dout(20) << ": "
234 << "local_snap_seq=" << local_snap_seq << ", "
235 << "local_snaps=" << local_snap_ids << dendl;
236
237 auto &sync_ops = m_snap_sync_ops.begin()->second;
238 assert(!sync_ops.empty());
239 uint64_t object_offset;
240 uint64_t buffer_offset;
241 librados::ObjectWriteOperation op;
242 for (auto &sync_op : sync_ops) {
243 switch (sync_op.type) {
244 case SYNC_OP_TYPE_WRITE:
245 object_offset = sync_op.offset;
246 buffer_offset = 0;
247 for (auto it : sync_op.extent_map) {
248 if (object_offset < it.first) {
249 dout(20) << ": zero op: " << object_offset << "~"
250 << it.first - object_offset << dendl;
251 op.zero(object_offset, it.first - object_offset);
252 }
253 dout(20) << ": write op: " << it.first << "~" << it.second << dendl;
254 bufferlist tmpbl;
255 tmpbl.substr_of(sync_op.out_bl, buffer_offset, it.second);
256 op.write(it.first, tmpbl);
257 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
258 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
259 buffer_offset += it.second;
260 object_offset = it.first + it.second;
261 }
262 if (object_offset < sync_op.offset + sync_op.length) {
263 uint64_t sync_op_end = sync_op.offset + sync_op.length;
264 assert(sync_op_end <= m_snap_object_sizes[remote_snap_seq]);
265 if (sync_op_end == m_snap_object_sizes[remote_snap_seq]) {
266 dout(20) << ": trunc op: " << object_offset << dendl;
267 op.truncate(object_offset);
268 m_snap_object_sizes[remote_snap_seq] = object_offset;
269 } else {
270 dout(20) << ": zero op: " << object_offset << "~"
271 << sync_op_end - object_offset << dendl;
272 op.zero(object_offset, sync_op_end - object_offset);
273 }
274 }
275 break;
276 case SYNC_OP_TYPE_TRUNC:
277 if (sync_op.offset > m_snap_object_sizes[remote_snap_seq]) {
278 // skip (must have been updated in WRITE op case issuing trunc op)
279 break;
280 }
281 dout(20) << ": trunc op: " << sync_op.offset << dendl;
282 op.truncate(sync_op.offset);
283 break;
284 case SYNC_OP_TYPE_REMOVE:
285 dout(20) << ": remove op" << dendl;
286 op.remove();
287 break;
288 default:
289 assert(false);
290 }
291 }
292
31f18b77
FG
293 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
294 handle_write_object(r);
295 finish_op_ctx->complete(0);
296 });
297 librados::AioCompletion *comp = create_rados_callback(ctx);
91327a77
AA
298 r = m_local_io_ctx.aio_operate(m_local_oid, comp, &op, local_snap_seq,
299 local_snap_ids);
7c673cae
FG
300 assert(r == 0);
301 comp->release();
302}
303
304template <typename I>
305void ObjectCopyRequest<I>::handle_write_object(int r) {
306 dout(20) << ": r=" << r << dendl;
307
308 if (r == -ENOENT) {
309 r = 0;
310 }
311 if (r < 0) {
312 derr << ": failed to write to local object: " << cpp_strerror(r)
313 << dendl;
314 finish(r);
315 return;
316 }
317
318 m_snap_sync_ops.erase(m_snap_sync_ops.begin());
319 if (!m_snap_sync_ops.empty()) {
320 send_read_object();
321 return;
322 }
323
324 send_update_object_map();
325}
326
327template <typename I>
328void ObjectCopyRequest<I>::send_update_object_map() {
31f18b77 329 m_local_image_ctx->owner_lock.get_read();
7c673cae
FG
330 m_local_image_ctx->snap_lock.get_read();
331 if (!m_local_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP,
332 m_local_image_ctx->snap_lock) ||
333 m_snap_object_states.empty()) {
334 m_local_image_ctx->snap_lock.put_read();
31f18b77 335 m_local_image_ctx->owner_lock.put_read();
7c673cae
FG
336 finish(0);
337 return;
338 } else if (m_local_image_ctx->object_map == nullptr) {
339 // possible that exclusive lock was lost in background
340 derr << ": object map is not initialized" << dendl;
341
342 m_local_image_ctx->snap_lock.put_read();
31f18b77 343 m_local_image_ctx->owner_lock.put_read();
7c673cae
FG
344 finish(-EINVAL);
345 return;
346 }
347
348 assert(m_local_image_ctx->object_map != nullptr);
349
350 auto snap_object_state = *m_snap_object_states.begin();
351 m_snap_object_states.erase(m_snap_object_states.begin());
352
353 dout(20) << ": "
354 << "local_snap_id=" << snap_object_state.first << ", "
355 << "object_state=" << static_cast<uint32_t>(snap_object_state.second)
356 << dendl;
357
91327a77
AA
358 int r;
359 auto finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock, &r);
31f18b77
FG
360 if (finish_op_ctx == nullptr) {
361 derr << ": lost exclusive lock" << dendl;
362 m_local_image_ctx->snap_lock.put_read();
363 m_local_image_ctx->owner_lock.put_read();
91327a77 364 finish(r);
31f18b77
FG
365 return;
366 }
367
368 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
369 handle_update_object_map(r);
370 finish_op_ctx->complete(0);
371 });
372
7c673cae
FG
373 RWLock::WLocker object_map_locker(m_local_image_ctx->object_map_lock);
374 bool sent = m_local_image_ctx->object_map->template aio_update<
31f18b77 375 Context, &Context::complete>(
7c673cae 376 snap_object_state.first, m_object_number, snap_object_state.second, {},
91327a77 377 {}, false, ctx);
7c673cae
FG
378 assert(sent);
379 m_local_image_ctx->snap_lock.put_read();
31f18b77 380 m_local_image_ctx->owner_lock.put_read();
7c673cae
FG
381}
382
383template <typename I>
384void ObjectCopyRequest<I>::handle_update_object_map(int r) {
385 dout(20) << ": r=" << r << dendl;
386
387 assert(r == 0);
388 if (!m_snap_object_states.empty()) {
389 send_update_object_map();
390 return;
391 }
392 finish(0);
393}
394
31f18b77 395template <typename I>
91327a77 396Context *ObjectCopyRequest<I>::start_local_op(RWLock &owner_lock, int *r) {
31f18b77
FG
397 assert(m_local_image_ctx->owner_lock.is_locked());
398 if (m_local_image_ctx->exclusive_lock == nullptr) {
91327a77 399 *r = -EROFS;
31f18b77
FG
400 return nullptr;
401 }
91327a77 402 return m_local_image_ctx->exclusive_lock->start_op(r);
31f18b77
FG
403}
404
7c673cae
FG
405template <typename I>
406void ObjectCopyRequest<I>::compute_diffs() {
407 CephContext *cct = m_local_image_ctx->cct;
408
409 m_snap_sync_ops = {};
410 m_snap_object_states = {};
411 m_snap_object_sizes = {};
412
413 librados::snap_t remote_sync_pont_snap_id = m_snap_map->rbegin()->first;
414 uint64_t prev_end_size = 0;
415 bool prev_exists = false;
416 librados::snap_t start_remote_snap_id = 0;
417 for (auto &pair : *m_snap_map) {
418 assert(!pair.second.empty());
419 librados::snap_t end_remote_snap_id = pair.first;
420 librados::snap_t end_local_snap_id = pair.second.front();
421
422 interval_set<uint64_t> diff;
423 uint64_t end_size;
424 bool exists;
425 librados::snap_t clone_end_snap_id;
94b18763 426 bool read_whole_object;
7c673cae
FG
427 calc_snap_set_diff(cct, m_snap_set, start_remote_snap_id,
428 end_remote_snap_id, &diff, &end_size, &exists,
94b18763
FG
429 &clone_end_snap_id, &read_whole_object);
430
431 if (read_whole_object) {
432 dout(1) << ": need to read full object" << dendl;
433 diff.insert(0, m_remote_image_ctx->layout.object_size);
434 exists = true;
435 end_size = m_remote_image_ctx->layout.object_size;
436 clone_end_snap_id = end_remote_snap_id;
437 }
7c673cae
FG
438
439 dout(20) << ": "
440 << "start_remote_snap=" << start_remote_snap_id << ", "
441 << "end_remote_snap_id=" << end_remote_snap_id << ", "
442 << "clone_end_snap_id=" << clone_end_snap_id << ", "
443 << "end_local_snap_id=" << end_local_snap_id << ", "
444 << "diff=" << diff << ", "
445 << "end_size=" << end_size << ", "
446 << "exists=" << exists << dendl;
447 if (exists) {
448 // clip diff to size of object (in case it was truncated)
449 if (end_size < prev_end_size) {
450 interval_set<uint64_t> trunc;
451 trunc.insert(end_size, prev_end_size);
452 trunc.intersection_of(diff);
453 diff.subtract(trunc);
454 dout(20) << ": clearing truncate diff: " << trunc << dendl;
455 }
456
457 // prepare the object map state
458 {
459 RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
460 uint8_t object_state = OBJECT_EXISTS;
461 if (m_local_image_ctx->test_features(RBD_FEATURE_FAST_DIFF,
462 m_local_image_ctx->snap_lock) &&
463 prev_exists && diff.empty() && end_size == prev_end_size) {
464 object_state = OBJECT_EXISTS_CLEAN;
465 }
466 m_snap_object_states[end_local_snap_id] = object_state;
467 }
468
469 // reads should be issued against the newest (existing) snapshot within
470 // the associated snapshot object clone. writes should be issued
471 // against the oldest snapshot in the snap_map.
472 assert(clone_end_snap_id >= end_remote_snap_id);
473 if (clone_end_snap_id > remote_sync_pont_snap_id) {
474 // do not read past the sync point snapshot
475 clone_end_snap_id = remote_sync_pont_snap_id;
476 }
94b18763 477 m_read_whole_object[clone_end_snap_id] = read_whole_object;
7c673cae
FG
478
479 // object write/zero, or truncate
480 // NOTE: a single snapshot clone might represent multiple snapshots, but
481 // the write/zero and truncate ops will only be associated with the first
482 // snapshot encountered within the clone since the diff will be empty for
483 // subsequent snapshots and the size will remain constant for a clone.
484 for (auto it = diff.begin(); it != diff.end(); ++it) {
485 dout(20) << ": read/write op: " << it.get_start() << "~"
486 << it.get_len() << dendl;
487 m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back(
488 SYNC_OP_TYPE_WRITE, it.get_start(), it.get_len());
489 }
490 if (end_size < prev_end_size) {
491 dout(20) << ": trunc op: " << end_size << dendl;
492 m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back(
493 SYNC_OP_TYPE_TRUNC, end_size, 0U);
494 }
495 m_snap_object_sizes[end_remote_snap_id] = end_size;
496 } else {
497 if (prev_exists) {
498 // object remove
499 dout(20) << ": remove op" << dendl;
500 m_snap_sync_ops[{end_remote_snap_id, end_remote_snap_id}].emplace_back(
501 SYNC_OP_TYPE_REMOVE, 0U, 0U);
502 }
503 }
504
505 prev_end_size = end_size;
506 prev_exists = exists;
507 start_remote_snap_id = end_remote_snap_id;
508 }
509}
510
511template <typename I>
512void ObjectCopyRequest<I>::finish(int r) {
513 dout(20) << ": r=" << r << dendl;
514
515 // ensure IoCtxs are closed prior to proceeding
516 auto on_finish = m_on_finish;
517 delete this;
518
519 on_finish->complete(r);
520}
521
522} // namespace image_sync
523} // namespace mirror
524} // namespace rbd
525
526template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;