]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/deep_copy/ObjectCopyRequest.cc
4e8d23d53fb6af103e1f258e5c7eeef3e7649496
[ceph.git] / ceph / src / librbd / deep_copy / ObjectCopyRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "ObjectCopyRequest.h"
5 #include "common/errno.h"
6 #include "librados/snap_set_diff.h"
7 #include "librbd/ExclusiveLock.h"
8 #include "librbd/ObjectMap.h"
9 #include "librbd/Utils.h"
10 #include "librbd/deep_copy/Handler.h"
11 #include "librbd/io/AioCompletion.h"
12 #include "librbd/io/AsyncOperation.h"
13 #include "librbd/io/ImageRequest.h"
14 #include "librbd/io/ReadResult.h"
15 #include "osdc/Striper.h"
16
17 #define dout_subsys ceph_subsys_rbd
18 #undef dout_prefix
19 #define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \
20 << this << " " << __func__ << ": "
21
22 namespace librados {
23
24 inline bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) {
25 return (rhs.cloneid == lhs.cloneid &&
26 rhs.snaps == lhs.snaps &&
27 rhs.overlap == lhs.overlap &&
28 rhs.size == lhs.size);
29 }
30
31 inline bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) {
32 return (rhs.clones == lhs.clones &&
33 rhs.seq == lhs.seq);
34 }
35
36 } // namespace librados
37
38 namespace librbd {
39 namespace deep_copy {
40
41 using librbd::util::create_context_callback;
42 using librbd::util::create_rados_callback;
43
44 template <typename I>
45 ObjectCopyRequest<I>::ObjectCopyRequest(I *src_image_ctx,
46 I *dst_image_ctx,
47 librados::snap_t src_snap_id_start,
48 librados::snap_t dst_snap_id_start,
49 const SnapMap &snap_map,
50 uint64_t dst_object_number,
51 bool flatten, Handler* handler,
52 Context *on_finish)
53 : m_src_image_ctx(src_image_ctx),
54 m_dst_image_ctx(dst_image_ctx), m_cct(dst_image_ctx->cct),
55 m_src_snap_id_start(src_snap_id_start),
56 m_dst_snap_id_start(dst_snap_id_start), m_snap_map(snap_map),
57 m_dst_object_number(dst_object_number), m_flatten(flatten),
58 m_handler(handler), m_on_finish(on_finish) {
59 ceph_assert(src_image_ctx->data_ctx.is_valid());
60 ceph_assert(dst_image_ctx->data_ctx.is_valid());
61 ceph_assert(!m_snap_map.empty());
62
63 m_src_async_op = new io::AsyncOperation();
64 m_src_async_op->start_op(*util::get_image_ctx(m_src_image_ctx));
65
66 m_src_io_ctx.dup(m_src_image_ctx->data_ctx);
67 m_dst_io_ctx.dup(m_dst_image_ctx->data_ctx);
68
69 m_dst_oid = m_dst_image_ctx->get_object_name(dst_object_number);
70
71 ldout(m_cct, 20) << "dst_oid=" << m_dst_oid << dendl;
72
73 compute_src_object_extents();
74 }
75
76 template <typename I>
77 void ObjectCopyRequest<I>::send() {
78 send_list_snaps();
79 }
80
81 template <typename I>
82 void ObjectCopyRequest<I>::send_list_snaps() {
83 ceph_assert(!m_src_objects.empty());
84 m_src_ono = *m_src_objects.begin();
85 m_src_oid = m_src_image_ctx->get_object_name(m_src_ono);
86
87 ldout(m_cct, 20) << "src_oid=" << m_src_oid << dendl;
88
89 librados::AioCompletion *rados_completion = create_rados_callback<
90 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
91
92 librados::ObjectReadOperation op;
93 m_snap_set = {};
94 m_snap_ret = 0;
95 op.list_snaps(&m_snap_set, &m_snap_ret);
96
97 m_src_io_ctx.snap_set_read(CEPH_SNAPDIR);
98 int r = m_src_io_ctx.aio_operate(m_src_oid, rados_completion, &op,
99 nullptr);
100 ceph_assert(r == 0);
101 rados_completion->release();
102 }
103
104 template <typename I>
105 void ObjectCopyRequest<I>::handle_list_snaps(int r) {
106 if (r == 0 && m_snap_ret < 0) {
107 r = m_snap_ret;
108 }
109
110 ldout(m_cct, 20) << "r=" << r << dendl;
111
112 if (r < 0 && r != -ENOENT) {
113 lderr(m_cct) << "failed to list snaps: " << cpp_strerror(r) << dendl;
114 finish(r);
115 return;
116 }
117
118 if (m_retry_missing_read) {
119 if (m_snap_set == m_retry_snap_set) {
120 lderr(m_cct) << "read encountered missing object using up-to-date snap set"
121 << dendl;
122 finish(-ENOENT);
123 return;
124 }
125
126 ldout(m_cct, 20) << "retrying using updated snap set" << dendl;
127 m_retry_missing_read = false;
128 m_retry_snap_set = {};
129 }
130
131 if (r == -ENOENT) {
132 for (auto &it : m_src_object_extents) {
133 auto &e = it.second;
134 if (e.object_no == m_src_ono) {
135 e.noent = true;
136 }
137 }
138 m_read_ops = {};
139 m_read_snaps = {};
140 m_zero_interval = {};
141 } else {
142 compute_read_ops();
143 }
144 send_read_object();
145 }
146
147 template <typename I>
148 void ObjectCopyRequest<I>::send_read_object() {
149
150 if (m_read_snaps.empty()) {
151 // all snapshots have been read
152 merge_write_ops();
153
154 ceph_assert(!m_src_objects.empty());
155 m_src_objects.erase(m_src_objects.begin());
156
157 if (!m_src_objects.empty()) {
158 send_list_snaps();
159 return;
160 }
161
162 // all objects have been read
163 send_read_from_parent();
164 return;
165 }
166
167 auto index = *m_read_snaps.begin();
168 auto src_snap_seq = index.second;
169
170 bool read_required = false;
171 librados::ObjectReadOperation op;
172
173 for (auto &copy_op : m_read_ops[index]) {
174 if (!read_required) {
175 // map the copy op start snap id back to the necessary read snap id
176 m_src_io_ctx.snap_set_read(src_snap_seq);
177
178 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << dendl;
179 read_required = true;
180 }
181 ldout(m_cct, 20) << "read op: " << copy_op.src_offset << "~"
182 << copy_op.length << dendl;
183 op.sparse_read(copy_op.src_offset, copy_op.length, &copy_op.src_extent_map,
184 &copy_op.out_bl, nullptr);
185 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
186 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
187 }
188
189 if (!read_required) {
190 // nothing written to this object for this snapshot (must be trunc/remove)
191 handle_read_object(0);
192 return;
193 }
194
195 auto ctx = create_context_callback<
196 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
197 auto comp = create_rados_callback(ctx);
198
199 ldout(m_cct, 20) << "read " << m_src_oid << dendl;
200
201 int r = m_src_io_ctx.aio_operate(m_src_oid, comp, &op, nullptr);
202 ceph_assert(r == 0);
203 comp->release();
204 }
205
206 template <typename I>
207 void ObjectCopyRequest<I>::handle_read_object(int r) {
208 ldout(m_cct, 20) << "r=" << r << dendl;
209
210 if (r == -ENOENT) {
211 m_retry_snap_set = m_snap_set;
212 m_retry_missing_read = true;
213
214 ldout(m_cct, 5) << "object missing potentially due to removed snapshot"
215 << dendl;
216 send_list_snaps();
217 return;
218 }
219
220 if (r < 0) {
221 lderr(m_cct) << "failed to read from source object: " << cpp_strerror(r)
222 << dendl;
223 finish(r);
224 return;
225 }
226
227 if (m_handler != nullptr) {
228 uint64_t bytes_read = 0;
229
230 auto index = *m_read_snaps.begin();
231 for (auto &copy_op : m_read_ops[index]) {
232 bytes_read += copy_op.out_bl.length();
233 }
234 m_handler->handle_read(bytes_read);
235 }
236
237 ceph_assert(!m_read_snaps.empty());
238 m_read_snaps.erase(m_read_snaps.begin());
239
240 send_read_object();
241 }
242
243 template <typename I>
244 void ObjectCopyRequest<I>::send_read_from_parent() {
245 m_src_image_ctx->image_lock.lock_shared();
246 io::Extents image_extents;
247 compute_read_from_parent_ops(&image_extents);
248 m_src_image_ctx->image_lock.unlock_shared();
249
250 if (image_extents.empty()) {
251 handle_read_from_parent(0);
252 return;
253 }
254
255 ldout(m_cct, 20) << dendl;
256
257 ceph_assert(m_src_image_ctx->parent != nullptr);
258
259 auto ctx = create_context_callback<
260 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_from_parent>(this);
261 auto comp = io::AioCompletion::create_and_start(
262 ctx, util::get_image_ctx(m_src_image_ctx->parent), io::AIO_TYPE_READ);
263 ldout(m_cct, 20) << "completion " << comp << ", extents " << image_extents
264 << dendl;
265
266 auto src_image_ctx = m_src_image_ctx;
267 io::ImageRequest<I>::aio_read(src_image_ctx->parent, comp,
268 std::move(image_extents),
269 io::ReadResult{&m_read_from_parent_data}, 0,
270 ZTracer::Trace());
271 }
272
273 template <typename I>
274 void ObjectCopyRequest<I>::handle_read_from_parent(int r) {
275 ldout(m_cct, 20) << "r=" << r << dendl;
276
277 if (r < 0) {
278 lderr(m_cct) << "failed to read from parent: " << cpp_strerror(r) << dendl;
279 finish(r);
280 return;
281 }
282
283 if (!m_read_ops.empty()) {
284 ceph_assert(m_read_ops.size() == 1);
285 auto src_snap_seq = m_read_ops.begin()->first.first;
286 auto &copy_ops = m_read_ops.begin()->second;
287 uint64_t offset = 0;
288 for (auto it = copy_ops.begin(); it != copy_ops.end(); ) {
289 it->out_bl.substr_of(m_read_from_parent_data, offset, it->length);
290 offset += it->length;
291 if (it->out_bl.is_zero()) {
292 m_zero_interval[src_snap_seq].insert(it->dst_offset, it->length);
293 it = copy_ops.erase(it);
294 } else {
295 it++;
296 }
297 }
298 merge_write_ops();
299 }
300
301 compute_dst_object_may_exist();
302 compute_zero_ops();
303
304 if (m_write_ops.empty()) {
305 // nothing to copy
306 finish(-ENOENT);
307 return;
308 }
309
310 send_write_object();
311 return;
312 }
313
314 template <typename I>
315 void ObjectCopyRequest<I>::send_write_object() {
316 ceph_assert(!m_write_ops.empty());
317 auto& copy_ops = m_write_ops.begin()->second;
318
319 // retrieve the destination snap context for the op
320 SnapIds dst_snap_ids;
321 librados::snap_t dst_snap_seq = 0;
322 librados::snap_t src_snap_seq = m_write_ops.begin()->first;
323 if (src_snap_seq != 0) {
324 auto snap_map_it = m_snap_map.find(src_snap_seq);
325 ceph_assert(snap_map_it != m_snap_map.end());
326
327 auto dst_snap_id = snap_map_it->second.front();
328 auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_id);
329 ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
330 if (!dst_may_exist_it->second && !copy_ops.empty()) {
331 // if the object cannot exist, the only valid op is to remove it
332 ceph_assert(copy_ops.size() == 1U);
333 ceph_assert(copy_ops.begin()->type == COPY_OP_TYPE_REMOVE);
334 }
335
336 // write snapshot context should be before actual snapshot
337 ceph_assert(!snap_map_it->second.empty());
338 auto dst_snap_ids_it = snap_map_it->second.begin();
339 ++dst_snap_ids_it;
340
341 dst_snap_ids = SnapIds{dst_snap_ids_it, snap_map_it->second.end()};
342 if (!dst_snap_ids.empty()) {
343 dst_snap_seq = dst_snap_ids.front();
344 }
345 ceph_assert(dst_snap_seq != CEPH_NOSNAP);
346 }
347
348 ldout(m_cct, 20) << "dst_snap_seq=" << dst_snap_seq << ", "
349 << "dst_snaps=" << dst_snap_ids << dendl;
350
351 librados::ObjectWriteOperation op;
352 uint64_t buffer_offset;
353
354 if (!m_dst_image_ctx->migration_info.empty()) {
355 cls_client::assert_snapc_seq(&op, dst_snap_seq,
356 cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ);
357 }
358
359 for (auto &copy_op : copy_ops) {
360 switch (copy_op.type) {
361 case COPY_OP_TYPE_WRITE:
362 buffer_offset = 0;
363 for (auto &e : copy_op.dst_extent_map) {
364 ldout(m_cct, 20) << "write op: " << e.first << "~" << e.second
365 << dendl;
366 bufferlist tmpbl;
367 tmpbl.substr_of(copy_op.out_bl, buffer_offset, e.second);
368 op.write(e.first, tmpbl);
369 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
370 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
371 buffer_offset += e.second;
372 }
373 break;
374 case COPY_OP_TYPE_ZERO:
375 ldout(m_cct, 20) << "zero op: " << copy_op.dst_offset << "~"
376 << copy_op.length << dendl;
377 op.zero(copy_op.dst_offset, copy_op.length);
378 break;
379 case COPY_OP_TYPE_REMOVE_TRUNC:
380 ldout(m_cct, 20) << "create op" << dendl;
381 op.create(false);
382 // fall through
383 case COPY_OP_TYPE_TRUNC:
384 ldout(m_cct, 20) << "trunc op: " << copy_op.dst_offset << dendl;
385 op.truncate(copy_op.dst_offset);
386 break;
387 case COPY_OP_TYPE_REMOVE:
388 ldout(m_cct, 20) << "remove op" << dendl;
389 op.remove();
390 break;
391 default:
392 ceph_abort();
393 }
394 }
395
396 if (op.size() == (m_dst_image_ctx->migration_info.empty() ? 0 : 1)) {
397 handle_write_object(0);
398 return;
399 }
400
401 int r;
402 Context *finish_op_ctx;
403 {
404 std::shared_lock owner_locker{m_dst_image_ctx->owner_lock};
405 finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
406 }
407 if (finish_op_ctx == nullptr) {
408 lderr(m_cct) << "lost exclusive lock" << dendl;
409 finish(r);
410 return;
411 }
412
413 auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
414 handle_write_object(r);
415 finish_op_ctx->complete(0);
416 });
417 librados::AioCompletion *comp = create_rados_callback(ctx);
418 r = m_dst_io_ctx.aio_operate(m_dst_oid, comp, &op, dst_snap_seq, dst_snap_ids,
419 nullptr);
420 ceph_assert(r == 0);
421 comp->release();
422 }
423
424 template <typename I>
425 void ObjectCopyRequest<I>::handle_write_object(int r) {
426 ldout(m_cct, 20) << "r=" << r << dendl;
427
428 if (r == -ENOENT) {
429 r = 0;
430 } else if (r == -ERANGE) {
431 ldout(m_cct, 10) << "concurrent deep copy" << dendl;
432 r = 0;
433 }
434 if (r < 0) {
435 lderr(m_cct) << "failed to write to destination object: " << cpp_strerror(r)
436 << dendl;
437 finish(r);
438 return;
439 }
440
441 m_write_ops.erase(m_write_ops.begin());
442 if (!m_write_ops.empty()) {
443 send_write_object();
444 return;
445 }
446
447 send_update_object_map();
448 }
449
450 template <typename I>
451 void ObjectCopyRequest<I>::send_update_object_map() {
452 if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP) ||
453 m_dst_object_state.empty()) {
454 finish(0);
455 return;
456 }
457
458 m_dst_image_ctx->owner_lock.lock_shared();
459 m_dst_image_ctx->image_lock.lock_shared();
460 if (m_dst_image_ctx->object_map == nullptr) {
461 // possible that exclusive lock was lost in background
462 lderr(m_cct) << "object map is not initialized" << dendl;
463
464 m_dst_image_ctx->image_lock.unlock_shared();
465 m_dst_image_ctx->owner_lock.unlock_shared();
466 finish(-EINVAL);
467 return;
468 }
469
470 auto &dst_object_state = *m_dst_object_state.begin();
471 auto it = m_snap_map.find(dst_object_state.first);
472 ceph_assert(it != m_snap_map.end());
473 auto dst_snap_id = it->second.front();
474 auto object_state = dst_object_state.second;
475 m_dst_object_state.erase(m_dst_object_state.begin());
476
477 ldout(m_cct, 20) << "dst_snap_id=" << dst_snap_id << ", object_state="
478 << static_cast<uint32_t>(object_state) << dendl;
479
480 int r;
481 auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
482 if (finish_op_ctx == nullptr) {
483 lderr(m_cct) << "lost exclusive lock" << dendl;
484 m_dst_image_ctx->image_lock.unlock_shared();
485 m_dst_image_ctx->owner_lock.unlock_shared();
486 finish(r);
487 return;
488 }
489
490 auto ctx = new LambdaContext([this, finish_op_ctx](int r) {
491 handle_update_object_map(r);
492 finish_op_ctx->complete(0);
493 });
494
495 auto dst_image_ctx = m_dst_image_ctx;
496 bool sent = dst_image_ctx->object_map->template aio_update<
497 Context, &Context::complete>(dst_snap_id, m_dst_object_number, object_state,
498 {}, {}, false, ctx);
499
500 // NOTE: state machine might complete before we reach here
501 dst_image_ctx->image_lock.unlock_shared();
502 dst_image_ctx->owner_lock.unlock_shared();
503 if (!sent) {
504 ceph_assert(dst_snap_id == CEPH_NOSNAP);
505 ctx->complete(0);
506 }
507 }
508
509 template <typename I>
510 void ObjectCopyRequest<I>::handle_update_object_map(int r) {
511 ldout(m_cct, 20) << "r=" << r << dendl;
512
513 if (r < 0) {
514 lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) << dendl;
515 finish(r);
516 return;
517 }
518
519 if (!m_dst_object_state.empty()) {
520 send_update_object_map();
521 return;
522 }
523 finish(0);
524 }
525
526 template <typename I>
527 Context *ObjectCopyRequest<I>::start_lock_op(ceph::shared_mutex &owner_lock,
528 int* r) {
529 ceph_assert(ceph_mutex_is_locked(m_dst_image_ctx->owner_lock));
530 if (m_dst_image_ctx->exclusive_lock == nullptr) {
531 return new LambdaContext([](int r) {});
532 }
533 return m_dst_image_ctx->exclusive_lock->start_op(r);
534 }
535
536 template <typename I>
537 uint64_t ObjectCopyRequest<I>::src_to_dst_object_offset(uint64_t objectno,
538 uint64_t offset) {
539 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
540 Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, objectno, offset, 1,
541 image_extents);
542 ceph_assert(image_extents.size() == 1);
543 auto dst_object_offset = image_extents.begin()->first;
544
545 std::map<object_t, std::vector<ObjectExtent>> dst_object_extents;
546 Striper::file_to_extents(m_cct, m_dst_image_ctx->format_string,
547 &m_dst_image_ctx->layout, dst_object_offset, 1, 0,
548 dst_object_extents);
549 ceph_assert(dst_object_extents.size() == 1);
550 ceph_assert(dst_object_extents.begin()->second.size() == 1);
551 auto &e = *dst_object_extents.begin()->second.begin();
552 ceph_assert(e.objectno == m_dst_object_number);
553
554 return e.offset;
555 }
556
557 template <typename I>
558 void ObjectCopyRequest<I>::compute_src_object_extents() {
559 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
560 Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number,
561 0, m_dst_image_ctx->layout.object_size, image_extents);
562
563 size_t total = 0;
564 for (auto &e : image_extents) {
565 std::map<object_t, std::vector<ObjectExtent>> src_object_extents;
566 Striper::file_to_extents(m_cct, m_src_image_ctx->format_string,
567 &m_src_image_ctx->layout, e.first, e.second, 0,
568 src_object_extents);
569 auto stripe_unit = std::min(m_src_image_ctx->layout.stripe_unit,
570 m_dst_image_ctx->layout.stripe_unit);
571 for (auto &p : src_object_extents) {
572 for (auto &s : p.second) {
573 m_src_objects.insert(s.objectno);
574 total += s.length;
575 while (s.length > 0) {
576 ceph_assert(s.length >= stripe_unit);
577 auto dst_object_offset = src_to_dst_object_offset(s.objectno, s.offset);
578 m_src_object_extents[dst_object_offset] = {s.objectno, s.offset,
579 stripe_unit};
580 s.offset += stripe_unit;
581 s.length -= stripe_unit;
582 }
583 }
584 }
585 }
586
587 ceph_assert(total == m_dst_image_ctx->layout.object_size);
588
589 ldout(m_cct, 20) << m_src_object_extents.size() << " src extents" << dendl;
590 }
591
// Builds the read plan for the source object currently being processed
// (m_src_ono) from the snap set obtained by the last list-snaps call.
//
// m_read_ops, m_read_snaps and m_zero_interval are reset and then refilled by
// walking m_snap_map in order. For every entry calc_snap_set_diff() reports
// what changed between the previous and the current source snap id. Changed
// ranges that intersect one of this object's extents become
// COPY_OP_TYPE_WRITE entries in m_read_ops, keyed by
// {end_src_snap_id, clone_end_snap_id}. Ranges lost because the object shrank
// are recorded in m_zero_interval, translated to destination object offsets.
// Finally the keys of m_read_ops are appended to m_read_snaps.
592 template <typename I>
593 void ObjectCopyRequest<I>::compute_read_ops() {
594 m_read_ops = {};
595 m_read_snaps = {};
596 m_zero_interval = {};
597
// sample under image_lock whether the source image currently has a parent
598 m_src_image_ctx->image_lock.lock_shared();
599 bool hide_parent = (m_src_image_ctx->parent != nullptr);
600 m_src_image_ctx->image_lock.unlock_shared();
601
// the last key of m_snap_map is the copy point: reads never go past it
602 librados::snap_t src_copy_point_snap_id = m_snap_map.rbegin()->first;
// with a parent, or when copying from a start snapshot, the object is treated
// as already existing at full object size before the first interval
603 bool prev_exists = (hide_parent || m_src_snap_id_start > 0);
604 uint64_t prev_end_size = prev_exists ?
605 m_src_image_ctx->layout.object_size : 0;
606 librados::snap_t start_src_snap_id = m_src_snap_id_start;
607
// each iteration handles the interval (start_src_snap_id, end_src_snap_id]
608 for (auto &pair : m_snap_map) {
609 ceph_assert(!pair.second.empty());
610 librados::snap_t end_src_snap_id = pair.first;
611 librados::snap_t end_dst_snap_id = pair.second.front();
612
// outputs of calc_snap_set_diff for this interval
613 interval_set<uint64_t> diff;
614 uint64_t end_size;
615 bool exists;
616 librados::snap_t clone_end_snap_id;
617 calc_snap_set_diff(m_cct, m_snap_set, start_src_snap_id,
618 end_src_snap_id, &diff, &end_size, &exists,
619 &clone_end_snap_id, &m_read_whole_object);
620
621 if (m_read_whole_object) {
// the diff is replaced by "everything changed": full-size object that
// exists, read at end_src_snap_id
622 ldout(m_cct, 1) << "need to read full object" << dendl;
623 diff.insert(0, m_src_image_ctx->layout.object_size);
624 exists = true;
625 end_size = m_src_image_ctx->layout.object_size;
626 clone_end_snap_id = end_src_snap_id;
627 } else if (!exists) {
628 end_size = 0;
// special case for the first snap_map entry of an image with a parent when
// the snap set has no clones: treat the object as existing and fully changed
629 if (hide_parent && end_src_snap_id == m_snap_map.begin()->first &&
630 m_snap_set.clones.empty()) {
631 ldout(m_cct, 20) << "no clones for existing object" << dendl;
632 exists = true;
633 diff.insert(0, m_src_image_ctx->layout.object_size);
634 clone_end_snap_id = end_src_snap_id;
635 }
636 }
637
638 ldout(m_cct, 20) << "start_src_snap_id=" << start_src_snap_id << ", "
639 << "end_src_snap_id=" << end_src_snap_id << ", "
640 << "clone_end_snap_id=" << clone_end_snap_id << ", "
641 << "end_dst_snap_id=" << end_dst_snap_id << ", "
642 << "diff=" << diff << ", "
643 << "end_size=" << end_size << ", "
644 << "exists=" << exists << dendl;
645
// always create the (possibly empty) zero interval entry for this snapshot
646 m_zero_interval[end_src_snap_id] = {};
647
648 if (exists || prev_exists) {
649 // clip diff to size of object (in case it was truncated)
650 if (end_size < prev_end_size) {
651 interval_set<uint64_t> trunc;
652 trunc.insert(end_size, prev_end_size - end_size);
653 trunc.intersection_of(diff);
654 diff.subtract(trunc);
655 ldout(m_cct, 20) << "clearing truncate diff: " << trunc << dendl;
656 }
657
658 if (exists) {
659 // reads should be issued against the newest (existing) snapshot within
660 // the associated snapshot object clone. writes should be issued
661 // against the oldest snapshot in the snap_map.
662 ceph_assert(clone_end_snap_id >= end_src_snap_id);
663 if (clone_end_snap_id > src_copy_point_snap_id) {
664 // do not read past the copy point snapshot
665 clone_end_snap_id = src_copy_point_snap_id;
666 }
667 }
668
// project the diff onto every extent that lives in the current source object
669 for (auto &it : m_src_object_extents) {
670 auto dst_object_offset = it.first;
671 auto &e = it.second;
672
// extents backed by other source objects are handled in their own pass
673 if (e.object_no != m_src_ono) {
674 continue;
675 }
676
677 interval_set<uint64_t> read_interval;
678 read_interval.insert(e.offset, e.length);
679
// the object shrank: the part of this extent inside the truncated range must
// be zeroed on the destination
680 if (end_size < prev_end_size) {
681 interval_set<uint64_t> zero_interval;
682 zero_interval.insert(end_size, prev_end_size - end_size);
683 zero_interval.intersection_of(read_interval);
684 if (!zero_interval.empty()) {
// NOTE: this `it` shadows the enclosing range-for variable. Only the first
// interval is used; both operands of the intersection hold a single range.
685 auto it = zero_interval.begin();
686 auto offset = it.get_start() - e.offset;
687 m_zero_interval[end_src_snap_id].insert(dst_object_offset + offset,
688 it.get_len());
689 ldout(m_cct, 20) << "extent " << e.offset << "~" << e.length
690 << " intersects truncation " << end_size << "~"
691 << prev_end_size - end_size << ", inserting zero "
692 << dst_object_offset + offset << "~"
693 << it.get_len() << dendl;
694 }
695 }
696
697 // limit read interval to diff
698 read_interval.intersection_of(diff);
699
700 ldout(m_cct, 20) << "src_object_extent: " << e.offset << "~" << e.length
701 << ", dst_object_offset=" << dst_object_offset
702 << ", read: " << read_interval << dendl;
703
// data can only be read from an object that exists in this interval
704 ceph_assert(exists || read_interval.empty());
705
// one copy op per changed range; the source offset is relative to the source
// object, the destination offset relative to the destination object
706 for (auto it = read_interval.begin(); it != read_interval.end();
707 it++) {
708 ceph_assert(it.get_start() >= e.offset);
709 auto offset = it.get_start() - e.offset;
710 ldout(m_cct, 20) << "read/write op: " << it.get_start() << "~"
711 << it.get_len() << " dst: "
712 << dst_object_offset + offset << dendl;
713 m_read_ops[{end_src_snap_id, clone_end_snap_id}]
714 .emplace_back(COPY_OP_TYPE_WRITE, it.get_start(),
715 dst_object_offset + offset, it.get_len());
716 }
717 }
718 }
719
// carry this interval's end state into the next iteration
720 prev_end_size = end_size;
721 prev_exists = exists;
722 if (hide_parent && prev_exists && prev_end_size == 0) {
723 // hide parent
724 prev_end_size = m_src_image_ctx->layout.object_size;
725 }
726 start_src_snap_id = end_src_snap_id;
727 }
728
// the keys of m_read_ops become the list of reads to issue
729 for (auto &it : m_read_ops) {
730 m_read_snaps.push_back(it.first);
731 }
732 }
733
734 template <typename I>
735 void ObjectCopyRequest<I>::compute_read_from_parent_ops(
736 io::Extents *parent_image_extents) {
737 assert(ceph_mutex_is_locked(m_src_image_ctx->image_lock));
738
739 m_read_ops = {};
740 m_zero_interval = {};
741 parent_image_extents->clear();
742
743 if (m_src_image_ctx->parent == nullptr) {
744 ldout(m_cct, 20) << "no parent" << dendl;
745 return;
746 }
747
748 size_t noent_count = 0;
749 for (auto &it : m_src_object_extents) {
750 if (it.second.noent) {
751 noent_count++;
752 }
753 }
754
755 if (noent_count == 0) {
756 ldout(m_cct, 20) << "no extents need read from parent" << dendl;
757 return;
758 }
759
760 if (noent_count == m_src_object_extents.size() && !m_flatten) {
761 ldout(m_cct, 20) << "reading all extents skipped when no flatten"
762 << dendl;
763 return;
764 }
765
766 ldout(m_cct, 20) << dendl;
767
768 auto src_snap_seq = m_snap_map.begin()->first;
769
770 uint64_t parent_overlap;
771 int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, &parent_overlap);
772 if (r < 0) {
773 ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
774 << src_snap_seq << ": " << cpp_strerror(r) << dendl;
775 return;
776 }
777 if (parent_overlap == 0) {
778 ldout(m_cct, 20) << "no parent overlap" << dendl;
779 return;
780 }
781
782 for (auto &it : m_src_object_extents) {
783 auto dst_object_offset = it.first;
784 auto &e = it.second;
785
786 if (!e.noent) {
787 continue;
788 }
789
790 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
791 Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, e.object_no,
792 e.offset, e.length, image_extents);
793
794 uint64_t overlap = m_src_image_ctx->prune_parent_extents(image_extents,
795 parent_overlap);
796 if (overlap == 0) {
797 ldout(m_cct, 20) << "no parent overlap for object_no " << e.object_no
798 << " extent " << e.offset << "~" << e.length << dendl;
799 continue;
800 }
801
802 ldout(m_cct, 20) << "object_no " << e.object_no << " extent " << e.offset
803 << "~" << e.length << " overlap " << parent_overlap
804 << " parent extents " << image_extents << dendl;
805
806 ceph_assert(image_extents.size() == 1);
807
808 auto src_image_offset = image_extents.begin()->first;
809 auto length = image_extents.begin()->second;
810 m_read_ops[{src_snap_seq, 0}].emplace_back(COPY_OP_TYPE_WRITE, e.offset,
811 dst_object_offset, length);
812 m_read_ops[{src_snap_seq, 0}].rbegin()->src_extent_map[e.offset] = length;
813 parent_image_extents->emplace_back(src_image_offset, length);
814 }
815
816 if (!parent_image_extents->empty()) {
817 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
818 }
819 }
820
821 template <typename I>
822 void ObjectCopyRequest<I>::merge_write_ops() {
823 ldout(m_cct, 20) << dendl;
824
825 for (auto &it : m_zero_interval) {
826 m_dst_zero_interval[it.first].insert(it.second);
827 }
828
829 for (auto &it : m_read_ops) {
830 auto src_snap_seq = it.first.first;
831 auto &copy_ops = it.second;
832 for (auto &copy_op : copy_ops) {
833 uint64_t src_offset = copy_op.src_offset;
834 uint64_t dst_offset = copy_op.dst_offset;
835 for (auto &e : copy_op.src_extent_map) {
836 uint64_t zero_len = e.first - src_offset;
837 if (zero_len > 0) {
838 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
839 << ", inserting zero " << dst_offset << "~"
840 << zero_len << dendl;
841 m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
842 src_offset += zero_len;
843 dst_offset += zero_len;
844 }
845 copy_op.dst_extent_map[dst_offset] = e.second;
846 src_offset += e.second;
847 dst_offset += e.second;
848 }
849 if (dst_offset < copy_op.dst_offset + copy_op.length) {
850 uint64_t zero_len = copy_op.dst_offset + copy_op.length - dst_offset;
851 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
852 << ", inserting zero " << dst_offset << "~"
853 << zero_len << dendl;
854 m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
855 } else {
856 ceph_assert(dst_offset == copy_op.dst_offset + copy_op.length);
857 }
858 m_write_ops[src_snap_seq].emplace_back(std::move(copy_op));
859 }
860 }
861 }
862
// Turns the accumulated zero intervals (m_dst_zero_interval) into destination
// write ops and derives the object map state per source snapshot.
//
// Snapshots are processed in key order while prev_end_size tracks the
// destination object's size after the previous snapshot. A zero interval that
// reaches the current end of the object becomes a REMOVE, REMOVE_TRUNC or
// TRUNC op; an interval inside the object becomes a ZERO op. Snapshots in
// which the destination object cannot exist get a single REMOVE op if the
// object had data before. m_dst_object_state is set to OBJECT_EXISTS (or
// OBJECT_EXISTS_CLEAN with fast-diff when nothing changed) for snapshots in
// which the object has data or hides parent data.
863 template <typename I>
864 void ObjectCopyRequest<I>::compute_zero_ops() {
865 ldout(m_cct, 20) << dendl;
866
867 bool fast_diff = m_dst_image_ctx->test_features(RBD_FEATURE_FAST_DIFF);
// size of the destination object after the previously processed snapshot
868 uint64_t prev_end_size = 0;
869
// sample under image_lock whether the source image currently has a parent
870 m_src_image_ctx->image_lock.lock_shared();
871 bool hide_parent = (m_src_image_ctx->parent != nullptr);
872 m_src_image_ctx->image_lock.unlock_shared();
873
874 for (auto &it : m_dst_zero_interval) {
875 auto src_snap_seq = it.first;
876 auto &zero_interval = it.second;
877
// translate the source snapshot into its first mapped destination snapshot
878 auto snap_map_it = m_snap_map.find(src_snap_seq);
879 ceph_assert(snap_map_it != m_snap_map.end());
880 auto dst_snap_seq = snap_map_it->second.front();
881
// the object cannot exist in this snapshot: if it had data before, remove it
// and skip everything else for this snapshot
882 auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_seq);
883 ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
884 if (!dst_may_exist_it->second && prev_end_size > 0) {
885 ldout(m_cct, 5) << "object DNE for snap_id: " << dst_snap_seq << dendl;
886 m_write_ops[src_snap_seq].emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
887 prev_end_size = 0;
888 continue;
889 }
890
// re-evaluate whether the destination object still overlaps parent data;
// once it does not, hide_parent stays false for all later snapshots
891 if (hide_parent) {
892 std::shared_lock image_locker{m_dst_image_ctx->image_lock};
893 uint64_t parent_overlap = 0;
894 int r = m_dst_image_ctx->get_parent_overlap(dst_snap_seq,
895 &parent_overlap);
// a failure is only logged; NOTE(review): parent_overlap presumably stays 0
// in that case so the "no parent overlap" branch below is taken - confirm
896 if (r < 0) {
897 ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
898 << dst_snap_seq << ": " << cpp_strerror(r) << dendl;
899 }
900 if (parent_overlap == 0) {
901 ldout(m_cct, 20) << "no parent overlap" << dendl;
902 hide_parent = false;
903 } else {
// map the whole destination object to image extents and clip them to the
// parent overlap
904 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
905 Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout,
906 m_dst_object_number, 0,
907 m_dst_image_ctx->layout.object_size,
908 image_extents);
909 uint64_t overlap = m_dst_image_ctx->prune_parent_extents(image_extents,
910 parent_overlap);
911 if (overlap == 0) {
912 ldout(m_cct, 20) << "no parent overlap" << dendl;
913 hide_parent = false;
914 } else if (src_snap_seq == m_dst_zero_interval.begin()->first) {
// first snapshot only: seed prev_end_size with the total length of the
// extents that remain after pruning to the parent overlap
915 for (auto e : image_extents) {
916 prev_end_size += e.second;
917 }
918 ceph_assert(prev_end_size <= m_dst_image_ctx->layout.object_size);
919 }
920 }
921 }
922
923 uint64_t end_size = prev_end_size;
924
925 // update end_size if there are writes into higher offsets
926 auto iter = m_write_ops.find(src_snap_seq);
927 if (iter != m_write_ops.end()) {
928 for (auto &copy_op : iter->second) {
929 for (auto &e : copy_op.dst_extent_map) {
930 end_size = std::max(end_size, e.first + e.second);
931 }
932 }
933 }
934
// classify every zero interval against the current end of the object
935 for (auto z = zero_interval.begin(); z != zero_interval.end(); z++) {
936 if (z.get_start() + z.get_len() >= end_size) {
937 // zero interval at the object end
938 if (z.get_start() == 0 && hide_parent) {
// whole object zeroed while parent data must stay hidden: the write path
// issues a create followed by a truncate for this op type
939 m_write_ops[src_snap_seq]
940 .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC, 0, 0, 0);
941 ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl;
942 } else if (z.get_start() < prev_end_size) {
// the interval cuts into data that existed after the previous snapshot
943 if (z.get_start() == 0) {
944 m_write_ops[src_snap_seq]
945 .emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
946 ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE" << dendl;
947 } else {
948 m_write_ops[src_snap_seq]
949 .emplace_back(COPY_OP_TYPE_TRUNC, 0, z.get_start(), 0);
950 ldout(m_cct, 20) << "COPY_OP_TYPE_TRUNC " << z.get_start() << dendl;
951 }
952 }
// the object now ends where the trailing zero interval starts
953 end_size = std::min(end_size, z.get_start());
954 } else {
955 // zero interval inside the object
956 m_write_ops[src_snap_seq]
957 .emplace_back(COPY_OP_TYPE_ZERO, 0, z.get_start(), z.get_len());
958 ldout(m_cct, 20) << "COPY_OP_TYPE_ZERO " << z.get_start() << "~"
959 << z.get_len() << dendl;
960 }
961 }
962 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", end_size="
963 << end_size << dendl;
// record the object map state: exists when it has data or hides parent data;
// with fast-diff an unchanged size and no write ops marks it clean
// NOTE: operator[] below creates an empty m_write_ops entry if none exists
964 if (end_size > 0 || hide_parent) {
965 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
966 if (fast_diff && end_size == prev_end_size &&
967 m_write_ops[src_snap_seq].empty()) {
968 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS_CLEAN;
969 }
970 }
971 prev_end_size = end_size;
972 }
973 }
974
975 template <typename I>
976 void ObjectCopyRequest<I>::finish(int r) {
977 ldout(m_cct, 20) << "r=" << r << dendl;
978
979 // ensure IoCtxs are closed prior to proceeding
980 auto on_finish = m_on_finish;
981
982 m_src_async_op->finish_op();
983 delete m_src_async_op;
984 delete this;
985
986 on_finish->complete(r);
987 }
988
989 template <typename I>
990 void ObjectCopyRequest<I>::compute_dst_object_may_exist() {
991 std::shared_lock image_locker{m_dst_image_ctx->image_lock};
992
993 auto snap_ids = m_dst_image_ctx->snaps;
994 snap_ids.push_back(CEPH_NOSNAP);
995
996 for (auto snap_id : snap_ids) {
997 m_dst_object_may_exist[snap_id] =
998 (m_dst_object_number < m_dst_image_ctx->get_object_count(snap_id));
999 }
1000 }
1001
1002 } // namespace deep_copy
1003 } // namespace librbd
1004
// Explicit instantiation for librbd::ImageCtx so that the member definitions
// above are emitted in this translation unit.
1005 template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>;