// Source: git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/deep_copy/ObjectCopyRequest.cc
// import ceph nautilus 14.2.2
// [ceph.git] / ceph / src / librbd / deep_copy / ObjectCopyRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "ObjectCopyRequest.h"
5 #include "common/errno.h"
6 #include "librados/snap_set_diff.h"
7 #include "librbd/ExclusiveLock.h"
8 #include "librbd/ObjectMap.h"
9 #include "librbd/Utils.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/AsyncOperation.h"
12 #include "librbd/io/ImageRequest.h"
13 #include "librbd/io/ReadResult.h"
14 #include "osdc/Striper.h"
15
// Logging configuration for this translation unit: log lines are tagged with
// the rbd subsystem and prefixed with the class name, the request's `this`
// pointer and the name of the function emitting the message.
16 #define dout_subsys ceph_subsys_rbd
17 #undef dout_prefix
18 #define dout_prefix *_dout << "librbd::deep_copy::ObjectCopyRequest: " \
19 << this << " " << __func__ << ": "
20
21 namespace librados {
22
23 inline bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) {
24 return (rhs.cloneid == lhs.cloneid &&
25 rhs.snaps == lhs.snaps &&
26 rhs.overlap == lhs.overlap &&
27 rhs.size == lhs.size);
28 }
29
30 inline bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) {
31 return (rhs.clones == lhs.clones &&
32 rhs.seq == lhs.seq);
33 }
34
35 } // namespace librados
36
37 namespace librbd {
38 namespace deep_copy {
39
40 using librbd::util::create_context_callback;
41 using librbd::util::create_rados_callback;
42
43 template <typename I>
44 ObjectCopyRequest<I>::ObjectCopyRequest(I *src_image_ctx,
45 I *dst_image_ctx,
46 const SnapMap &snap_map,
47 uint64_t dst_object_number,
48 bool flatten, Context *on_finish)
49 : m_src_image_ctx(src_image_ctx),
50 m_dst_image_ctx(dst_image_ctx), m_cct(dst_image_ctx->cct),
51 m_snap_map(snap_map), m_dst_object_number(dst_object_number),
52 m_flatten(flatten), m_on_finish(on_finish) {
53 ceph_assert(!m_snap_map.empty());
54
55 m_src_async_op = new io::AsyncOperation();
56 m_src_async_op->start_op(*util::get_image_ctx(m_src_image_ctx));
57
58 m_src_io_ctx.dup(m_src_image_ctx->data_ctx);
59 m_dst_io_ctx.dup(m_dst_image_ctx->data_ctx);
60
61 m_dst_oid = m_dst_image_ctx->get_object_name(dst_object_number);
62
63 ldout(m_cct, 20) << "dst_oid=" << m_dst_oid << dendl;
64
65 compute_src_object_extents();
66 }
67
68 template <typename I>
69 void ObjectCopyRequest<I>::send() {
70 send_list_snaps();
71 }
72
73 template <typename I>
74 void ObjectCopyRequest<I>::send_list_snaps() {
75 ceph_assert(!m_src_objects.empty());
76 m_src_ono = *m_src_objects.begin();
77 m_src_oid = m_src_image_ctx->get_object_name(m_src_ono);
78
79 ldout(m_cct, 20) << "src_oid=" << m_src_oid << dendl;
80
81 librados::AioCompletion *rados_completion = create_rados_callback<
82 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
83
84 librados::ObjectReadOperation op;
85 m_snap_set = {};
86 m_snap_ret = 0;
87 op.list_snaps(&m_snap_set, &m_snap_ret);
88
89 m_src_io_ctx.snap_set_read(CEPH_SNAPDIR);
90 int r = m_src_io_ctx.aio_operate(m_src_oid, rados_completion, &op,
91 nullptr);
92 ceph_assert(r == 0);
93 rados_completion->release();
94 }
95
96 template <typename I>
97 void ObjectCopyRequest<I>::handle_list_snaps(int r) {
98 if (r == 0 && m_snap_ret < 0) {
99 r = m_snap_ret;
100 }
101
102 ldout(m_cct, 20) << "r=" << r << dendl;
103
104 if (r < 0 && r != -ENOENT) {
105 lderr(m_cct) << "failed to list snaps: " << cpp_strerror(r) << dendl;
106 finish(r);
107 return;
108 }
109
110 if (m_retry_missing_read) {
111 if (m_snap_set == m_retry_snap_set) {
112 lderr(m_cct) << "read encountered missing object using up-to-date snap set"
113 << dendl;
114 finish(-ENOENT);
115 return;
116 }
117
118 ldout(m_cct, 20) << "retrying using updated snap set" << dendl;
119 m_retry_missing_read = false;
120 m_retry_snap_set = {};
121 }
122
123 if (r == -ENOENT) {
124 for (auto &it : m_src_object_extents) {
125 auto &e = it.second;
126 if (e.object_no == m_src_ono) {
127 e.noent = true;
128 }
129 }
130 m_read_ops = {};
131 m_read_snaps = {};
132 m_zero_interval = {};
133 } else {
134 compute_read_ops();
135 }
136 send_read_object();
137 }
138
139 template <typename I>
140 void ObjectCopyRequest<I>::send_read_object() {
141
142 if (m_read_snaps.empty()) {
143 // all snapshots have been read
144 merge_write_ops();
145
146 ceph_assert(!m_src_objects.empty());
147 m_src_objects.erase(m_src_objects.begin());
148
149 if (!m_src_objects.empty()) {
150 send_list_snaps();
151 return;
152 }
153
154 // all objects have been read
155 send_read_from_parent();
156 return;
157 }
158
159 auto index = *m_read_snaps.begin();
160 auto src_snap_seq = index.second;
161
162 bool read_required = false;
163 librados::ObjectReadOperation op;
164
165 for (auto &copy_op : m_read_ops[index]) {
166 if (!read_required) {
167 // map the copy op start snap id back to the necessary read snap id
168 m_src_io_ctx.snap_set_read(src_snap_seq);
169
170 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << dendl;
171 read_required = true;
172 }
173 ldout(m_cct, 20) << "read op: " << copy_op.src_offset << "~"
174 << copy_op.length << dendl;
175 op.sparse_read(copy_op.src_offset, copy_op.length, &copy_op.src_extent_map,
176 &copy_op.out_bl, nullptr);
177 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
178 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
179 }
180
181 if (!read_required) {
182 // nothing written to this object for this snapshot (must be trunc/remove)
183 handle_read_object(0);
184 return;
185 }
186
187 auto ctx = create_context_callback<
188 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
189 auto comp = create_rados_callback(ctx);
190
191 ldout(m_cct, 20) << "read " << m_src_oid << dendl;
192
193 int r = m_src_io_ctx.aio_operate(m_src_oid, comp, &op, nullptr);
194 ceph_assert(r == 0);
195 comp->release();
196 }
197
198 template <typename I>
199 void ObjectCopyRequest<I>::handle_read_object(int r) {
200 ldout(m_cct, 20) << "r=" << r << dendl;
201
202 if (r == -ENOENT) {
203 m_retry_snap_set = m_snap_set;
204 m_retry_missing_read = true;
205
206 ldout(m_cct, 5) << "object missing potentially due to removed snapshot"
207 << dendl;
208 send_list_snaps();
209 return;
210 }
211
212 if (r < 0) {
213 lderr(m_cct) << "failed to read from source object: " << cpp_strerror(r)
214 << dendl;
215 finish(r);
216 return;
217 }
218
219 ceph_assert(!m_read_snaps.empty());
220 m_read_snaps.erase(m_read_snaps.begin());
221
222 send_read_object();
223 }
224
225 template <typename I>
226 void ObjectCopyRequest<I>::send_read_from_parent() {
227 m_src_image_ctx->snap_lock.get_read();
228 m_src_image_ctx->parent_lock.get_read();
229 io::Extents image_extents;
230 compute_read_from_parent_ops(&image_extents);
231 m_src_image_ctx->snap_lock.put_read();
232
233 if (image_extents.empty()) {
234 m_src_image_ctx->parent_lock.put_read();
235 handle_read_from_parent(0);
236 return;
237 }
238
239 ldout(m_cct, 20) << dendl;
240
241 ceph_assert(m_src_image_ctx->parent != nullptr);
242
243 auto ctx = create_context_callback<
244 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_from_parent>(this);
245 auto comp = io::AioCompletion::create_and_start(
246 ctx, util::get_image_ctx(m_src_image_ctx->parent), io::AIO_TYPE_READ);
247 ldout(m_cct, 20) << "completion " << comp << ", extents " << image_extents
248 << dendl;
249
250 auto src_image_ctx = m_src_image_ctx;
251 io::ImageRequest<I>::aio_read(src_image_ctx->parent, comp,
252 std::move(image_extents),
253 io::ReadResult{&m_read_from_parent_data}, 0,
254 ZTracer::Trace());
255 src_image_ctx->parent_lock.put_read();
256 }
257
258 template <typename I>
259 void ObjectCopyRequest<I>::handle_read_from_parent(int r) {
260 ldout(m_cct, 20) << "r=" << r << dendl;
261
262 if (r < 0) {
263 lderr(m_cct) << "failed to read from parent: " << cpp_strerror(r) << dendl;
264 finish(r);
265 return;
266 }
267
268 if (!m_read_ops.empty()) {
269 ceph_assert(m_read_ops.size() == 1);
270 auto src_snap_seq = m_read_ops.begin()->first.first;
271 auto &copy_ops = m_read_ops.begin()->second;
272 uint64_t offset = 0;
273 for (auto it = copy_ops.begin(); it != copy_ops.end(); ) {
274 it->out_bl.substr_of(m_read_from_parent_data, offset, it->length);
275 offset += it->length;
276 if (it->out_bl.is_zero()) {
277 m_zero_interval[src_snap_seq].insert(it->dst_offset, it->length);
278 it = copy_ops.erase(it);
279 } else {
280 it++;
281 }
282 }
283 merge_write_ops();
284 }
285
286 compute_dst_object_may_exist();
287 compute_zero_ops();
288
289 if (m_write_ops.empty()) {
290 // nothing to copy
291 finish(-ENOENT);
292 return;
293 }
294
295 send_write_object();
296 return;
297 }
298
299 template <typename I>
300 void ObjectCopyRequest<I>::send_write_object() {
301 ceph_assert(!m_write_ops.empty());
302 auto& copy_ops = m_write_ops.begin()->second;
303
304 // retrieve the destination snap context for the op
305 SnapIds dst_snap_ids;
306 librados::snap_t dst_snap_seq = 0;
307 librados::snap_t src_snap_seq = m_write_ops.begin()->first;
308 if (src_snap_seq != 0) {
309 auto snap_map_it = m_snap_map.find(src_snap_seq);
310 ceph_assert(snap_map_it != m_snap_map.end());
311
312 auto dst_snap_id = snap_map_it->second.front();
313 auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_id);
314 ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
315 if (!dst_may_exist_it->second && !copy_ops.empty()) {
316 // if the object cannot exist, the only valid op is to remove it
317 ceph_assert(copy_ops.size() == 1U);
318 ceph_assert(copy_ops.begin()->type == COPY_OP_TYPE_REMOVE);
319 }
320
321 // write snapshot context should be before actual snapshot
322 if (snap_map_it != m_snap_map.begin()) {
323 --snap_map_it;
324 ceph_assert(!snap_map_it->second.empty());
325 dst_snap_seq = snap_map_it->second.front();
326 dst_snap_ids = snap_map_it->second;
327 }
328 }
329
330 ldout(m_cct, 20) << "dst_snap_seq=" << dst_snap_seq << ", "
331 << "dst_snaps=" << dst_snap_ids << dendl;
332
333 librados::ObjectWriteOperation op;
334 uint64_t buffer_offset;
335
336 if (!m_dst_image_ctx->migration_info.empty()) {
337 cls_client::assert_snapc_seq(&op, dst_snap_seq,
338 cls::rbd::ASSERT_SNAPC_SEQ_GT_SNAPSET_SEQ);
339 }
340
341 for (auto &copy_op : copy_ops) {
342 switch (copy_op.type) {
343 case COPY_OP_TYPE_WRITE:
344 buffer_offset = 0;
345 for (auto &e : copy_op.dst_extent_map) {
346 ldout(m_cct, 20) << "write op: " << e.first << "~" << e.second
347 << dendl;
348 bufferlist tmpbl;
349 tmpbl.substr_of(copy_op.out_bl, buffer_offset, e.second);
350 op.write(e.first, tmpbl);
351 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
352 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
353 buffer_offset += e.second;
354 }
355 break;
356 case COPY_OP_TYPE_ZERO:
357 ldout(m_cct, 20) << "zero op: " << copy_op.dst_offset << "~"
358 << copy_op.length << dendl;
359 op.zero(copy_op.dst_offset, copy_op.length);
360 break;
361 case COPY_OP_TYPE_REMOVE_TRUNC:
362 ldout(m_cct, 20) << "create op" << dendl;
363 op.create(false);
364 // fall through
365 case COPY_OP_TYPE_TRUNC:
366 ldout(m_cct, 20) << "trunc op: " << copy_op.dst_offset << dendl;
367 op.truncate(copy_op.dst_offset);
368 break;
369 case COPY_OP_TYPE_REMOVE:
370 ldout(m_cct, 20) << "remove op" << dendl;
371 op.remove();
372 break;
373 default:
374 ceph_abort();
375 }
376 }
377
378 if (op.size() == (m_dst_image_ctx->migration_info.empty() ? 0 : 1)) {
379 handle_write_object(0);
380 return;
381 }
382
383 int r;
384 Context *finish_op_ctx;
385 {
386 RWLock::RLocker owner_locker(m_dst_image_ctx->owner_lock);
387 finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
388 }
389 if (finish_op_ctx == nullptr) {
390 lderr(m_cct) << "lost exclusive lock" << dendl;
391 finish(r);
392 return;
393 }
394
395 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
396 handle_write_object(r);
397 finish_op_ctx->complete(0);
398 });
399 librados::AioCompletion *comp = create_rados_callback(ctx);
400 r = m_dst_io_ctx.aio_operate(m_dst_oid, comp, &op, dst_snap_seq, dst_snap_ids,
401 nullptr);
402 ceph_assert(r == 0);
403 comp->release();
404 }
405
406 template <typename I>
407 void ObjectCopyRequest<I>::handle_write_object(int r) {
408 ldout(m_cct, 20) << "r=" << r << dendl;
409
410 if (r == -ENOENT) {
411 r = 0;
412 } else if (r == -ERANGE) {
413 ldout(m_cct, 10) << "concurrent deep copy" << dendl;
414 r = 0;
415 }
416 if (r < 0) {
417 lderr(m_cct) << "failed to write to destination object: " << cpp_strerror(r)
418 << dendl;
419 finish(r);
420 return;
421 }
422
423 m_write_ops.erase(m_write_ops.begin());
424 if (!m_write_ops.empty()) {
425 send_write_object();
426 return;
427 }
428
429 send_update_object_map();
430 }
431
432 template <typename I>
433 void ObjectCopyRequest<I>::send_update_object_map() {
434 if (!m_dst_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP) ||
435 m_dst_object_state.empty()) {
436 finish(0);
437 return;
438 }
439
440 m_dst_image_ctx->owner_lock.get_read();
441 m_dst_image_ctx->snap_lock.get_read();
442 if (m_dst_image_ctx->object_map == nullptr) {
443 // possible that exclusive lock was lost in background
444 lderr(m_cct) << "object map is not initialized" << dendl;
445
446 m_dst_image_ctx->snap_lock.put_read();
447 m_dst_image_ctx->owner_lock.put_read();
448 finish(-EINVAL);
449 return;
450 }
451
452 auto &dst_object_state = *m_dst_object_state.begin();
453 auto it = m_snap_map.find(dst_object_state.first);
454 ceph_assert(it != m_snap_map.end());
455 auto dst_snap_id = it->second.front();
456 auto object_state = dst_object_state.second;
457 m_dst_object_state.erase(m_dst_object_state.begin());
458
459 ldout(m_cct, 20) << "dst_snap_id=" << dst_snap_id << ", object_state="
460 << static_cast<uint32_t>(object_state) << dendl;
461
462 int r;
463 auto finish_op_ctx = start_lock_op(m_dst_image_ctx->owner_lock, &r);
464 if (finish_op_ctx == nullptr) {
465 lderr(m_cct) << "lost exclusive lock" << dendl;
466 m_dst_image_ctx->snap_lock.put_read();
467 m_dst_image_ctx->owner_lock.put_read();
468 finish(r);
469 return;
470 }
471
472 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
473 handle_update_object_map(r);
474 finish_op_ctx->complete(0);
475 });
476
477 auto dst_image_ctx = m_dst_image_ctx;
478 dst_image_ctx->object_map_lock.get_write();
479 bool sent = dst_image_ctx->object_map->template aio_update<
480 Context, &Context::complete>(dst_snap_id, m_dst_object_number, object_state,
481 {}, {}, false, ctx);
482
483 // NOTE: state machine might complete before we reach here
484 dst_image_ctx->object_map_lock.put_write();
485 dst_image_ctx->snap_lock.put_read();
486 dst_image_ctx->owner_lock.put_read();
487 if (!sent) {
488 ceph_assert(dst_snap_id == CEPH_NOSNAP);
489 ctx->complete(0);
490 }
491 }
492
493 template <typename I>
494 void ObjectCopyRequest<I>::handle_update_object_map(int r) {
495 ldout(m_cct, 20) << "r=" << r << dendl;
496
497 if (r < 0) {
498 lderr(m_cct) << "failed to update object map: " << cpp_strerror(r) << dendl;
499 finish(r);
500 return;
501 }
502
503 if (!m_dst_object_state.empty()) {
504 send_update_object_map();
505 return;
506 }
507 finish(0);
508 }
509
510 template <typename I>
511 Context *ObjectCopyRequest<I>::start_lock_op(RWLock &owner_lock, int* r) {
512 ceph_assert(m_dst_image_ctx->owner_lock.is_locked());
513 if (m_dst_image_ctx->exclusive_lock == nullptr) {
514 return new FunctionContext([](int r) {});
515 }
516 return m_dst_image_ctx->exclusive_lock->start_op(r);
517 }
518
519 template <typename I>
520 uint64_t ObjectCopyRequest<I>::src_to_dst_object_offset(uint64_t objectno,
521 uint64_t offset) {
522 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
523 Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, objectno, offset, 1,
524 image_extents);
525 ceph_assert(image_extents.size() == 1);
526 auto dst_object_offset = image_extents.begin()->first;
527
528 std::map<object_t, std::vector<ObjectExtent>> dst_object_extents;
529 Striper::file_to_extents(m_cct, m_dst_image_ctx->format_string,
530 &m_dst_image_ctx->layout, dst_object_offset, 1, 0,
531 dst_object_extents);
532 ceph_assert(dst_object_extents.size() == 1);
533 ceph_assert(dst_object_extents.begin()->second.size() == 1);
534 auto &e = *dst_object_extents.begin()->second.begin();
535 ceph_assert(e.objectno == m_dst_object_number);
536
537 return e.offset;
538 }
539
540 template <typename I>
541 void ObjectCopyRequest<I>::compute_src_object_extents() {
542 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
543 Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, m_dst_object_number,
544 0, m_dst_image_ctx->layout.object_size, image_extents);
545
546 size_t total = 0;
547 for (auto &e : image_extents) {
548 std::map<object_t, std::vector<ObjectExtent>> src_object_extents;
549 Striper::file_to_extents(m_cct, m_src_image_ctx->format_string,
550 &m_src_image_ctx->layout, e.first, e.second, 0,
551 src_object_extents);
552 auto stripe_unit = std::min(m_src_image_ctx->layout.stripe_unit,
553 m_dst_image_ctx->layout.stripe_unit);
554 for (auto &p : src_object_extents) {
555 for (auto &s : p.second) {
556 m_src_objects.insert(s.objectno);
557 total += s.length;
558 while (s.length > 0) {
559 ceph_assert(s.length >= stripe_unit);
560 auto dst_object_offset = src_to_dst_object_offset(s.objectno, s.offset);
561 m_src_object_extents[dst_object_offset] = {s.objectno, s.offset,
562 stripe_unit};
563 s.offset += stripe_unit;
564 s.length -= stripe_unit;
565 }
566 }
567 }
568 }
569
570 ceph_assert(total == m_dst_image_ctx->layout.object_size);
571
572 ldout(m_cct, 20) << m_src_object_extents.size() << " src extents" << dendl;
573 }
574
575 template <typename I>
576 void ObjectCopyRequest<I>::compute_read_ops() {
577 m_read_ops = {};
578 m_read_snaps = {};
579 m_zero_interval = {};
580
581 m_src_image_ctx->parent_lock.get_read();
582 bool hide_parent = (m_src_image_ctx->parent != nullptr);
583 m_src_image_ctx->parent_lock.put_read();
584
585 librados::snap_t src_copy_point_snap_id = m_snap_map.rbegin()->first;
586 bool prev_exists = hide_parent;
587 uint64_t prev_end_size = prev_exists ?
588 m_src_image_ctx->layout.object_size : 0;
589 librados::snap_t start_src_snap_id = 0;
590
591 for (auto &pair : m_snap_map) {
592 ceph_assert(!pair.second.empty());
593 librados::snap_t end_src_snap_id = pair.first;
594 librados::snap_t end_dst_snap_id = pair.second.front();
595
596 interval_set<uint64_t> diff;
597 uint64_t end_size;
598 bool exists;
599 librados::snap_t clone_end_snap_id;
600 calc_snap_set_diff(m_cct, m_snap_set, start_src_snap_id,
601 end_src_snap_id, &diff, &end_size, &exists,
602 &clone_end_snap_id, &m_read_whole_object);
603
604 if (m_read_whole_object) {
605 ldout(m_cct, 1) << "need to read full object" << dendl;
606 diff.insert(0, m_src_image_ctx->layout.object_size);
607 exists = true;
608 end_size = m_src_image_ctx->layout.object_size;
609 clone_end_snap_id = end_src_snap_id;
610 } else if (!exists) {
611 end_size = 0;
612 if (hide_parent && end_src_snap_id == m_snap_map.begin()->first &&
613 m_snap_set.clones.empty()) {
614 ldout(m_cct, 20) << "no clones for existing object" << dendl;
615 exists = true;
616 diff.insert(0, m_src_image_ctx->layout.object_size);
617 clone_end_snap_id = end_src_snap_id;
618 }
619 }
620
621 ldout(m_cct, 20) << "start_src_snap_id=" << start_src_snap_id << ", "
622 << "end_src_snap_id=" << end_src_snap_id << ", "
623 << "clone_end_snap_id=" << clone_end_snap_id << ", "
624 << "end_dst_snap_id=" << end_dst_snap_id << ", "
625 << "diff=" << diff << ", "
626 << "end_size=" << end_size << ", "
627 << "exists=" << exists << dendl;
628
629 m_zero_interval[end_src_snap_id] = {};
630
631 if (exists || prev_exists) {
632 // clip diff to size of object (in case it was truncated)
633 if (end_size < prev_end_size) {
634 interval_set<uint64_t> trunc;
635 trunc.insert(end_size, prev_end_size);
636 trunc.intersection_of(diff);
637 diff.subtract(trunc);
638 ldout(m_cct, 20) << "clearing truncate diff: " << trunc << dendl;
639 }
640
641 if (exists) {
642 // reads should be issued against the newest (existing) snapshot within
643 // the associated snapshot object clone. writes should be issued
644 // against the oldest snapshot in the snap_map.
645 ceph_assert(clone_end_snap_id >= end_src_snap_id);
646 if (clone_end_snap_id > src_copy_point_snap_id) {
647 // do not read past the copy point snapshot
648 clone_end_snap_id = src_copy_point_snap_id;
649 }
650 }
651
652 for (auto &it : m_src_object_extents) {
653 auto dst_object_offset = it.first;
654 auto &e = it.second;
655
656 if (e.object_no != m_src_ono) {
657 continue;
658 }
659
660 interval_set<uint64_t> read_interval;
661 read_interval.insert(e.offset, e.length);
662
663 if (end_size < prev_end_size) {
664 interval_set<uint64_t> zero_interval;
665 zero_interval.insert(end_size, prev_end_size - end_size);
666 zero_interval.intersection_of(read_interval);
667 if (!zero_interval.empty()) {
668 auto it = zero_interval.begin();
669 auto offset = it.get_start() - e.offset;
670 m_zero_interval[end_src_snap_id].insert(dst_object_offset + offset,
671 it.get_len());
672 ldout(m_cct, 20) << "extent " << e.offset << "~" << e.length
673 << " intersects truncation " << end_size << "~"
674 << prev_end_size - end_size << ", inserting zero "
675 << dst_object_offset + offset << "~"
676 << it.get_len() << dendl;
677 }
678 }
679
680 // limit read interval to diff
681 read_interval.intersection_of(diff);
682
683 ldout(m_cct, 20) << "src_object_extent: " << e.offset << "~" << e.length
684 << ", dst_object_offset=" << dst_object_offset
685 << ", read: " << read_interval << dendl;
686
687 ceph_assert(exists || read_interval.empty());
688
689 for (auto it = read_interval.begin(); it != read_interval.end();
690 it++) {
691 ceph_assert(it.get_start() >= e.offset);
692 auto offset = it.get_start() - e.offset;
693 ldout(m_cct, 20) << "read/write op: " << it.get_start() << "~"
694 << it.get_len() << " dst: "
695 << dst_object_offset + offset << dendl;
696 m_read_ops[{end_src_snap_id, clone_end_snap_id}]
697 .emplace_back(COPY_OP_TYPE_WRITE, it.get_start(),
698 dst_object_offset + offset, it.get_len());
699 }
700 }
701 }
702
703 prev_end_size = end_size;
704 prev_exists = exists;
705 if (hide_parent && prev_exists && prev_end_size == 0) {
706 // hide parent
707 prev_end_size = m_src_image_ctx->layout.object_size;
708 }
709 start_src_snap_id = end_src_snap_id;
710 }
711
712 for (auto &it : m_read_ops) {
713 m_read_snaps.push_back(it.first);
714 }
715 }
716
717 template <typename I>
718 void ObjectCopyRequest<I>::compute_read_from_parent_ops(
719 io::Extents *parent_image_extents) {
720 assert(m_src_image_ctx->snap_lock.is_locked());
721 assert(m_src_image_ctx->parent_lock.is_locked());
722
723 m_read_ops = {};
724 m_zero_interval = {};
725 parent_image_extents->clear();
726
727 if (m_src_image_ctx->parent == nullptr) {
728 ldout(m_cct, 20) << "no parent" << dendl;
729 return;
730 }
731
732 size_t noent_count = 0;
733 for (auto &it : m_src_object_extents) {
734 if (it.second.noent) {
735 noent_count++;
736 }
737 }
738
739 if (noent_count == 0) {
740 ldout(m_cct, 20) << "no extents need read from parent" << dendl;
741 return;
742 }
743
744 if (noent_count == m_src_object_extents.size() && !m_flatten) {
745 ldout(m_cct, 20) << "reading all extents skipped when no flatten"
746 << dendl;
747 return;
748 }
749
750 ldout(m_cct, 20) << dendl;
751
752 auto src_snap_seq = m_snap_map.begin()->first;
753
754 uint64_t parent_overlap;
755 int r = m_src_image_ctx->get_parent_overlap(src_snap_seq, &parent_overlap);
756 if (r < 0) {
757 ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
758 << src_snap_seq << ": " << cpp_strerror(r) << dendl;
759 return;
760 }
761 if (parent_overlap == 0) {
762 ldout(m_cct, 20) << "no parent overlap" << dendl;
763 return;
764 }
765
766 for (auto &it : m_src_object_extents) {
767 auto dst_object_offset = it.first;
768 auto &e = it.second;
769
770 if (!e.noent) {
771 continue;
772 }
773
774 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
775 Striper::extent_to_file(m_cct, &m_src_image_ctx->layout, e.object_no,
776 e.offset, e.length, image_extents);
777
778 uint64_t overlap = m_src_image_ctx->prune_parent_extents(image_extents,
779 parent_overlap);
780 if (overlap == 0) {
781 ldout(m_cct, 20) << "no parent overlap for object_no " << e.object_no
782 << " extent " << e.offset << "~" << e.length << dendl;
783 continue;
784 }
785
786 ldout(m_cct, 20) << "object_no " << e.object_no << " extent " << e.offset
787 << "~" << e.length << " overlap " << parent_overlap
788 << " parent extents " << image_extents << dendl;
789
790 ceph_assert(image_extents.size() == 1);
791
792 auto src_image_offset = image_extents.begin()->first;
793 auto length = image_extents.begin()->second;
794 m_read_ops[{src_snap_seq, 0}].emplace_back(COPY_OP_TYPE_WRITE, e.offset,
795 dst_object_offset, length);
796 m_read_ops[{src_snap_seq, 0}].rbegin()->src_extent_map[e.offset] = length;
797 parent_image_extents->emplace_back(src_image_offset, length);
798 }
799
800 if (!parent_image_extents->empty()) {
801 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
802 }
803 }
804
805 template <typename I>
806 void ObjectCopyRequest<I>::merge_write_ops() {
807 ldout(m_cct, 20) << dendl;
808
809 for (auto &it : m_zero_interval) {
810 m_dst_zero_interval[it.first].insert(it.second);
811 }
812
813 for (auto &it : m_read_ops) {
814 auto src_snap_seq = it.first.first;
815 auto &copy_ops = it.second;
816 for (auto &copy_op : copy_ops) {
817 uint64_t src_offset = copy_op.src_offset;
818 uint64_t dst_offset = copy_op.dst_offset;
819 for (auto &e : copy_op.src_extent_map) {
820 uint64_t zero_len = e.first - src_offset;
821 if (zero_len > 0) {
822 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
823 << ", inserting zero " << dst_offset << "~"
824 << zero_len << dendl;
825 m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
826 src_offset += zero_len;
827 dst_offset += zero_len;
828 }
829 copy_op.dst_extent_map[dst_offset] = e.second;
830 src_offset += e.second;
831 dst_offset += e.second;
832 }
833 if (dst_offset < copy_op.dst_offset + copy_op.length) {
834 uint64_t zero_len = copy_op.dst_offset + copy_op.length - dst_offset;
835 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq
836 << ", inserting zero " << dst_offset << "~"
837 << zero_len << dendl;
838 m_dst_zero_interval[src_snap_seq].insert(dst_offset, zero_len);
839 } else {
840 ceph_assert(dst_offset == copy_op.dst_offset + copy_op.length);
841 }
842 m_write_ops[src_snap_seq].emplace_back(std::move(copy_op));
843 }
844 }
845 }
846
847 template <typename I>
848 void ObjectCopyRequest<I>::compute_zero_ops() {
849 ldout(m_cct, 20) << dendl;
850
851 bool fast_diff = m_dst_image_ctx->test_features(RBD_FEATURE_FAST_DIFF);
852 uint64_t prev_end_size = 0;
853
854 m_src_image_ctx->parent_lock.get_read();
855 bool hide_parent = (m_src_image_ctx->parent != nullptr);
856 m_src_image_ctx->parent_lock.put_read();
857
858 for (auto &it : m_dst_zero_interval) {
859 auto src_snap_seq = it.first;
860 auto &zero_interval = it.second;
861
862 auto snap_map_it = m_snap_map.find(src_snap_seq);
863 ceph_assert(snap_map_it != m_snap_map.end());
864 auto dst_snap_seq = snap_map_it->second.front();
865
866 auto dst_may_exist_it = m_dst_object_may_exist.find(dst_snap_seq);
867 ceph_assert(dst_may_exist_it != m_dst_object_may_exist.end());
868 if (!dst_may_exist_it->second && prev_end_size > 0) {
869 ldout(m_cct, 5) << "object DNE for snap_id: " << dst_snap_seq << dendl;
870 m_write_ops[src_snap_seq].emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
871 prev_end_size = 0;
872 continue;
873 }
874
875 if (hide_parent) {
876 RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
877 RWLock::RLocker parent_locker(m_dst_image_ctx->parent_lock);
878 uint64_t parent_overlap = 0;
879 int r = m_dst_image_ctx->get_parent_overlap(dst_snap_seq, &parent_overlap);
880 if (r < 0) {
881 ldout(m_cct, 5) << "failed getting parent overlap for snap_id: "
882 << dst_snap_seq << ": " << cpp_strerror(r) << dendl;
883 }
884 if (parent_overlap == 0) {
885 ldout(m_cct, 20) << "no parent overlap" << dendl;
886 hide_parent = false;
887 } else {
888 std::vector<std::pair<uint64_t, uint64_t>> image_extents;
889 Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout,
890 m_dst_object_number, 0,
891 m_dst_image_ctx->layout.object_size,
892 image_extents);
893 uint64_t overlap = m_dst_image_ctx->prune_parent_extents(image_extents,
894 parent_overlap);
895 if (overlap == 0) {
896 ldout(m_cct, 20) << "no parent overlap" << dendl;
897 hide_parent = false;
898 } else if (src_snap_seq == m_dst_zero_interval.begin()->first) {
899 for (auto e : image_extents) {
900 prev_end_size += e.second;
901 }
902 ceph_assert(prev_end_size <= m_dst_image_ctx->layout.object_size);
903 }
904 }
905 }
906
907 uint64_t end_size = prev_end_size;
908
909 // update end_size if there are writes into higher offsets
910 auto iter = m_write_ops.find(src_snap_seq);
911 if (iter != m_write_ops.end()) {
912 for (auto &copy_op : iter->second) {
913 for (auto &e : copy_op.dst_extent_map) {
914 end_size = std::max(end_size, e.first + e.second);
915 }
916 }
917 }
918
919 for (auto z = zero_interval.begin(); z != zero_interval.end(); z++) {
920 if (z.get_start() + z.get_len() >= end_size) {
921 // zero interval at the object end
922 if (z.get_start() == 0 && hide_parent) {
923 m_write_ops[src_snap_seq]
924 .emplace_back(COPY_OP_TYPE_REMOVE_TRUNC, 0, 0, 0);
925 ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE_TRUNC" << dendl;
926 } else if (z.get_start() < prev_end_size) {
927 if (z.get_start() == 0) {
928 m_write_ops[src_snap_seq]
929 .emplace_back(COPY_OP_TYPE_REMOVE, 0, 0, 0);
930 ldout(m_cct, 20) << "COPY_OP_TYPE_REMOVE" << dendl;
931 } else {
932 m_write_ops[src_snap_seq]
933 .emplace_back(COPY_OP_TYPE_TRUNC, 0, z.get_start(), 0);
934 ldout(m_cct, 20) << "COPY_OP_TYPE_TRUNC " << z.get_start() << dendl;
935 }
936 }
937 end_size = std::min(end_size, z.get_start());
938 } else {
939 // zero interval inside the object
940 m_write_ops[src_snap_seq]
941 .emplace_back(COPY_OP_TYPE_ZERO, 0, z.get_start(), z.get_len());
942 ldout(m_cct, 20) << "COPY_OP_TYPE_ZERO " << z.get_start() << "~"
943 << z.get_len() << dendl;
944 }
945 }
946 ldout(m_cct, 20) << "src_snap_seq=" << src_snap_seq << ", end_size="
947 << end_size << dendl;
948 if (end_size > 0 || hide_parent) {
949 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS;
950 if (fast_diff && end_size == prev_end_size &&
951 m_write_ops[src_snap_seq].empty()) {
952 m_dst_object_state[src_snap_seq] = OBJECT_EXISTS_CLEAN;
953 }
954 }
955 prev_end_size = end_size;
956 }
957 }
958
959 template <typename I>
960 void ObjectCopyRequest<I>::finish(int r) {
961 ldout(m_cct, 20) << "r=" << r << dendl;
962
963 // ensure IoCtxs are closed prior to proceeding
964 auto on_finish = m_on_finish;
965
966 m_src_async_op->finish_op();
967 delete m_src_async_op;
968 delete this;
969
970 on_finish->complete(r);
971 }
972
973 template <typename I>
974 void ObjectCopyRequest<I>::compute_dst_object_may_exist() {
975 RWLock::RLocker snap_locker(m_dst_image_ctx->snap_lock);
976
977 auto snap_ids = m_dst_image_ctx->snaps;
978 snap_ids.push_back(CEPH_NOSNAP);
979
980 for (auto snap_id : snap_ids) {
981 m_dst_object_may_exist[snap_id] =
982 (m_dst_object_number < m_dst_image_ctx->get_object_count(snap_id));
983 }
984 }
985
986 } // namespace deep_copy
987 } // namespace librbd
988
// Explicit instantiation of the request template for librbd::ImageCtx.
989 template class librbd::deep_copy::ObjectCopyRequest<librbd::ImageCtx>;