1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/io/CopyupRequest.h"
5 #include "common/ceph_context.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "common/Mutex.h"
9 #include "common/WorkQueue.h"
10 #include "librbd/AsyncObjectThrottle.h"
11 #include "librbd/ExclusiveLock.h"
12 #include "librbd/ImageCtx.h"
13 #include "librbd/ObjectMap.h"
14 #include "librbd/Utils.h"
15 #include "librbd/deep_copy/ObjectCopyRequest.h"
16 #include "librbd/io/AioCompletion.h"
17 #include "librbd/io/ImageRequest.h"
18 #include "librbd/io/ObjectRequest.h"
19 #include "librbd/io/ReadResult.h"
21 #include <boost/bind.hpp>
22 #include <boost/lambda/bind.hpp>
23 #include <boost/lambda/construct.hpp>
25 #define dout_subsys ceph_subsys_rbd
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": "
36 class C_UpdateObjectMap
: public C_AsyncObjectThrottle
<I
> {
38 C_UpdateObjectMap(AsyncObjectThrottle
<I
> &throttle
, I
*image_ctx
,
39 uint64_t object_no
, uint8_t head_object_map_state
,
40 const std::vector
<uint64_t> *snap_ids
,
41 bool first_snap_is_clean
, const ZTracer::Trace
&trace
,
43 : C_AsyncObjectThrottle
<I
>(throttle
, *image_ctx
), m_object_no(object_no
),
44 m_head_object_map_state(head_object_map_state
), m_snap_ids(*snap_ids
),
45 m_first_snap_is_clean(first_snap_is_clean
), m_trace(trace
),
46 m_snap_id_idx(snap_id_idx
)
51 auto& image_ctx
= this->m_image_ctx
;
52 ceph_assert(image_ctx
.owner_lock
.is_locked());
53 if (image_ctx
.exclusive_lock
== nullptr) {
56 ceph_assert(image_ctx
.exclusive_lock
->is_lock_owner());
58 RWLock::RLocker
snap_locker(image_ctx
.snap_lock
);
59 if (image_ctx
.object_map
== nullptr) {
63 uint64_t snap_id
= m_snap_ids
[m_snap_id_idx
];
64 if (snap_id
== CEPH_NOSNAP
) {
67 return update_snapshot(snap_id
);
72 auto& image_ctx
= this->m_image_ctx
;
73 RWLock::WLocker
object_map_locker(image_ctx
.object_map_lock
);
74 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
75 CEPH_NOSNAP
, m_object_no
, m_head_object_map_state
, {}, m_trace
, false,
77 return (sent
? 0 : 1);
80 int update_snapshot(uint64_t snap_id
) {
81 auto& image_ctx
= this->m_image_ctx
;
82 uint8_t state
= OBJECT_EXISTS
;
83 if (image_ctx
.test_features(RBD_FEATURE_FAST_DIFF
, image_ctx
.snap_lock
) &&
84 (m_snap_id_idx
> 0 || m_first_snap_is_clean
)) {
85 // first snapshot should be exists+dirty since it contains
86 // the copyup data -- later snapshots inherit the data.
87 state
= OBJECT_EXISTS_CLEAN
;
90 RWLock::RLocker
object_map_locker(image_ctx
.object_map_lock
);
91 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
92 snap_id
, m_object_no
, state
, {}, m_trace
, true, this);
99 uint8_t m_head_object_map_state
;
100 const std::vector
<uint64_t> &m_snap_ids
;
101 bool m_first_snap_is_clean
;
102 const ZTracer::Trace
&m_trace
;
103 size_t m_snap_id_idx
;
106 } // anonymous namespace
108 template <typename I
>
109 CopyupRequest
<I
>::CopyupRequest(I
*ictx
, const std::string
&oid
,
110 uint64_t objectno
, Extents
&&image_extents
,
111 const ZTracer::Trace
&parent_trace
)
112 : m_image_ctx(ictx
), m_oid(oid
), m_object_no(objectno
),
113 m_image_extents(image_extents
),
114 m_trace(util::create_trace(*m_image_ctx
, "copy-up", parent_trace
)),
115 m_lock("CopyupRequest", false, false)
117 m_async_op
.start_op(*util::get_image_ctx(m_image_ctx
));
120 template <typename I
>
121 CopyupRequest
<I
>::~CopyupRequest() {
122 ceph_assert(m_pending_requests
.empty());
123 m_async_op
.finish_op();
126 template <typename I
>
127 void CopyupRequest
<I
>::append_request(AbstractObjectWriteRequest
<I
> *req
) {
128 Mutex::Locker
locker(m_lock
);
130 auto cct
= m_image_ctx
->cct
;
131 ldout(cct
, 20) << "oid=" << m_oid
<< ", "
132 << "object_request=" << req
<< ", "
133 << "append=" << m_append_request_permitted
<< dendl
;
134 if (m_append_request_permitted
) {
135 m_pending_requests
.push_back(req
);
137 m_restart_requests
.push_back(req
);
141 template <typename I
>
142 void CopyupRequest
<I
>::send() {
146 template <typename I
>
147 void CopyupRequest
<I
>::read_from_parent() {
148 auto cct
= m_image_ctx
->cct
;
149 RWLock::RLocker
snap_locker(m_image_ctx
->snap_lock
);
150 RWLock::RLocker
parent_locker(m_image_ctx
->parent_lock
);
152 if (m_image_ctx
->parent
== nullptr) {
153 ldout(cct
, 5) << "parent detached" << dendl
;
155 m_image_ctx
->op_work_queue
->queue(
156 util::create_context_callback
<
157 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_read_from_parent
>(this),
160 } else if (is_deep_copy()) {
165 auto comp
= AioCompletion::create_and_start
<
167 &CopyupRequest
<I
>::handle_read_from_parent
>(
168 this, util::get_image_ctx(m_image_ctx
->parent
), AIO_TYPE_READ
);
170 ldout(cct
, 20) << "oid=" << m_oid
<< ", "
171 << "completion=" << comp
<< ", "
172 << "extents=" << m_image_extents
174 ImageRequest
<I
>::aio_read(m_image_ctx
->parent
, comp
,
175 std::move(m_image_extents
),
176 ReadResult
{&m_copyup_data
}, 0, m_trace
);
179 template <typename I
>
180 void CopyupRequest
<I
>::handle_read_from_parent(int r
) {
181 auto cct
= m_image_ctx
->cct
;
182 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
184 m_image_ctx
->snap_lock
.get_read();
186 m_copyup_is_zero
= m_copyup_data
.is_zero();
187 m_copyup_required
= is_copyup_required();
188 disable_append_requests();
190 if (r
< 0 && r
!= -ENOENT
) {
192 m_image_ctx
->snap_lock
.put_read();
194 lderr(cct
) << "error reading from parent: " << cpp_strerror(r
) << dendl
;
199 if (!m_copyup_required
) {
201 m_image_ctx
->snap_lock
.put_read();
203 ldout(cct
, 20) << "no-op, skipping" << dendl
;
208 // copyup() will affect snapshots only if parent data is not all
210 if (!m_copyup_is_zero
) {
211 m_snap_ids
.insert(m_snap_ids
.end(), m_image_ctx
->snaps
.rbegin(),
212 m_image_ctx
->snaps
.rend());
216 m_image_ctx
->snap_lock
.put_read();
218 update_object_maps();
221 template <typename I
>
222 void CopyupRequest
<I
>::deep_copy() {
223 auto cct
= m_image_ctx
->cct
;
224 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
225 ceph_assert(m_image_ctx
->parent_lock
.is_locked());
226 ceph_assert(m_image_ctx
->parent
!= nullptr);
229 m_flatten
= is_copyup_required() ? true : m_image_ctx
->migration_info
.flatten
;
232 ldout(cct
, 20) << "oid=" << m_oid
<< ", flatten=" << m_flatten
<< dendl
;
234 auto ctx
= util::create_context_callback
<
235 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_deep_copy
>(this);
236 auto req
= deep_copy::ObjectCopyRequest
<I
>::create(
237 m_image_ctx
->parent
, m_image_ctx
, m_image_ctx
->migration_info
.snap_map
,
238 m_object_no
, m_flatten
, ctx
);
243 template <typename I
>
244 void CopyupRequest
<I
>::handle_deep_copy(int r
) {
245 auto cct
= m_image_ctx
->cct
;
246 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
248 m_image_ctx
->snap_lock
.get_read();
250 m_copyup_required
= is_copyup_required();
251 if (r
== -ENOENT
&& !m_flatten
&& m_copyup_required
) {
253 m_image_ctx
->snap_lock
.put_read();
255 ldout(cct
, 10) << "restart deep-copy with flatten" << dendl
;
260 disable_append_requests();
262 if (r
< 0 && r
!= -ENOENT
) {
264 m_image_ctx
->snap_lock
.put_read();
266 lderr(cct
) << "error encountered during deep-copy: " << cpp_strerror(r
)
272 if (!m_copyup_required
&& !is_update_object_map_required(r
)) {
274 m_image_ctx
->snap_lock
.put_read();
280 ldout(cct
, 20) << "skipping" << dendl
;
285 // For deep-copy, copyup() will never affect snapshots. However,
286 // this state machine is responsible for updating object maps for
287 // snapshots that have been created on destination image after
288 // migration started.
290 compute_deep_copy_snap_ids();
294 m_image_ctx
->snap_lock
.put_read();
296 update_object_maps();
299 template <typename I
>
300 void CopyupRequest
<I
>::update_object_maps() {
301 RWLock::RLocker
owner_locker(m_image_ctx
->owner_lock
);
302 RWLock::RLocker
snap_locker(m_image_ctx
->snap_lock
);
303 if (m_image_ctx
->object_map
== nullptr) {
304 snap_locker
.unlock();
305 owner_locker
.unlock();
311 auto cct
= m_image_ctx
->cct
;
312 ldout(cct
, 20) << "oid=" << m_oid
<< dendl
;
314 bool copy_on_read
= m_pending_requests
.empty();
315 uint8_t head_object_map_state
= OBJECT_EXISTS
;
316 if (copy_on_read
&& !m_snap_ids
.empty() &&
317 m_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
,
318 m_image_ctx
->snap_lock
)) {
319 // HEAD is non-dirty since data is tied to first snapshot
320 head_object_map_state
= OBJECT_EXISTS_CLEAN
;
323 auto r_it
= m_pending_requests
.rbegin();
324 if (r_it
!= m_pending_requests
.rend()) {
325 // last write-op determines the final object map state
326 head_object_map_state
= (*r_it
)->get_pre_write_object_map_state();
329 RWLock::WLocker
object_map_locker(m_image_ctx
->object_map_lock
);
330 if ((*m_image_ctx
->object_map
)[m_object_no
] != head_object_map_state
) {
331 // (maybe) need to update the HEAD object map state
332 m_snap_ids
.push_back(CEPH_NOSNAP
);
334 object_map_locker
.unlock();
335 snap_locker
.unlock();
337 ceph_assert(m_image_ctx
->exclusive_lock
->is_lock_owner());
338 typename AsyncObjectThrottle
<I
>::ContextFactory
context_factory(
339 boost::lambda::bind(boost::lambda::new_ptr
<C_UpdateObjectMap
<I
>>(),
340 boost::lambda::_1
, m_image_ctx
, m_object_no
, head_object_map_state
,
341 &m_snap_ids
, m_first_snap_is_clean
, m_trace
, boost::lambda::_2
));
342 auto ctx
= util::create_context_callback
<
343 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_update_object_maps
>(this);
344 auto throttle
= new AsyncObjectThrottle
<I
>(
345 nullptr, *m_image_ctx
, context_factory
, ctx
, nullptr, 0, m_snap_ids
.size());
347 m_image_ctx
->config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"));
350 template <typename I
>
351 void CopyupRequest
<I
>::handle_update_object_maps(int r
) {
352 auto cct
= m_image_ctx
->cct
;
353 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
356 lderr(m_image_ctx
->cct
) << "failed to update object map: "
357 << cpp_strerror(r
) << dendl
;
366 template <typename I
>
367 void CopyupRequest
<I
>::copyup() {
368 auto cct
= m_image_ctx
->cct
;
369 m_image_ctx
->snap_lock
.get_read();
370 auto snapc
= m_image_ctx
->snapc
;
371 m_image_ctx
->snap_lock
.put_read();
374 if (!m_copyup_required
) {
377 ldout(cct
, 20) << "skipping copyup" << dendl
;
382 ldout(cct
, 20) << "oid=" << m_oid
<< dendl
;
384 bool copy_on_read
= m_pending_requests
.empty();
385 bool deep_copyup
= !snapc
.snaps
.empty() && !m_copyup_is_zero
;
386 if (m_copyup_is_zero
) {
387 m_copyup_data
.clear();
391 librados::ObjectWriteOperation copyup_op
;
392 if (copy_on_read
|| deep_copyup
) {
393 copyup_op
.exec("rbd", "copyup", m_copyup_data
);
394 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, ©up_op
);
398 librados::ObjectWriteOperation write_op
;
401 write_op
.exec("rbd", "copyup", m_copyup_data
);
402 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, &write_op
);
405 // merge all pending write ops into this single RADOS op
406 for (auto req
: m_pending_requests
) {
407 ldout(cct
, 20) << "add_copyup_ops " << req
<< dendl
;
408 req
->add_copyup_ops(&write_op
);
411 if (write_op
.size() > 0) {
417 // issue librados ops at the end to simplify test cases
418 std::vector
<librados::snap_t
> snaps
;
419 if (copyup_op
.size() > 0) {
420 // send only the copyup request with a blank snapshot context so that
421 // all snapshots are detected from the parent for this object. If
422 // this is a CoW request, a second request will be created for the
423 // actual modification.
424 ldout(cct
, 20) << "copyup with empty snapshot context" << dendl
;
426 auto comp
= util::create_rados_callback
<
427 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
428 r
= m_image_ctx
->data_ctx
.aio_operate(
429 m_oid
, comp
, ©up_op
, 0, snaps
,
430 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
435 if (write_op
.size() > 0) {
436 // compare-and-write doesn't add any write ops (copyup+cmpext+write
437 // can't be executed in the same RADOS op because, unless the object
438 // was already present in the clone, cmpext wouldn't see it)
439 ldout(cct
, 20) << (!deep_copyup
&& write_op
.size() > 2 ?
440 "copyup + ops" : !deep_copyup
? "copyup" : "ops")
441 << " with current snapshot context" << dendl
;
443 snaps
.insert(snaps
.end(), snapc
.snaps
.begin(), snapc
.snaps
.end());
444 auto comp
= util::create_rados_callback
<
445 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
446 r
= m_image_ctx
->data_ctx
.aio_operate(
447 m_oid
, comp
, &write_op
, snapc
.seq
, snaps
,
448 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
454 template <typename I
>
455 void CopyupRequest
<I
>::handle_copyup(int r
) {
456 auto cct
= m_image_ctx
->cct
;
457 unsigned pending_copyups
;
459 Mutex::Locker
locker(m_lock
);
460 ceph_assert(m_pending_copyups
> 0);
461 pending_copyups
= --m_pending_copyups
;
464 ldout(cct
, 20) << "oid=" << m_oid
<< ", " << "r=" << r
<< ", "
465 << "pending=" << pending_copyups
<< dendl
;
467 if (r
< 0 && r
!= -ENOENT
) {
468 lderr(cct
) << "failed to copyup object: " << cpp_strerror(r
) << dendl
;
469 complete_requests(false, r
);
472 if (pending_copyups
== 0) {
477 template <typename I
>
478 void CopyupRequest
<I
>::finish(int r
) {
479 auto cct
= m_image_ctx
->cct
;
480 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
482 complete_requests(true, r
);
486 template <typename I
>
487 void CopyupRequest
<I
>::complete_requests(bool override_restart_retval
, int r
) {
488 auto cct
= m_image_ctx
->cct
;
491 while (!m_pending_requests
.empty()) {
492 auto it
= m_pending_requests
.begin();
494 ldout(cct
, 20) << "completing request " << req
<< dendl
;
495 req
->handle_copyup(r
);
496 m_pending_requests
.erase(it
);
499 if (override_restart_retval
) {
503 while (!m_restart_requests
.empty()) {
504 auto it
= m_restart_requests
.begin();
506 ldout(cct
, 20) << "restarting request " << req
<< dendl
;
507 req
->handle_copyup(r
);
508 m_restart_requests
.erase(it
);
512 template <typename I
>
513 void CopyupRequest
<I
>::disable_append_requests() {
514 ceph_assert(m_lock
.is_locked());
515 m_append_request_permitted
= false;
518 template <typename I
>
519 void CopyupRequest
<I
>::remove_from_list() {
520 Mutex::Locker
copyup_list_locker(m_image_ctx
->copyup_list_lock
);
522 auto it
= m_image_ctx
->copyup_list
.find(m_object_no
);
523 if (it
!= m_image_ctx
->copyup_list
.end()) {
524 m_image_ctx
->copyup_list
.erase(it
);
528 template <typename I
>
529 bool CopyupRequest
<I
>::is_copyup_required() {
530 ceph_assert(m_lock
.is_locked());
532 bool copy_on_read
= m_pending_requests
.empty();
534 // always force a copyup if CoR enabled
538 if (!m_copyup_is_zero
) {
542 for (auto req
: m_pending_requests
) {
543 if (!req
->is_empty_write_op()) {
550 template <typename I
>
551 bool CopyupRequest
<I
>::is_deep_copy() const {
552 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
553 return !m_image_ctx
->migration_info
.empty();
556 template <typename I
>
557 bool CopyupRequest
<I
>::is_update_object_map_required(int r
) {
558 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
564 if (m_image_ctx
->object_map
== nullptr) {
568 if (m_image_ctx
->migration_info
.empty()) {
569 // migration might have completed while IO was in-flight,
570 // assume worst-case and perform an object map update
574 auto it
= m_image_ctx
->migration_info
.snap_map
.find(CEPH_NOSNAP
);
575 ceph_assert(it
!= m_image_ctx
->migration_info
.snap_map
.end());
576 return it
->second
[0] != CEPH_NOSNAP
;
579 template <typename I
>
580 void CopyupRequest
<I
>::compute_deep_copy_snap_ids() {
581 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
583 // don't copy ids for the snaps updated by object deep copy or
584 // that don't overlap
585 std::set
<uint64_t> deep_copied
;
586 for (auto &it
: m_image_ctx
->migration_info
.snap_map
) {
587 if (it
.first
!= CEPH_NOSNAP
) {
588 deep_copied
.insert(it
.second
.front());
592 RWLock::RLocker
parent_locker(m_image_ctx
->parent_lock
);
593 std::copy_if(m_image_ctx
->snaps
.rbegin(), m_image_ctx
->snaps
.rend(),
594 std::back_inserter(m_snap_ids
),
595 [this, cct
=m_image_ctx
->cct
, &deep_copied
](uint64_t snap_id
) {
596 if (deep_copied
.count(snap_id
)) {
597 m_first_snap_is_clean
= true;
601 uint64_t parent_overlap
= 0;
602 int r
= m_image_ctx
->get_parent_overlap(snap_id
, &parent_overlap
);
604 ldout(cct
, 5) << "failed getting parent overlap for snap_id: "
605 << snap_id
<< ": " << cpp_strerror(r
) << dendl
;
607 if (parent_overlap
== 0) {
610 std::vector
<std::pair
<uint64_t, uint64_t>> extents
;
611 Striper::extent_to_file(cct
, &m_image_ctx
->layout
,
613 m_image_ctx
->layout
.object_size
,
615 auto overlap
= m_image_ctx
->prune_parent_extents(
616 extents
, parent_overlap
);
622 } // namespace librbd

// explicit instantiation for the production image-context type; tests
// instantiate the template with a mock context elsewhere
624 template class librbd::io::CopyupRequest
<librbd::ImageCtx
>;