1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/io/CopyupRequest.h"
5 #include "common/ceph_context.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "common/Mutex.h"
9 #include "common/WorkQueue.h"
10 #include "librbd/AsyncObjectThrottle.h"
11 #include "librbd/ExclusiveLock.h"
12 #include "librbd/ImageCtx.h"
13 #include "librbd/ObjectMap.h"
14 #include "librbd/Utils.h"
15 #include "librbd/deep_copy/ObjectCopyRequest.h"
16 #include "librbd/io/AioCompletion.h"
17 #include "librbd/io/ImageRequest.h"
18 #include "librbd/io/ObjectRequest.h"
19 #include "librbd/io/ReadResult.h"
21 #include <boost/bind.hpp>
22 #include <boost/lambda/bind.hpp>
23 #include <boost/lambda/construct.hpp>
25 #define dout_subsys ceph_subsys_rbd
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": "
// Per-snapshot object-map updater, fanned out by AsyncObjectThrottle from
// update_object_maps(). Each instance updates the object-map state of one
// object for one snap id out of m_snap_ids (CEPH_NOSNAP == HEAD revision).
// NOTE(review): this extraction has dropped source lines (embedded original
// numbering skips, e.g. 46->51); fragments below are reproduced verbatim.
36 class C_UpdateObjectMap
: public C_AsyncObjectThrottle
<I
> {
// Constructor: captures the object number, the desired HEAD object-map
// state, the snap-id list (borrowed by reference), and the index this
// instance is responsible for.
38 C_UpdateObjectMap(AsyncObjectThrottle
<I
> &throttle
, I
*image_ctx
,
39 uint64_t object_no
, uint8_t head_object_map_state
,
40 const std::vector
<uint64_t> *snap_ids
,
41 bool first_snap_is_clean
, const ZTracer::Trace
&trace
,
// NOTE(review): the parameter list's closing fragment (original line 42,
// presumably "size_t snap_id_idx)") was dropped by the extraction.
43 : C_AsyncObjectThrottle
<I
>(throttle
, *image_ctx
), m_object_no(object_no
),
44 m_head_object_map_state(head_object_map_state
), m_snap_ids(*snap_ids
),
45 m_first_snap_is_clean(first_snap_is_clean
), m_trace(trace
),
46 m_snap_id_idx(snap_id_idx
)
// Fragment of the throttle entry point (its signature, original lines
// 47-50, was dropped). Dispatches to the HEAD update when the current
// snap id is CEPH_NOSNAP, otherwise to update_snapshot().
51 auto& image_ctx
= this->m_image_ctx
;
52 ceph_assert(image_ctx
.owner_lock
.is_locked());
53 if (image_ctx
.exclusive_lock
== nullptr) {
56 ceph_assert(image_ctx
.exclusive_lock
->is_lock_owner());
58 RWLock::RLocker
snap_locker(image_ctx
.snap_lock
);
59 if (image_ctx
.object_map
== nullptr) {
63 uint64_t snap_id
= m_snap_ids
[m_snap_id_idx
];
64 if (snap_id
== CEPH_NOSNAP
) {
67 return update_snapshot(snap_id
);
// Fragment of the HEAD-revision update helper (header elided): issues an
// async object-map update for CEPH_NOSNAP under object_map_lock; returns
// 0 when an update was sent, 1 when no update was necessary.
72 auto& image_ctx
= this->m_image_ctx
;
73 RWLock::WLocker
object_map_locker(image_ctx
.object_map_lock
);
74 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
75 CEPH_NOSNAP
, m_object_no
, m_head_object_map_state
, {}, m_trace
, false,
77 return (sent
? 0 : 1);
// Updates the object map of a single snapshot revision. With fast-diff
// enabled, every snapshot after the first (or all of them when the first
// was already handled by deep-copy) is marked EXISTS_CLEAN.
80 int update_snapshot(uint64_t snap_id
) {
81 auto& image_ctx
= this->m_image_ctx
;
82 uint8_t state
= OBJECT_EXISTS
;
83 if (image_ctx
.test_features(RBD_FEATURE_FAST_DIFF
, image_ctx
.snap_lock
) &&
84 (m_snap_id_idx
> 0 || m_first_snap_is_clean
)) {
85 // first snapshot should be exists+dirty since it contains
86 // the copyup data -- later snapshots inherit the data.
87 state
= OBJECT_EXISTS_CLEAN
;
90 RWLock::RLocker
object_map_locker(image_ctx
.object_map_lock
);
91 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
92 snap_id
, m_object_no
, state
, {}, m_trace
, true, this);
// Data members (some declarations, original lines 93-98, were dropped).
// m_snap_ids and m_trace are references: the caller (update_object_maps)
// must keep them alive for the lifetime of this context.
99 uint8_t m_head_object_map_state
;
100 const std::vector
<uint64_t> &m_snap_ids
;
101 bool m_first_snap_is_clean
;
102 const ZTracer::Trace
&m_trace
;
103 size_t m_snap_id_idx
;
106 } // anonymous namespace
// Constructor: records the target object (oid/object number), the parent
// image extents to read, and starts an async-op guard so the image cannot
// shut down while the copyup is in flight.
// NOTE(review): fragments reproduced verbatim; some original lines
// (116, 119-120) were dropped by the extraction.
108 template <typename I
>
109 CopyupRequest
<I
>::CopyupRequest(I
*ictx
, const std::string
&oid
,
110 uint64_t objectno
, Extents
&&image_extents
,
111 const ZTracer::Trace
&parent_trace
)
112 : m_image_ctx(ictx
), m_oid(oid
), m_object_no(objectno
),
// NOTE(review): image_extents arrives by rvalue reference but is copied
// here rather than std::move()d -- confirm against upstream before
// changing, the member init below is reproduced verbatim.
113 m_image_extents(image_extents
),
114 m_trace(util::create_trace(*m_image_ctx
, "copy-up", parent_trace
)),
115 m_lock("CopyupRequest", false, false)
// Copyup is only valid on an image whose data pool context is usable.
117 ceph_assert(m_image_ctx
->data_ctx
.is_valid());
118 m_async_op
.start_op(*util::get_image_ctx(m_image_ctx
));
121 template <typename I
>
122 CopyupRequest
<I
>::~CopyupRequest() {
123 ceph_assert(m_pending_requests
.empty());
124 m_async_op
.finish_op();
// Attaches a waiting object write request to this in-flight copyup. While
// appends are permitted the request joins m_pending_requests (completed by
// this copyup); after disable_append_requests() it instead joins
// m_restart_requests (re-queued when the copyup finishes).
// NOTE(review): fragments reproduced verbatim; some original lines
// (130, 137, 139-140) were dropped by the extraction.
127 template <typename I
>
128 void CopyupRequest
<I
>::append_request(AbstractObjectWriteRequest
<I
> *req
) {
129 Mutex::Locker
locker(m_lock
);
131 auto cct
= m_image_ctx
->cct
;
132 ldout(cct
, 20) << "oid=" << m_oid
<< ", "
133 << "object_request=" << req
<< ", "
134 << "append=" << m_append_request_permitted
<< dendl
;
135 if (m_append_request_permitted
) {
136 m_pending_requests
.push_back(req
);
// else-branch: appends no longer permitted -> queue for restart instead.
138 m_restart_requests
.push_back(req
);
// Entry point of the copyup state machine.
// NOTE(review): the function body (original lines 144-146) was dropped by
// the extraction; only the signature fragments remain, reproduced verbatim.
142 template <typename I
>
143 void CopyupRequest
<I
>::send() {
// First state: obtain the parent data for this object. Three paths:
// (1) parent already detached -> complete handle_read_from_parent(-ENOENT)
//     via the work queue, (2) live migration -> deep-copy path,
// (3) normal clone -> async read of m_image_extents from the parent image.
// NOTE(review): fragments reproduced verbatim; several original lines
// (152, 155, 159-160, 162-165, 167, 170, 174, 178) were dropped.
147 template <typename I
>
148 void CopyupRequest
<I
>::read_from_parent() {
149 auto cct
= m_image_ctx
->cct
;
150 RWLock::RLocker
snap_locker(m_image_ctx
->snap_lock
);
151 RWLock::RLocker
parent_locker(m_image_ctx
->parent_lock
);
153 if (m_image_ctx
->parent
== nullptr) {
154 ldout(cct
, 5) << "parent detached" << dendl
;
// Defer the callback through the work queue so the completion does not
// run inline while the locks above are still held.
156 m_image_ctx
->op_work_queue
->queue(
157 util::create_context_callback
<
158 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_read_from_parent
>(this),
161 } else if (is_deep_copy()) {
166 auto comp
= AioCompletion::create_and_start
<
168 &CopyupRequest
<I
>::handle_read_from_parent
>(
169 this, util::get_image_ctx(m_image_ctx
->parent
), AIO_TYPE_READ
);
171 ldout(cct
, 20) << "oid=" << m_oid
<< ", "
172 << "completion=" << comp
<< ", "
173 << "extents=" << m_image_extents
// m_image_extents is consumed (moved) by the read; the data lands in
// m_copyup_data via the ReadResult.
175 ImageRequest
<I
>::aio_read(m_image_ctx
->parent
, comp
,
176 std::move(m_image_extents
),
177 ReadResult
{&m_copyup_data
}, 0, m_trace
);
// Completion of the parent read. Latches whether the parent data is all
// zeros and whether a copyup is required, stops further appends, then
// either bails out on error / no-op or proceeds to update_object_maps().
// NOTE(review): fragments reproduced verbatim; error/early-return lines
// (184, 186, 190, 192, 194, 196-199, 201, 203, 205-208, 210, 214-216,
// 218, 220) were dropped by the extraction.
180 template <typename I
>
181 void CopyupRequest
<I
>::handle_read_from_parent(int r
) {
182 auto cct
= m_image_ctx
->cct
;
183 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
185 m_image_ctx
->snap_lock
.get_read();
187 m_copyup_is_zero
= m_copyup_data
.is_zero();
188 m_copyup_required
= is_copyup_required();
// From here on, new object requests go to the restart list.
189 disable_append_requests();
// -ENOENT means the parent object simply doesn't exist -- not an error.
191 if (r
< 0 && r
!= -ENOENT
) {
193 m_image_ctx
->snap_lock
.put_read();
195 lderr(cct
) << "error reading from parent: " << cpp_strerror(r
) << dendl
;
200 if (!m_copyup_required
) {
202 m_image_ctx
->snap_lock
.put_read();
204 ldout(cct
, 20) << "no-op, skipping" << dendl
;
209 // copyup() will affect snapshots only if parent data is not all
// (continuation of the comment above was elided: "...zeros.")
211 if (!m_copyup_is_zero
) {
// Collect snap ids newest-first for the object-map update fan-out.
212 m_snap_ids
.insert(m_snap_ids
.end(), m_image_ctx
->snaps
.rbegin(),
213 m_image_ctx
->snaps
.rend());
217 m_image_ctx
->snap_lock
.put_read();
219 update_object_maps();
// Migration path: instead of a plain parent read, deep-copy this object
// from the migration source, preserving its snapshot history. Flattening
// is forced whenever a copyup is actually required, otherwise it follows
// the migration configuration.
// NOTE(review): fragments reproduced verbatim; original lines 228-229,
// 231-232, 234, 240-242 were dropped by the extraction.
222 template <typename I
>
223 void CopyupRequest
<I
>::deep_copy() {
224 auto cct
= m_image_ctx
->cct
;
// Caller must hold snap_lock + parent_lock, and the parent must exist.
225 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
226 ceph_assert(m_image_ctx
->parent_lock
.is_locked());
227 ceph_assert(m_image_ctx
->parent
!= nullptr);
230 m_flatten
= is_copyup_required() ? true : m_image_ctx
->migration_info
.flatten
;
233 ldout(cct
, 20) << "oid=" << m_oid
<< ", flatten=" << m_flatten
<< dendl
;
235 auto ctx
= util::create_context_callback
<
236 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_deep_copy
>(this);
237 auto req
= deep_copy::ObjectCopyRequest
<I
>::create(
238 m_image_ctx
->parent
, m_image_ctx
, m_image_ctx
->migration_info
.snap_map
,
239 m_object_no
, m_flatten
, ctx
);
// Completion of the deep-copy. -ENOENT with flatten disabled but copyup
// still required means the source object vanished -> retry with flatten.
// Otherwise stop appends, bail out on real errors or when neither copyup
// nor an object-map refresh is needed, else collect the snapshot ids that
// still need object-map updates and continue with update_object_maps().
// NOTE(review): fragments reproduced verbatim; early-return/cleanup lines
// (248, 250, 253, 255, 257-260, 262, 264, 266, 268-272, 274, 276-280,
// 282-285, 290, 292-294, 296, 298) were dropped by the extraction.
244 template <typename I
>
245 void CopyupRequest
<I
>::handle_deep_copy(int r
) {
246 auto cct
= m_image_ctx
->cct
;
247 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
249 m_image_ctx
->snap_lock
.get_read();
251 m_copyup_required
= is_copyup_required();
252 if (r
== -ENOENT
&& !m_flatten
&& m_copyup_required
) {
254 m_image_ctx
->snap_lock
.put_read();
256 ldout(cct
, 10) << "restart deep-copy with flatten" << dendl
;
261 disable_append_requests();
263 if (r
< 0 && r
!= -ENOENT
) {
265 m_image_ctx
->snap_lock
.put_read();
267 lderr(cct
) << "error encountered during deep-copy: " << cpp_strerror(r
)
273 if (!m_copyup_required
&& !is_update_object_map_required(r
)) {
275 m_image_ctx
->snap_lock
.put_read();
281 ldout(cct
, 20) << "skipping" << dendl
;
286 // For deep-copy, copyup() will never affect snapshots. However,
287 // this state machine is responsible for updating object maps for
288 // snapshots that have been created on destination image after
289 // migration started.
291 compute_deep_copy_snap_ids();
295 m_image_ctx
->snap_lock
.put_read();
297 update_object_maps();
// Determines the desired HEAD object-map state, appends CEPH_NOSNAP to
// m_snap_ids if the HEAD map needs changing, then fans out one
// C_UpdateObjectMap per snap id through an AsyncObjectThrottle.
// Skips straight ahead (lines elided) when the image has no object map.
// NOTE(review): fragments reproduced verbatim; original lines 307-311,
// 314, 322-323, 328-329, 334, 337, 347, 349 were dropped.
300 template <typename I
>
301 void CopyupRequest
<I
>::update_object_maps() {
302 RWLock::RLocker
owner_locker(m_image_ctx
->owner_lock
);
303 RWLock::RLocker
snap_locker(m_image_ctx
->snap_lock
);
304 if (m_image_ctx
->object_map
== nullptr) {
305 snap_locker
.unlock();
306 owner_locker
.unlock();
312 auto cct
= m_image_ctx
->cct
;
313 ldout(cct
, 20) << "oid=" << m_oid
<< dendl
;
// No pending write requests == this copyup was triggered by copy-on-read.
315 bool copy_on_read
= m_pending_requests
.empty();
316 uint8_t head_object_map_state
= OBJECT_EXISTS
;
317 if (copy_on_read
&& !m_snap_ids
.empty() &&
318 m_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
,
319 m_image_ctx
->snap_lock
)) {
320 // HEAD is non-dirty since data is tied to first snapshot
321 head_object_map_state
= OBJECT_EXISTS_CLEAN
;
324 auto r_it
= m_pending_requests
.rbegin();
325 if (r_it
!= m_pending_requests
.rend()) {
326 // last write-op determines the final object map state
327 head_object_map_state
= (*r_it
)->get_pre_write_object_map_state();
330 RWLock::WLocker
object_map_locker(m_image_ctx
->object_map_lock
);
331 if ((*m_image_ctx
->object_map
)[m_object_no
] != head_object_map_state
) {
332 // (maybe) need to update the HEAD object map state
333 m_snap_ids
.push_back(CEPH_NOSNAP
);
335 object_map_locker
.unlock();
336 snap_locker
.unlock();
// Object-map updates require holding the exclusive lock.
338 ceph_assert(m_image_ctx
->exclusive_lock
->is_lock_owner());
// boost::lambda factory: _1 = throttle, _2 = snap-id index; m_snap_ids
// and m_trace are passed by address/reference and must outlive the
// throttle (they are members of this request, so they do).
339 typename AsyncObjectThrottle
<I
>::ContextFactory
context_factory(
340 boost::lambda::bind(boost::lambda::new_ptr
<C_UpdateObjectMap
<I
>>(),
341 boost::lambda::_1
, m_image_ctx
, m_object_no
, head_object_map_state
,
342 &m_snap_ids
, m_first_snap_is_clean
, m_trace
, boost::lambda::_2
));
343 auto ctx
= util::create_context_callback
<
344 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_update_object_maps
>(this);
345 auto throttle
= new AsyncObjectThrottle
<I
>(
346 nullptr, *m_image_ctx
, context_factory
, ctx
, nullptr, 0, m_snap_ids
.size());
// Throttle concurrency bounded by rbd_concurrent_management_ops
// (the start_ops(...) call fragment, original line 347, was elided).
348 m_image_ctx
->config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"));
// Completion of the object-map fan-out: logs failures, then (in lines
// elided by the extraction, original 355-364) continues the state machine.
// NOTE(review): fragments reproduced verbatim.
351 template <typename I
>
352 void CopyupRequest
<I
>::handle_update_object_maps(int r
) {
353 auto cct
= m_image_ctx
->cct
;
354 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
// Error branch (guard condition on original line 356 was elided).
357 lderr(m_image_ctx
->cct
) << "failed to update object map: "
358 << cpp_strerror(r
) << dendl
;
// Issues the actual RADOS ops: an optional "copyup" call with an empty
// snapshot context (so snapshots are cloned from the parent data), plus a
// second op carrying the pending object writes under the current snapshot
// context. Tracks the number of in-flight ops via m_pending_copyups
// (increments elided by the extraction).
// NOTE(review): fragments reproduced verbatim; many lines (373-374,
// 376-382, 384, 389-391, 396-398, 400-401, 404-405, 410-417, 426,
// 432-435, 443, 450-453) were dropped. Also note: "©up_op" below is
// a mojibake of "&copyup_op" (HTML "&copy" entity substitution) --
// reproduced byte-identical here; the real file passes &copyup_op.
367 template <typename I
>
368 void CopyupRequest
<I
>::copyup() {
369 auto cct
= m_image_ctx
->cct
;
// Snapshot context is sampled once under snap_lock and reused below.
370 m_image_ctx
->snap_lock
.get_read();
371 auto snapc
= m_image_ctx
->snapc
;
372 m_image_ctx
->snap_lock
.put_read();
375 if (!m_copyup_required
) {
378 ldout(cct
, 20) << "skipping copyup" << dendl
;
383 ldout(cct
, 20) << "oid=" << m_oid
<< dendl
;
385 bool copy_on_read
= m_pending_requests
.empty();
// A deep copyup (two-op sequence) is needed when snapshots exist and the
// parent data is not all zeros.
386 bool deep_copyup
= !snapc
.snaps
.empty() && !m_copyup_is_zero
;
387 if (m_copyup_is_zero
) {
388 m_copyup_data
.clear();
392 librados::ObjectWriteOperation copyup_op
;
393 if (copy_on_read
|| deep_copyup
) {
394 copyup_op
.exec("rbd", "copyup", m_copyup_data
);
395 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, ©up_op
);
399 librados::ObjectWriteOperation write_op
;
402 write_op
.exec("rbd", "copyup", m_copyup_data
);
403 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, &write_op
);
406 // merge all pending write ops into this single RADOS op
407 for (auto req
: m_pending_requests
) {
408 ldout(cct
, 20) << "add_copyup_ops " << req
<< dendl
;
409 req
->add_copyup_ops(&write_op
);
412 if (write_op
.size() > 0) {
418 // issue librados ops at the end to simplify test cases
419 std::vector
<librados::snap_t
> snaps
;
420 if (copyup_op
.size() > 0) {
421 // send only the copyup request with a blank snapshot context so that
422 // all snapshots are detected from the parent for this object. If
423 // this is a CoW request, a second request will be created for the
424 // actual modification.
425 ldout(cct
, 20) << "copyup with empty snapshot context" << dendl
;
427 auto comp
= util::create_rados_callback
<
428 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
// seq=0 and an empty snaps vector == blank snapshot context.
429 r
= m_image_ctx
->data_ctx
.aio_operate(
430 m_oid
, comp
, ©up_op
, 0, snaps
,
431 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
436 if (write_op
.size() > 0) {
437 // compare-and-write doesn't add any write ops (copyup+cmpext+write
438 // can't be executed in the same RADOS op because, unless the object
439 // was already present in the clone, cmpext wouldn't see it)
440 ldout(cct
, 20) << (!deep_copyup
&& write_op
.size() > 2 ?
441 "copyup + ops" : !deep_copyup
? "copyup" : "ops")
442 << " with current snapshot context" << dendl
;
444 snaps
.insert(snaps
.end(), snapc
.snaps
.begin(), snapc
.snaps
.end());
445 auto comp
= util::create_rados_callback
<
446 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
447 r
= m_image_ctx
->data_ctx
.aio_operate(
448 m_oid
, comp
, &write_op
, snapc
.seq
, snaps
,
449 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
// Completion of one of the (up to two) RADOS ops issued by copyup().
// Decrements m_pending_copyups under m_lock; on real error completes the
// pending requests with the error, and when the last op finishes the
// state machine finishes (tail lines elided by the extraction).
// NOTE(review): fragments reproduced verbatim; original lines 459, 463-464,
// 467, 471-472, 474-476 were dropped.
455 template <typename I
>
456 void CopyupRequest
<I
>::handle_copyup(int r
) {
457 auto cct
= m_image_ctx
->cct
;
458 unsigned pending_copyups
;
460 Mutex::Locker
locker(m_lock
);
461 ceph_assert(m_pending_copyups
> 0);
462 pending_copyups
= --m_pending_copyups
;
465 ldout(cct
, 20) << "oid=" << m_oid
<< ", " << "r=" << r
<< ", "
466 << "pending=" << pending_copyups
<< dendl
;
// -ENOENT is tolerated (object may legitimately not exist yet).
468 if (r
< 0 && r
!= -ENOENT
) {
469 lderr(cct
) << "failed to copyup object: " << cpp_strerror(r
) << dendl
;
470 complete_requests(false, r
);
473 if (pending_copyups
== 0) {
// Terminal state: completes all tracked object requests with r (restart
// requests get a forced retval -- see complete_requests) and then tears
// the request down (tail lines, original 484-485, elided by extraction).
// NOTE(review): fragments reproduced verbatim.
478 template <typename I
>
479 void CopyupRequest
<I
>::finish(int r
) {
480 auto cct
= m_image_ctx
->cct
;
481 ldout(cct
, 20) << "oid=" << m_oid
<< ", r=" << r
<< dendl
;
483 complete_requests(true, r
);
// Drains both request lists: pending requests are completed with r;
// when override_restart_retval is set, restart requests are completed
// with an overridden retval (the assignment, original lines 501-502, was
// elided -- presumably r = -ERESTART; confirm against upstream).
// NOTE(review): fragments reproduced verbatim; original lines 490-491,
// 494, 498-499, 503, 506, 510 were dropped.
487 template <typename I
>
488 void CopyupRequest
<I
>::complete_requests(bool override_restart_retval
, int r
) {
489 auto cct
= m_image_ctx
->cct
;
492 while (!m_pending_requests
.empty()) {
493 auto it
= m_pending_requests
.begin();
// (the "auto req = *it;" fragment, original line 494, was elided)
495 ldout(cct
, 20) << "completing request " << req
<< dendl
;
496 req
->handle_copyup(r
);
497 m_pending_requests
.erase(it
);
500 if (override_restart_retval
) {
504 while (!m_restart_requests
.empty()) {
505 auto it
= m_restart_requests
.begin();
507 ldout(cct
, 20) << "restarting request " << req
<< dendl
;
508 req
->handle_copyup(r
);
509 m_restart_requests
.erase(it
);
513 template <typename I
>
514 void CopyupRequest
<I
>::disable_append_requests() {
515 ceph_assert(m_lock
.is_locked());
516 m_append_request_permitted
= false;
// De-registers this request from the image's in-flight copyup table so a
// later request for the same object can be created.
// NOTE(review): fragments reproduced verbatim; the two closing braces
// (original lines 526-527) were dropped by the extraction.
519 template <typename I
>
520 void CopyupRequest
<I
>::remove_from_list() {
521 Mutex::Locker
copyup_list_locker(m_image_ctx
->copyup_list_lock
);
523 auto it
= m_image_ctx
->copyup_list
.find(m_object_no
);
524 if (it
!= m_image_ctx
->copyup_list
.end()) {
525 m_image_ctx
->copyup_list
.erase(it
);
// True when the copyup must actually be issued: copy-on-read always
// requires it; otherwise non-zero parent data or at least one non-empty
// pending write op does. The individual "return true" / final "return"
// lines were dropped by the extraction.
// NOTE(review): fragments reproduced verbatim; original lines 532, 534,
// 536-538, 540-542, 545-549 are missing. Caller must hold m_lock.
529 template <typename I
>
530 bool CopyupRequest
<I
>::is_copyup_required() {
531 ceph_assert(m_lock
.is_locked());
533 bool copy_on_read
= m_pending_requests
.empty();
535 // always force a copyup if CoR enabled
539 if (!m_copyup_is_zero
) {
543 for (auto req
: m_pending_requests
) {
544 if (!req
->is_empty_write_op()) {
551 template <typename I
>
552 bool CopyupRequest
<I
>::is_deep_copy() const {
553 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
554 return !m_image_ctx
->migration_info
.empty();
// Decides whether the deep-copy completion still needs an object-map
// update: no object map -> no (return elided); migration already gone ->
// assume yes (return elided); otherwise the HEAD snap-map entry tells
// whether the destination HEAD was populated by the deep copy.
// NOTE(review): fragments reproduced verbatim; original lines 560-564,
// 566-568, 572-574, 578 were dropped by the extraction.
557 template <typename I
>
558 bool CopyupRequest
<I
>::is_update_object_map_required(int r
) {
559 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
565 if (m_image_ctx
->object_map
== nullptr) {
569 if (m_image_ctx
->migration_info
.empty()) {
570 // migration might have completed while IO was in-flight,
571 // assume worst-case and perform an object map update
575 auto it
= m_image_ctx
->migration_info
.snap_map
.find(CEPH_NOSNAP
);
576 ceph_assert(it
!= m_image_ctx
->migration_info
.snap_map
.end());
// The first destination snap id mapped from the source HEAD; if it is
// not CEPH_NOSNAP the HEAD object map was not covered by the deep copy.
577 return it
->second
[0] != CEPH_NOSNAP
;
// After a deep-copy, builds m_snap_ids (newest-first) with the snapshots
// that still need an object-map update: skips snaps already written by
// the object deep copy (collected in deep_copied) and snaps whose parent
// overlap does not reach this object (filter tail elided by extraction).
// NOTE(review): fragments reproduced verbatim; original lines 583,
// 590-592, 599-601, 604, 607, 609-610, 613, 615, 618-620 were dropped.
// Caller must hold snap_lock.
580 template <typename I
>
581 void CopyupRequest
<I
>::compute_deep_copy_snap_ids() {
582 ceph_assert(m_image_ctx
->snap_lock
.is_locked());
584 // don't copy ids for the snaps updated by object deep copy or
585 // that don't overlap
586 std::set
<uint64_t> deep_copied
;
587 for (auto &it
: m_image_ctx
->migration_info
.snap_map
) {
588 if (it
.first
!= CEPH_NOSNAP
) {
// it.second.front() is the first destination snap id for this source snap.
589 deep_copied
.insert(it
.second
.front());
593 RWLock::RLocker
parent_locker(m_image_ctx
->parent_lock
);
594 std::copy_if(m_image_ctx
->snaps
.rbegin(), m_image_ctx
->snaps
.rend(),
595 std::back_inserter(m_snap_ids
),
596 [this, cct
=m_image_ctx
->cct
, &deep_copied
](uint64_t snap_id
) {
597 if (deep_copied
.count(snap_id
)) {
// Deep-copied snaps are excluded here, so if one exists the first snap
// to update has already been written clean.
598 m_first_snap_is_clean
= true;
602 uint64_t parent_overlap
= 0;
603 int r
= m_image_ctx
->get_parent_overlap(snap_id
, &parent_overlap
);
605 ldout(cct
, 5) << "failed getting parent overlap for snap_id: "
606 << snap_id
<< ": " << cpp_strerror(r
) << dendl
;
608 if (parent_overlap
== 0) {
611 std::vector
<std::pair
<uint64_t, uint64_t>> extents
;
612 Striper::extent_to_file(cct
, &m_image_ctx
->layout
,
614 m_image_ctx
->layout
.object_size
,
616 auto overlap
= m_image_ctx
->prune_parent_extents(
617 extents
, parent_overlap
);
623 } // namespace librbd
625 template class librbd::io::CopyupRequest
<librbd::ImageCtx
>;