1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/io/CopyupRequest.h"
5 #include "common/ceph_context.h"
6 #include "common/ceph_mutex.h"
7 #include "common/dout.h"
8 #include "common/errno.h"
9 #include "common/WorkQueue.h"
10 #include "librbd/AsyncObjectThrottle.h"
11 #include "librbd/ExclusiveLock.h"
12 #include "librbd/ImageCtx.h"
13 #include "librbd/ObjectMap.h"
14 #include "librbd/Utils.h"
15 #include "librbd/deep_copy/ObjectCopyRequest.h"
16 #include "librbd/io/AioCompletion.h"
17 #include "librbd/io/ImageRequest.h"
18 #include "librbd/io/ObjectRequest.h"
19 #include "librbd/io/ReadResult.h"
21 #include <boost/bind.hpp>
22 #include <boost/lambda/bind.hpp>
23 #include <boost/lambda/construct.hpp>
25 #define dout_subsys ceph_subsys_rbd
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": " \
29 << data_object_name(m_image_ctx, m_object_no) << " "
34 using librbd::util::data_object_name
;
// NOTE(review): this excerpt is line-sampled -- gaps in the embedded
// original line numbers (40, 45, 50-53, 57-60, 63-65, ...) mean access
// specifiers, braces, the template header and whole statements are
// missing from view.  Text below is kept byte-identical.
//
// Throttled helper context: each instance updates the object-map entry
// of one object (m_object_no) at one snapshot id
// (m_snap_ids[m_snap_id_idx]); the CEPH_NOSNAP entry denotes HEAD.
39 class C_UpdateObjectMap
: public C_AsyncObjectThrottle
<I
> {
// Constructor: captures the snapshot-id list by pointer but stores a
// reference (m_snap_ids(*snap_ids)) -- the caller's vector must outlive
// this context.  The snap_id_idx parameter line (original 45) is
// missing from this excerpt.
41 C_UpdateObjectMap(AsyncObjectThrottle
<I
> &throttle
, I
*image_ctx
,
42 uint64_t object_no
, uint8_t head_object_map_state
,
43 const std::vector
<uint64_t> *snap_ids
,
44 bool first_snap_is_clean
, const ZTracer::Trace
&trace
,
46 : C_AsyncObjectThrottle
<I
>(throttle
, *image_ctx
), m_object_no(object_no
),
47 m_head_object_map_state(head_object_map_state
), m_snap_ids(*snap_ids
),
48 m_first_snap_is_clean(first_snap_is_clean
), m_trace(trace
),
49 m_snap_id_idx(snap_id_idx
)
// Presumably the body of send() (its signature, original lines 50-53,
// is missing) -- TODO confirm.  Requires the caller to hold owner_lock;
// exclusive lock must be owned before the object map may be mutated.
54 auto& image_ctx
= this->m_image_ctx
;
55 ceph_assert(ceph_mutex_is_locked(image_ctx
.owner_lock
));
// bail out when the exclusive lock was lost (handling lines missing)
56 if (image_ctx
.exclusive_lock
== nullptr) {
59 ceph_assert(image_ctx
.exclusive_lock
->is_lock_owner());
61 std::shared_lock image_locker
{image_ctx
.image_lock
};
// object map may have been disabled concurrently -- nothing to update
62 if (image_ctx
.object_map
== nullptr) {
// dispatch on which snapshot this throttle slot is responsible for
66 uint64_t snap_id
= m_snap_ids
[m_snap_id_idx
];
67 if (snap_id
== CEPH_NOSNAP
) {
70 return update_snapshot(snap_id
);
// HEAD object-map update path (function header, original line 74,
// missing -- presumably update_head()).
75 auto& image_ctx
= this->m_image_ctx
;
76 ceph_assert(ceph_mutex_is_locked(image_ctx
.image_lock
));
// aio_update returns whether an update RPC was actually issued
78 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
79 CEPH_NOSNAP
, m_object_no
, m_head_object_map_state
, {}, m_trace
, false,
// 0 => in-flight, 1 => nothing to do (throttle skips this slot)
81 return (sent
? 0 : 1);
// Update the object map of a single (non-HEAD) snapshot.
84 int update_snapshot(uint64_t snap_id
) {
85 auto& image_ctx
= this->m_image_ctx
;
86 ceph_assert(ceph_mutex_is_locked(image_ctx
.image_lock
));
88 uint8_t state
= OBJECT_EXISTS
;
89 if (image_ctx
.test_features(RBD_FEATURE_FAST_DIFF
, image_ctx
.image_lock
) &&
90 (m_snap_id_idx
> 0 || m_first_snap_is_clean
)) {
91 // first snapshot should be exists+dirty since it contains
92 // the copyup data -- later snapshots inherit the data.
93 state
= OBJECT_EXISTS_CLEAN
;
96 bool sent
= image_ctx
.object_map
->template aio_update
<Context
>(
97 snap_id
, m_object_no
, state
, {}, m_trace
, true, this);
// Members (visibility specifier lines missing from this excerpt).
103 uint64_t m_object_no
;
104 uint8_t m_head_object_map_state
;
// reference to the owning CopyupRequest's snap-id vector (not a copy)
105 const std::vector
<uint64_t> &m_snap_ids
;
106 bool m_first_snap_is_clean
;
107 const ZTracer::Trace
&m_trace
;
108 size_t m_snap_id_idx
;
111 } // anonymous namespace
// Construct a copy-up request for one backing object of a cloned image.
// NOTE(review): fragment -- original lines 119 and 122-123 (braces) are
// missing from this excerpt.
// NOTE(review): image_extents is taken by rvalue reference but the
// member-init copies rather than moves -- confirm against upstream.
113 template <typename I
>
114 CopyupRequest
<I
>::CopyupRequest(I
*ictx
, uint64_t objectno
,
115 Extents
&&image_extents
,
116 const ZTracer::Trace
&parent_trace
)
117 : m_image_ctx(ictx
), m_object_no(objectno
), m_image_extents(image_extents
),
118 m_trace(util::create_trace(*m_image_ctx
, "copy-up", parent_trace
))
// the data pool must still be valid to issue copy-up writes
120 ceph_assert(m_image_ctx
->data_ctx
.is_valid());
// register as an in-flight async op (balanced by finish_op in dtor)
121 m_async_op
.start_op(*util::get_image_ctx(m_image_ctx
));
// Destructor: all pending write requests must have been completed or
// restarted before the request is destroyed; releases the async-op
// tracking taken in the constructor.  (Closing brace line missing.)
124 template <typename I
>
125 CopyupRequest
<I
>::~CopyupRequest() {
126 ceph_assert(m_pending_requests
.empty());
127 m_async_op
.finish_op();
// Attach a write request to this in-flight copy-up.  While appends are
// still permitted the request joins m_pending_requests (merged into the
// copy-up RADOS op and completed by this state machine); once appends
// are disabled it joins m_restart_requests (re-driven after copy-up
// finishes).  NOTE(review): the "} else {" line (original 139) and
// closing braces are missing from this excerpt.
130 template <typename I
>
131 void CopyupRequest
<I
>::append_request(AbstractObjectWriteRequest
<I
> *req
) {
132 std::lock_guard locker
{m_lock
};
134 auto cct
= m_image_ctx
->cct
;
135 ldout(cct
, 20) << "object_request=" << req
<< ", "
136 << "append=" << m_append_request_permitted
<< dendl
;
137 if (m_append_request_permitted
) {
138 m_pending_requests
.push_back(req
);
140 m_restart_requests
.push_back(req
);
// State-machine entry point.  NOTE(review): the entire body (original
// lines 146-148) is missing from this excerpt -- presumably it chooses
// between deep_copy() and read_from_parent(); confirm upstream.
144 template <typename I
>
145 void CopyupRequest
<I
>::send() {
// Read the parent image extents that back this object so they can be
// copied up into the clone.  NOTE(review): fragment -- several lines
// (153, 156, 160-161, 163-166, 168, 171, 174, 179, 183-185) are
// missing, including the queued callback's return code and the
// deep-copy branch body.
149 template <typename I
>
150 void CopyupRequest
<I
>::read_from_parent() {
151 auto cct
= m_image_ctx
->cct
;
152 std::shared_lock image_locker
{m_image_ctx
->image_lock
};
// parent may have been detached (e.g. flatten completed) while this
// request was queued -- complete via the work queue, not inline
154 if (m_image_ctx
->parent
== nullptr) {
155 ldout(cct
, 5) << "parent detached" << dendl
;
157 m_image_ctx
->op_work_queue
->queue(
158 util::create_context_callback
<
159 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_read_from_parent
>(this),
// live-migration in progress: deep-copy path instead of a plain read
// (branch body missing from this excerpt)
162 } else if (is_deep_copy()) {
// issue an image-level read against the parent; completion funnels
// into handle_read_from_parent
167 auto comp
= AioCompletion::create_and_start
<
169 &CopyupRequest
<I
>::handle_read_from_parent
>(
170 this, util::get_image_ctx(m_image_ctx
->parent
), AIO_TYPE_READ
);
172 ldout(cct
, 20) << "completion=" << comp
<< ", "
173 << "extents=" << m_image_extents
// sparse read keeps the extent map so zero ranges are not written
175 if (m_image_ctx
->enable_sparse_copyup
) {
176 ImageRequest
<I
>::aio_read(
177 m_image_ctx
->parent
, comp
, std::move(m_image_extents
),
178 ReadResult
{&m_copyup_extent_map
, &m_copyup_data
}, 0, m_trace
);
// dense read path (else-branch line missing from this excerpt)
180 ImageRequest
<I
>::aio_read(
181 m_image_ctx
->parent
, comp
, std::move(m_image_extents
),
182 ReadResult
{&m_copyup_data
}, 0, m_trace
);
// Completion of the parent read: decide whether a copy-up is actually
// required, freeze the pending-request list, then move on to object-map
// updates.  NOTE(review): fragment -- error/skip epilogues (original
// lines 200-205, 209-214, 220-222, 226-227) are missing.
186 template <typename I
>
187 void CopyupRequest
<I
>::handle_read_from_parent(int r
) {
188 auto cct
= m_image_ctx
->cct
;
189 ldout(cct
, 20) << "r=" << r
<< dendl
;
191 m_image_ctx
->image_lock
.lock_shared();
// all-zero parent data means nothing needs to be written
193 m_copyup_is_zero
= m_copyup_data
.is_zero();
194 m_copyup_required
= is_copyup_required();
// no more writes may join this copy-up after this point
195 disable_append_requests();
// -ENOENT (no parent object) is not an error for copy-up
197 if (r
< 0 && r
!= -ENOENT
) {
199 m_image_ctx
->image_lock
.unlock_shared();
201 lderr(cct
) << "error reading from parent: " << cpp_strerror(r
) << dendl
;
206 if (!m_copyup_required
) {
208 m_image_ctx
->image_lock
.unlock_shared();
210 ldout(cct
, 20) << "no-op, skipping" << dendl
;
215 // copyup() will affect snapshots only if parent data is not all
// (continuation of the comment, original line 216, missing)
217 if (!m_copyup_is_zero
) {
// record snapshots newest-first; their object maps must be updated
218 m_snap_ids
.insert(m_snap_ids
.end(), m_image_ctx
->snaps
.rbegin(),
219 m_image_ctx
->snaps
.rend());
223 m_image_ctx
->image_lock
.unlock_shared();
225 update_object_maps();
// Live-migration path: deep-copy the object (with snapshot history)
// from the migration source instead of reading HEAD parent data.
// Caller must hold image_lock.  (Closing lines missing from excerpt.)
228 template <typename I
>
229 void CopyupRequest
<I
>::deep_copy() {
230 auto cct
= m_image_ctx
->cct
;
231 ceph_assert(ceph_mutex_is_locked(m_image_ctx
->image_lock
));
232 ceph_assert(m_image_ctx
->parent
!= nullptr);
// flatten when a copy-up is needed anyway; otherwise follow the
// migration policy
235 m_flatten
= is_copyup_required() ? true : m_image_ctx
->migration_info
.flatten
;
238 ldout(cct
, 20) << "flatten=" << m_flatten
<< dendl
;
240 auto ctx
= util::create_context_callback
<
241 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_deep_copy
>(this);
242 auto req
= deep_copy::ObjectCopyRequest
<I
>::create(
243 m_image_ctx
->parent
, m_image_ctx
, 0, 0,
244 m_image_ctx
->migration_info
.snap_map
, m_object_no
, m_flatten
, nullptr, ctx
);
// Completion of the object deep-copy.  NOTE(review): fragment -- the
// retry dispatch, error epilogue and skip epilogue bodies (original
// lines 258, 260, 262-265, 271, 273-277, 279-285, 287-290, 295,
// 297-299, 301, 303) are missing from this excerpt.
249 template <typename I
>
250 void CopyupRequest
<I
>::handle_deep_copy(int r
) {
251 auto cct
= m_image_ctx
->cct
;
252 ldout(cct
, 20) << "r=" << r
<< dendl
;
254 m_image_ctx
->image_lock
.lock_shared();
256 m_copyup_required
= is_copyup_required();
// source object vanished mid-copy: retry the whole deep-copy with
// flatten enabled (restart call itself missing from this excerpt)
257 if (r
== -ENOENT
&& !m_flatten
&& m_copyup_required
) {
259 m_image_ctx
->image_lock
.unlock_shared();
261 ldout(cct
, 10) << "restart deep-copy with flatten" << dendl
;
266 disable_append_requests();
268 if (r
< 0 && r
!= -ENOENT
) {
270 m_image_ctx
->image_lock
.unlock_shared();
272 lderr(cct
) << "error encountered during deep-copy: " << cpp_strerror(r
)
278 if (!m_copyup_required
&& !is_update_object_map_required(r
)) {
280 m_image_ctx
->image_lock
.unlock_shared();
286 ldout(cct
, 20) << "skipping" << dendl
;
291 // For deep-copy, copyup() will never affect snapshots. However,
292 // this state machine is responsible for updating object maps for
293 // snapshots that have been created on destination image after
294 // migration started.
296 compute_deep_copy_snap_ids();
300 m_image_ctx
->image_lock
.unlock_shared();
302 update_object_maps();
// Update the object-map entries (HEAD + collected snapshots) before
// issuing the copy-up writes, using an AsyncObjectThrottle of
// C_UpdateObjectMap contexts.  NOTE(review): fragment -- the
// no-object-map early-exit body (original lines 312-315), several
// braces and the throttle->start_ops( line (350) are missing.
305 template <typename I
>
306 void CopyupRequest
<I
>::update_object_maps() {
307 std::shared_lock owner_locker
{m_image_ctx
->owner_lock
};
308 std::shared_lock image_locker
{m_image_ctx
->image_lock
};
// object map disabled: skip straight ahead (epilogue missing)
309 if (m_image_ctx
->object_map
== nullptr) {
310 image_locker
.unlock();
311 owner_locker
.unlock();
317 auto cct
= m_image_ctx
->cct
;
318 ldout(cct
, 20) << dendl
;
// no pending writes implies this copy-up came from copy-on-read
320 bool copy_on_read
= m_pending_requests
.empty();
321 uint8_t head_object_map_state
= OBJECT_EXISTS
;
322 if (copy_on_read
&& !m_snap_ids
.empty() &&
323 m_image_ctx
->test_features(RBD_FEATURE_FAST_DIFF
,
324 m_image_ctx
->image_lock
)) {
325 // HEAD is non-dirty since data is tied to first snapshot
326 head_object_map_state
= OBJECT_EXISTS_CLEAN
;
329 auto r_it
= m_pending_requests
.rbegin();
330 if (r_it
!= m_pending_requests
.rend()) {
331 // last write-op determines the final object map state
332 head_object_map_state
= (*r_it
)->get_pre_write_object_map_state();
335 if ((*m_image_ctx
->object_map
)[m_object_no
] != head_object_map_state
) {
336 // (maybe) need to update the HEAD object map state
337 m_snap_ids
.push_back(CEPH_NOSNAP
);
339 image_locker
.unlock();
341 ceph_assert(m_image_ctx
->exclusive_lock
->is_lock_owner());
// one C_UpdateObjectMap per entry in m_snap_ids, built lazily via
// boost::lambda placeholders (_1 = throttle, _2 = snap index)
342 typename AsyncObjectThrottle
<I
>::ContextFactory
context_factory(
343 boost::lambda::bind(boost::lambda::new_ptr
<C_UpdateObjectMap
<I
>>(),
344 boost::lambda::_1
, m_image_ctx
, m_object_no
, head_object_map_state
,
345 &m_snap_ids
, m_first_snap_is_clean
, m_trace
, boost::lambda::_2
));
346 auto ctx
= util::create_context_callback
<
347 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_update_object_maps
>(this);
// throttle deletes itself on completion (owned by the start call);
// the start_ops( line, original 350, is missing from this excerpt
348 auto throttle
= new AsyncObjectThrottle
<I
>(
349 nullptr, *m_image_ctx
, context_factory
, ctx
, nullptr, 0, m_snap_ids
.size());
351 m_image_ctx
->config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"));
// Completion of the batched object-map updates.  NOTE(review):
// fragment -- the error test (original lines 358-359) and the
// epilogue/copyup() dispatch (362-368) are missing from this excerpt.
354 template <typename I
>
355 void CopyupRequest
<I
>::handle_update_object_maps(int r
) {
356 auto cct
= m_image_ctx
->cct
;
357 ldout(cct
, 20) << "r=" << r
<< dendl
;
360 lderr(m_image_ctx
->cct
) << "failed to update object map: "
361 << cpp_strerror(r
) << dendl
;
// Issue the actual RADOS copy-up operation(s): an optional copy-up op
// with an empty snapshot context (so the OSD materializes snapshot
// history from the parent data) plus an optional write op carrying the
// merged pending client writes under the current snapshot context.
// FIX(review): restored "&copyup_op" where HTML-entity mojibake had
// collapsed "&copy" into the copyright sign ("©up_op") in four places;
// no other token changed.
// NOTE(review): fragment -- many interior lines (376-377, 379-380,
// 382-385, 387, 393-395, 400, 402, 404-406, 408-409, 412-413, 415,
// 417-418, 423-424, 426-430, 440, 446-449, 457, 464-468) are missing.
370 template <typename I
>
371 void CopyupRequest
<I
>::copyup() {
372 auto cct
= m_image_ctx
->cct
;
// snapshot the image's snap context under the lock, use it unlocked
373 m_image_ctx
->image_lock
.lock_shared();
374 auto snapc
= m_image_ctx
->snapc
;
375 m_image_ctx
->image_lock
.unlock_shared();
378 if (!m_copyup_required
) {
381 ldout(cct
, 20) << "skipping copyup" << dendl
;
386 ldout(cct
, 20) << dendl
;
388 bool copy_on_read
= m_pending_requests
.empty();
// a deep copyup is needed when snapshots exist and there is real data
389 bool deep_copyup
= !snapc
.snaps
.empty() && !m_copyup_is_zero
;
// all-zero parent data: drop the payload, the copy-up is metadata-only
390 if (m_copyup_is_zero
) {
391 m_copyup_data
.clear();
392 m_copyup_extent_map
.clear();
396 librados::ObjectWriteOperation copyup_op
;
397 if (copy_on_read
|| deep_copyup
) {
398 if (m_image_ctx
->enable_sparse_copyup
) {
399 cls_client::sparse_copyup(&copyup_op
, m_copyup_extent_map
, m_copyup_data
);
401 cls_client::copyup(&copyup_op
, m_copyup_data
);
403 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, &copyup_op
);
407 librados::ObjectWriteOperation write_op
;
410 if (m_image_ctx
->enable_sparse_copyup
) {
411 cls_client::sparse_copyup(&write_op
, m_copyup_extent_map
,
414 cls_client::copyup(&write_op
, m_copyup_data
);
416 ObjectRequest
<I
>::add_write_hint(*m_image_ctx
, &write_op
);
419 // merge all pending write ops into this single RADOS op
420 for (auto req
: m_pending_requests
) {
421 ldout(cct
, 20) << "add_copyup_ops " << req
<< dendl
;
422 req
->add_copyup_ops(&write_op
);
425 if (write_op
.size() > 0) {
431 // issue librados ops at the end to simplify test cases
432 std::string
oid(data_object_name(m_image_ctx
, m_object_no
));
433 std::vector
<librados::snap_t
> snaps
;
434 if (copyup_op
.size() > 0) {
435 // send only the copyup request with a blank snapshot context so that
436 // all snapshots are detected from the parent for this object. If
437 // this is a CoW request, a second request will be created for the
438 // actual modification.
439 ldout(cct
, 20) << "copyup with empty snapshot context" << dendl
;
441 auto comp
= util::create_rados_callback
<
442 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
443 r
= m_image_ctx
->data_ctx
.aio_operate(
444 oid
, comp
, &copyup_op
, 0, snaps
,
445 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
450 if (write_op
.size() > 0) {
451 // compare-and-write doesn't add any write ops (copyup+cmpext+write
452 // can't be executed in the same RADOS op because, unless the object
453 // was already present in the clone, cmpext wouldn't see it)
454 ldout(cct
, 20) << (!deep_copyup
&& write_op
.size() > 2 ?
455 "copyup + ops" : !deep_copyup
? "copyup" : "ops")
456 << " with current snapshot context" << dendl
;
458 snaps
.insert(snaps
.end(), snapc
.snaps
.begin(), snapc
.snaps
.end());
459 auto comp
= util::create_rados_callback
<
460 CopyupRequest
<I
>, &CopyupRequest
<I
>::handle_copyup
>(this);
461 r
= m_image_ctx
->data_ctx
.aio_operate(
462 oid
, comp
, &write_op
, snapc
.seq
, snaps
,
463 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
// Completion for each of the (up to two) copy-up RADOS ops; the last
// completion drives finish().  NOTE(review): fragment -- lines 473,
// 477-478, 481, 485-486, 488-490 (incl. the finish call) are missing.
469 template <typename I
>
470 void CopyupRequest
<I
>::handle_copyup(int r
) {
471 auto cct
= m_image_ctx
->cct
;
472 unsigned pending_copyups
;
// decrement the outstanding-op count under m_lock; scope-closing
// brace missing from this excerpt
474 std::lock_guard locker
{m_lock
};
475 ceph_assert(m_pending_copyups
> 0);
476 pending_copyups
= --m_pending_copyups
;
479 ldout(cct
, 20) << "r=" << r
<< ", "
480 << "pending=" << pending_copyups
<< dendl
;
// -ENOENT is tolerated (object may legitimately not exist)
482 if (r
< 0 && r
!= -ENOENT
) {
483 lderr(cct
) << "failed to copyup object: " << cpp_strerror(r
) << dendl
;
// fail pending requests but let restart requests be re-driven
484 complete_requests(false, r
);
487 if (pending_copyups
== 0) {
// Final step: complete/restart all tracked requests with the overall
// result.  NOTE(review): fragment -- original lines 496, 498-499
// (presumably remove_from_list()/delete and braces) are missing.
492 template <typename I
>
493 void CopyupRequest
<I
>::finish(int r
) {
494 auto cct
= m_image_ctx
->cct
;
495 ldout(cct
, 20) << "r=" << r
<< dendl
;
497 complete_requests(true, r
);
// Complete every pending request with r, and (optionally overriding r
// -- the override assignment line, original 515/516, is missing) hand
// the restart requests their result as well.  NOTE(review): the lines
// that bind `req` from the iterators (original 508 and 520) are
// missing from this excerpt.
501 template <typename I
>
502 void CopyupRequest
<I
>::complete_requests(bool override_restart_retval
, int r
) {
503 auto cct
= m_image_ctx
->cct
;
506 while (!m_pending_requests
.empty()) {
507 auto it
= m_pending_requests
.begin();
509 ldout(cct
, 20) << "completing request " << req
<< dendl
;
510 req
->handle_copyup(r
);
511 m_pending_requests
.erase(it
);
514 if (override_restart_retval
) {
518 while (!m_restart_requests
.empty()) {
519 auto it
= m_restart_requests
.begin();
521 ldout(cct
, 20) << "restarting request " << req
<< dendl
;
522 req
->handle_copyup(r
);
523 m_restart_requests
.erase(it
);
// Stop accepting new pending requests; later append_request() calls
// will queue into m_restart_requests instead.  Caller must hold m_lock.
527 template <typename I
>
528 void CopyupRequest
<I
>::disable_append_requests() {
529 ceph_assert(ceph_mutex_is_locked(m_lock
));
530 m_append_request_permitted
= false;
// Deregister this request from the image's per-object copy-up map so
// later writers to the same object start a fresh copy-up.
533 template <typename I
>
534 void CopyupRequest
<I
>::remove_from_list() {
535 std::lock_guard copyup_list_locker
{m_image_ctx
->copyup_list_lock
};
537 auto it
= m_image_ctx
->copyup_list
.find(m_object_no
);
538 if (it
!= m_image_ctx
->copyup_list
.end()) {
539 m_image_ctx
->copyup_list
.erase(it
);
// Decide whether a copy-up write is needed.  NOTE(review): fragment --
// the return statements and branch bodies (original lines 548, 550-552,
// 554-556, 559-563) are missing; visible logic: CoR always copies,
// non-zero parent data copies, otherwise any non-empty pending write
// op triggers the copy-up.
543 template <typename I
>
544 bool CopyupRequest
<I
>::is_copyup_required() {
545 ceph_assert(ceph_mutex_is_locked(m_lock
));
547 bool copy_on_read
= m_pending_requests
.empty();
549 // always force a copyup if CoR enabled
553 if (!m_copyup_is_zero
) {
557 for (auto req
: m_pending_requests
) {
558 if (!req
->is_empty_write_op()) {
// A non-empty migration_info means this image is being live-migrated,
// so copy-up must use the deep-copy path.  Caller holds image_lock.
565 template <typename I
>
566 bool CopyupRequest
<I
>::is_deep_copy() const {
567 ceph_assert(ceph_mutex_is_locked(m_image_ctx
->image_lock
));
568 return !m_image_ctx
->migration_info
.empty();
// After a deep-copy, decide whether the HEAD object map still needs an
// update.  NOTE(review): fragment -- the r-based early return(s)
// (original lines 574-578), the branch return values (580-582,
// 586-588) and closing braces are missing from this excerpt.
571 template <typename I
>
572 bool CopyupRequest
<I
>::is_update_object_map_required(int r
) {
573 ceph_assert(ceph_mutex_is_locked(m_image_ctx
->image_lock
));
// no object map feature: nothing to update
579 if (m_image_ctx
->object_map
== nullptr) {
583 if (m_image_ctx
->migration_info
.empty()) {
584 // migration might have completed while IO was in-flight,
585 // assume worst-case and perform an object map update
// HEAD maps to a real snapshot in the snap_map only when the
// deep-copy already handled the HEAD object-map update
589 auto it
= m_image_ctx
->migration_info
.snap_map
.find(CEPH_NOSNAP
);
590 ceph_assert(it
!= m_image_ctx
->migration_info
.snap_map
.end());
591 return it
->second
[0] != CEPH_NOSNAP
;
// Collect (newest-first) the destination snapshots whose object maps
// this state machine must still update after a deep-copy: skip snaps
// the deep-copy already handled and snaps with no parent overlap for
// this object.  NOTE(review): fragment -- lambda return statements
// (original lines 612-614, 617, 620, 622-623), the extent_to_file
// argument lines (626, 628) and the final overlap test (631-634) are
// missing from this excerpt.
594 template <typename I
>
595 void CopyupRequest
<I
>::compute_deep_copy_snap_ids() {
596 ceph_assert(ceph_mutex_is_locked(m_image_ctx
->image_lock
));
598 // don't copy ids for the snaps updated by object deep copy or
599 // that don't overlap
600 std::set
<uint64_t> deep_copied
;
// snap_map values are dst-snap sequences; front() is the dst snap the
// deep-copy wrote -- TODO confirm against deep_copy::ObjectCopyRequest
601 for (auto &it
: m_image_ctx
->migration_info
.snap_map
) {
602 if (it
.first
!= CEPH_NOSNAP
) {
603 deep_copied
.insert(it
.second
.front());
// newest-first iteration to match the ordering used by the
// read-from-parent path
607 std::copy_if(m_image_ctx
->snaps
.rbegin(), m_image_ctx
->snaps
.rend(),
608 std::back_inserter(m_snap_ids
),
609 [this, cct
=m_image_ctx
->cct
, &deep_copied
](uint64_t snap_id
) {
610 if (deep_copied
.count(snap_id
)) {
// remember the newest deep-copied snap is already clean
611 m_first_snap_is_clean
= true;
615 uint64_t parent_overlap
= 0;
616 int r
= m_image_ctx
->get_parent_overlap(snap_id
, &parent_overlap
);
618 ldout(cct
, 5) << "failed getting parent overlap for snap_id: "
619 << snap_id
<< ": " << cpp_strerror(r
) << dendl
;
// object entirely beyond the parent overlap -> no update needed
621 if (parent_overlap
== 0) {
624 std::vector
<std::pair
<uint64_t, uint64_t>> extents
;
625 Striper::extent_to_file(cct
, &m_image_ctx
->layout
,
627 m_image_ctx
->layout
.object_size
,
629 auto overlap
= m_image_ctx
->prune_parent_extents(
630 extents
, parent_overlap
);
636 } // namespace librbd
// explicit instantiation for the production image-context type
638 template class librbd::io::CopyupRequest
<librbd::ImageCtx
>;