// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "librbd/io/CopyupRequest.h"
#include "common/ceph_context.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Mutex.h"
#include "librbd/AsyncObjectThrottle.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ObjectMap.h"
#include "librbd/Utils.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/io/ReadResult.h"

#include <utility>

#include <boost/bind.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/lambda/construct.hpp>
24 #define dout_subsys ceph_subsys_rbd
26 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
27 << " " << __func__ << ": "
34 class UpdateObjectMap
: public C_AsyncObjectThrottle
<> {
36 UpdateObjectMap(AsyncObjectThrottle
<> &throttle
, ImageCtx
*image_ctx
,
37 uint64_t object_no
, const std::vector
<uint64_t> *snap_ids
,
38 const ZTracer::Trace
&trace
, size_t snap_id_idx
)
39 : C_AsyncObjectThrottle(throttle
, *image_ctx
), m_object_no(object_no
),
40 m_snap_ids(*snap_ids
), m_trace(trace
), m_snap_id_idx(snap_id_idx
)
45 uint64_t snap_id
= m_snap_ids
[m_snap_id_idx
];
46 if (snap_id
== CEPH_NOSNAP
) {
47 RWLock::RLocker
snap_locker(m_image_ctx
.snap_lock
);
48 RWLock::WLocker
object_map_locker(m_image_ctx
.object_map_lock
);
49 assert(m_image_ctx
.exclusive_lock
->is_lock_owner());
50 assert(m_image_ctx
.object_map
!= nullptr);
51 bool sent
= m_image_ctx
.object_map
->aio_update
<Context
>(
52 CEPH_NOSNAP
, m_object_no
, OBJECT_EXISTS
, {}, m_trace
, false, this);
53 return (sent
? 0 : 1);
56 uint8_t state
= OBJECT_EXISTS
;
57 if (m_image_ctx
.test_features(RBD_FEATURE_FAST_DIFF
) &&
58 m_snap_id_idx
+ 1 < m_snap_ids
.size()) {
59 state
= OBJECT_EXISTS_CLEAN
;
62 RWLock::RLocker
snap_locker(m_image_ctx
.snap_lock
);
63 RWLock::RLocker
object_map_locker(m_image_ctx
.object_map_lock
);
64 if (m_image_ctx
.object_map
== nullptr) {
68 bool sent
= m_image_ctx
.object_map
->aio_update
<Context
>(
69 snap_id
, m_object_no
, state
, {}, m_trace
, true, this);
76 const std::vector
<uint64_t> &m_snap_ids
;
77 const ZTracer::Trace
&m_trace
;
81 } // anonymous namespace
84 CopyupRequest
<I
>::CopyupRequest(I
*ictx
, const std::string
&oid
,
85 uint64_t objectno
, Extents
&&image_extents
,
86 const ZTracer::Trace
&parent_trace
)
87 : m_ictx(util::get_image_ctx(ictx
)), m_oid(oid
), m_object_no(objectno
),
88 m_image_extents(image_extents
),
89 m_trace(util::create_trace(*m_ictx
, "copy-up", parent_trace
)),
90 m_state(STATE_READ_FROM_PARENT
), m_lock("CopyupRequest", false, false)
92 m_async_op
.start_op(*m_ictx
);
96 CopyupRequest
<I
>::~CopyupRequest() {
97 assert(m_pending_requests
.empty());
98 m_async_op
.finish_op();
101 template <typename I
>
102 void CopyupRequest
<I
>::append_request(AbstractObjectWriteRequest
<I
> *req
) {
103 ldout(m_ictx
->cct
, 20) << req
<< dendl
;
104 m_pending_requests
.push_back(req
);
107 template <typename I
>
108 void CopyupRequest
<I
>::complete_requests(int r
) {
109 while (!m_pending_requests
.empty()) {
110 auto it
= m_pending_requests
.begin();
112 ldout(m_ictx
->cct
, 20) << "completing request " << req
<< dendl
;
113 req
->handle_copyup(r
);
114 m_pending_requests
.erase(it
);
118 template <typename I
>
119 bool CopyupRequest
<I
>::send_copyup() {
120 bool copy_on_read
= m_pending_requests
.empty();
121 bool add_copyup_op
= !m_copyup_data
.is_zero();
122 if (!add_copyup_op
) {
123 m_copyup_data
.clear();
126 ldout(m_ictx
->cct
, 20) << "oid " << m_oid
<< dendl
;
127 m_state
= STATE_COPYUP
;
129 m_ictx
->snap_lock
.get_read();
130 ::SnapContext snapc
= m_ictx
->snapc
;
131 m_ictx
->snap_lock
.put_read();
133 std::vector
<librados::snap_t
> snaps
;
135 Mutex::Locker
locker(m_lock
);
137 if (copy_on_read
|| (!snapc
.snaps
.empty() && add_copyup_op
)) {
139 librados::ObjectWriteOperation copyup_op
;
140 copyup_op
.exec("rbd", "copyup", m_copyup_data
);
141 m_copyup_data
.clear();
143 ObjectRequest
<I
>::add_write_hint(*m_ictx
, ©up_op
);
145 // send only the copyup request with a blank snapshot context so that
146 // all snapshots are detected from the parent for this object. If
147 // this is a CoW request, a second request will be created for the
148 // actual modification.
151 ldout(m_ictx
->cct
, 20) << "copyup with empty snapshot context" << dendl
;
152 librados::AioCompletion
*comp
= util::create_rados_callback(this);
154 librados::Rados
rados(m_ictx
->data_ctx
);
155 r
= rados
.ioctx_create2(m_ictx
->data_ctx
.get_id(), m_data_ctx
);
158 r
= m_data_ctx
.aio_operate(
159 m_oid
, comp
, ©up_op
, 0, snaps
,
160 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
166 librados::ObjectWriteOperation write_op
;
167 write_op
.exec("rbd", "copyup", m_copyup_data
);
169 // merge all pending write ops into this single RADOS op
170 ObjectRequest
<I
>::add_write_hint(*m_ictx
, &write_op
);
171 for (auto req
: m_pending_requests
) {
172 ldout(m_ictx
->cct
, 20) << "add_copyup_ops " << req
<< dendl
;
173 req
->add_copyup_ops(&write_op
);
178 snaps
.insert(snaps
.end(), snapc
.snaps
.begin(), snapc
.snaps
.end());
179 librados::AioCompletion
*comp
= util::create_rados_callback(this);
180 r
= m_ictx
->data_ctx
.aio_operate(
181 m_oid
, comp
, &write_op
, snapc
.seq
, snaps
,
182 (m_trace
.valid() ? m_trace
.get_info() : nullptr));
189 template <typename I
>
190 bool CopyupRequest
<I
>::is_copyup_required() {
191 bool copy_on_read
= m_pending_requests
.empty();
193 // always force a copyup if CoR enabled
197 if (!m_copyup_data
.is_zero()) {
201 for (auto req
: m_pending_requests
) {
202 if (!req
->is_empty_write_op()) {
209 template <typename I
>
210 void CopyupRequest
<I
>::send()
212 m_state
= STATE_READ_FROM_PARENT
;
213 AioCompletion
*comp
= AioCompletion::create_and_start(
214 this, m_ictx
, AIO_TYPE_READ
);
216 ldout(m_ictx
->cct
, 20) << "completion " << comp
218 << ", extents " << m_image_extents
220 ImageRequest
<>::aio_read(m_ictx
->parent
, comp
, std::move(m_image_extents
),
221 ReadResult
{&m_copyup_data
}, 0, m_trace
);
224 template <typename I
>
225 void CopyupRequest
<I
>::complete(int r
)
227 if (should_complete(r
)) {
228 complete_requests(r
);
233 template <typename I
>
234 bool CopyupRequest
<I
>::should_complete(int r
)
236 CephContext
*cct
= m_ictx
->cct
;
237 ldout(cct
, 20) << "oid " << m_oid
238 << ", r " << r
<< dendl
;
240 uint64_t pending_copyups
;
242 case STATE_READ_FROM_PARENT
:
243 ldout(cct
, 20) << "READ_FROM_PARENT" << dendl
;
245 if (r
>= 0 || r
== -ENOENT
) {
246 if (!is_copyup_required()) {
247 ldout(cct
, 20) << "nop, skipping" << dendl
;
251 return send_object_map_head();
255 case STATE_OBJECT_MAP_HEAD
:
256 ldout(cct
, 20) << "OBJECT_MAP_HEAD" << dendl
;
258 return send_object_map();
260 case STATE_OBJECT_MAP
:
261 ldout(cct
, 20) << "OBJECT_MAP" << dendl
;
263 return send_copyup();
267 Mutex::Locker
locker(m_lock
);
268 assert(m_pending_copyups
> 0);
269 pending_copyups
= --m_pending_copyups
;
271 ldout(cct
, 20) << "COPYUP (" << pending_copyups
<< " pending)"
274 // hide the -ENOENT error if this is the last op
275 if (pending_copyups
== 0) {
276 complete_requests(0);
279 complete_requests(r
);
281 return (pending_copyups
== 0);
284 lderr(cct
) << "invalid state: " << m_state
<< dendl
;
291 template <typename I
>
292 void CopyupRequest
<I
>::remove_from_list()
294 Mutex::Locker
l(m_ictx
->copyup_list_lock
);
296 auto it
= m_ictx
->copyup_list
.find(m_object_no
);
297 assert(it
!= m_ictx
->copyup_list
.end());
298 m_ictx
->copyup_list
.erase(it
);
301 template <typename I
>
302 bool CopyupRequest
<I
>::send_object_map_head() {
303 CephContext
*cct
= m_ictx
->cct
;
304 ldout(cct
, 20) << dendl
;
306 m_state
= STATE_OBJECT_MAP_HEAD
;
309 RWLock::RLocker
owner_locker(m_ictx
->owner_lock
);
310 RWLock::RLocker
snap_locker(m_ictx
->snap_lock
);
311 if (m_ictx
->object_map
!= nullptr) {
312 bool copy_on_read
= m_pending_requests
.empty();
313 assert(m_ictx
->exclusive_lock
->is_lock_owner());
315 RWLock::WLocker
object_map_locker(m_ictx
->object_map_lock
);
316 if (!m_ictx
->snaps
.empty()) {
317 m_snap_ids
.insert(m_snap_ids
.end(), m_ictx
->snaps
.begin(),
318 m_ictx
->snaps
.end());
321 (*m_ictx
->object_map
)[m_object_no
] != OBJECT_EXISTS
) {
322 m_snap_ids
.insert(m_snap_ids
.begin(), CEPH_NOSNAP
);
323 object_map_locker
.unlock();
324 snap_locker
.unlock();
325 owner_locker
.unlock();
326 return send_object_map();
329 bool may_update
= false;
331 uint8_t current_state
= (*m_ictx
->object_map
)[m_object_no
];
333 auto r_it
= m_pending_requests
.rbegin();
334 if (r_it
!= m_pending_requests
.rend()) {
336 new_state
= req
->get_pre_write_object_map_state();
338 ldout(cct
, 20) << req
->get_op_type() << " object no "
339 << m_object_no
<< " current state "
340 << stringify(static_cast<uint32_t>(current_state
))
341 << " new state " << stringify(static_cast<uint32_t>(new_state
))
346 if (may_update
&& (new_state
!= current_state
) &&
347 m_ictx
->object_map
->aio_update
<CopyupRequest
>(
348 CEPH_NOSNAP
, m_object_no
, new_state
, current_state
, m_trace
,
355 return send_object_map();
358 template <typename I
>
359 bool CopyupRequest
<I
>::send_object_map() {
360 // avoid possible recursive lock attempts
361 if (m_snap_ids
.empty()) {
362 // no object map update required
363 return send_copyup();
365 // update object maps for HEAD and all existing snapshots
366 ldout(m_ictx
->cct
, 20) << "oid " << m_oid
<< dendl
;
367 m_state
= STATE_OBJECT_MAP
;
369 RWLock::RLocker
owner_locker(m_ictx
->owner_lock
);
370 AsyncObjectThrottle
<>::ContextFactory
context_factory(
371 boost::lambda::bind(boost::lambda::new_ptr
<UpdateObjectMap
>(),
372 boost::lambda::_1
, m_ictx
, m_object_no
, &m_snap_ids
, m_trace
,
374 AsyncObjectThrottle
<> *throttle
= new AsyncObjectThrottle
<>(
375 NULL
, *m_ictx
, context_factory
, util::create_context_callback(this),
376 NULL
, 0, m_snap_ids
.size());
377 throttle
->start_ops(m_ictx
->concurrent_management_ops
);
383 } // namespace librbd
385 template class librbd::io::CopyupRequest
<librbd::ImageCtx
>;