1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ImageDispatchSpec.h"
12 #include "librbd/io/ImageRequestWQ.h"
13 #include "librbd/object_map/DiffRequest.h"
14 #include "include/rados/librados.hpp"
15 #include "include/interval_set.h"
16 #include "common/errno.h"
17 #include "common/Cond.h"
18 #include "common/Throttle.h"
19 #include "osdc/Striper.h"
20 #include "librados/snap_set_diff.h"
21 #include <boost/tuple/tuple.hpp>
26 #define dout_subsys ceph_subsys_rbd
28 #define dout_prefix *_dout << "librbd::DiffIterate: "
// Per-object diff classification. DiffIterate::execute() compares the entries
// of its BitVector<2> object_diff_state against these values.
// NOTE(review): the closing brace of the enum is not visible in this extract.
35 enum ObjectDiffState
{
// execute() treats this as "nothing to report for the child object"; when
// diffing from snap id 0 with a non-empty parent diff it reports the parent
// overlap instead.
36 OBJECT_DIFF_STATE_NONE
= 0,
// execute() passes `true` as the callback's third argument for this state.
37 OBJECT_DIFF_STATE_UPDATED
= 1,
// Any state other than NONE/UPDATED makes execute() pass `false` as the
// callback's third argument.
38 OBJECT_DIFF_STATE_HOLE
= 2
// Shared state handed by reference to every C_DiffObject of one diff run.
// NOTE(review): the enclosing struct/template header and several member
// declarations (callback_arg, whole_object, end_snap_id are initialized below
// but not declared in the visible lines) are missing from this extract.
// User callback invoked for each reported extent.
42 DiffIterate
<>::Callback callback
;
// Snapshot id the diff starts from; 0 is tested by callers as the
// "include parent overlap" case.
45 uint64_t from_snap_id
;
// Extents collected from the parent image (filled by execute() through
// simple_diff_cb); intersected with child extents when reporting overlap.
47 interval_set
<uint64_t> parent_diff
;
// Bounds the number of in-flight per-object requests; see start_op()/end_op()
// usage in C_DiffObject.
48 OrderedThrottle throttle
;
// Stores the callback, its argument, the whole-object flag and the snapshot
// range, and sizes the throttle from the image's
// "rbd_concurrent_management_ops" configuration value.
51 DiffContext(I
&image_ctx
, DiffIterate
<>::Callback callback
,
52 void *callback_arg
, bool _whole_object
, uint64_t _from_snap_id
,
53 uint64_t _end_snap_id
)
54 : callback(callback
), callback_arg(callback_arg
),
55 whole_object(_whole_object
), from_snap_id(_from_snap_id
),
56 end_snap_id(_end_snap_id
),
// NOTE(review): the meaning of the trailing `true` is defined by
// OrderedThrottle's constructor, which is not visible here -- confirm.
57 throttle(image_ctx
.config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"), true) {
// Context that diffs a single object: it issues a list_snaps read and, on
// completion (finish()), converts the result into Diff tuples for the callback.
61 class C_DiffObject
: public Context
{
// Captures everything needed for one object:
//   image_ctx      - only its cct is retained (m_cct)
//   head_ctx       - IoCtx the list_snaps operation is issued on (by reference)
//   diff_context   - shared run state: callback, snap range, throttle (by reference)
//   oid            - name of the object to inspect
//   offset         - base added to buffer offsets when building reported extents
//   object_extents - extents of this object to examine (copied)
// m_snap_ret starts at 0 and is later written by the list_snaps operation.
64 C_DiffObject(I
&image_ctx
, librados::IoCtx
&head_ctx
,
65 DiffContext
&diff_context
, const std::string
&oid
,
66 uint64_t offset
, const std::vector
<ObjectExtent
> &object_extents
)
67 : m_cct(image_ctx
.cct
), m_head_ctx(head_ctx
),
68 m_diff_context(diff_context
), m_oid(oid
), m_offset(offset
),
69 m_object_extents(object_extents
), m_snap_ret(0) {
// Body of the method that launches the asynchronous list_snaps request.
// NOTE(review): the method header and the handling of `r` after aio_operate()
// are not visible in this extract.
// Register this context with the shared throttle; the returned wrapper is what
// the rados completion will fire.
73 C_OrderedThrottle
*ctx
= m_diff_context
.throttle
.start_op(this);
74 librados::AioCompletion
*rados_completion
=
75 util::create_rados_callback(ctx
);
// Ask for the object's snapshot set; results land in m_snap_set / m_snap_ret.
77 librados::ObjectReadOperation op
;
78 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
// No output bufferlist is supplied (last argument is NULL).
80 int r
= m_head_ctx
.aio_operate(m_oid
, rados_completion
, &op
, NULL
);
// Drop this function's reference to the completion.
82 rados_completion
->release();
// One reported extent. finish() forwards elements 0, 1 and 2, in that order,
// as the first three callback arguments (declared elsewhere in this file as
// off, len, exists).
86 typedef boost::tuple
<uint64_t, size_t, bool> Diff
;
// Ordered collection of extents produced for one object.
87 typedef std::list
<Diff
> Diffs
;
// Completion handler for the list_snaps request.
//   r - result of the asynchronous operation.
// Builds the list of extents for this object, feeds each one to the user
// callback, and finally reports the outcome to the throttle via end_op().
// NOTE(review): several original lines are missing from this extract (the body
// of the first `if`, the declaration of `diffs`, the success-branch condition,
// the final `else`, and whatever follows each callback invocation).
89 void finish(int r
) override
{
90 CephContext
*cct
= m_cct
;
// The operation itself succeeded but list_snaps reported an error.
91 if (r
== 0 && m_snap_ret
< 0) {
// Success path: translate the snap set into extents.
97 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps complete" << dendl
;
98 compute_diffs(&diffs
);
// Object does not exist: only the parent image can contribute extents.
99 } else if (r
== -ENOENT
) {
100 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps (not found)"
103 compute_parent_overlap(&diffs
);
// Any other error is logged with its textual description.
105 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps failed: "
106 << cpp_strerror(r
) << dendl
;
// Hand every collected extent to the user callback; `r` takes the callback's
// return value.
110 for (Diffs::const_iterator d
= diffs
.begin(); d
!= diffs
.end(); ++d
) {
111 r
= m_diff_context
.callback(d
->get
<0>(), d
->get
<1>(), d
->get
<2>(),
112 m_diff_context
.callback_arg
);
// Tell the throttle this operation is done and pass along the final result.
118 m_diff_context
.throttle
.end_op(r
);
// Data members of C_DiffObject.
// NOTE(review): m_cct, m_oid, m_offset and m_snap_ret are initialized by the
// constructor but their declarations are not visible in this extract.
// IoCtx used to issue list_snaps; held by reference, so the caller's IoCtx
// must outlive this context.
123 librados::IoCtx
&m_head_ctx
;
// Shared per-run state (callback, snap range, parent diff, throttle).
124 DiffContext
&m_diff_context
;
// Private copy of the extents of this object that are being diffed.
127 std::vector
<ObjectExtent
> m_object_extents
;
// Filled in by the list_snaps read operation.
129 librados::snap_set_t m_snap_set
;
// Converts the snap set returned by list_snaps into extents appended to
// *diffs.
//   diffs - output list; entries are (offset, length, <third element>) tuples.
// NOTE(review): this extract omits several original lines, including the
// declarations of end_size / end_exists / whole_object, the conditions guarding
// the first log statement, the third tuple element of every push_back, the
// statement that advances `opos`, and all closing braces.
132 void compute_diffs(Diffs
*diffs
) {
133 CephContext
*cct
= m_cct
;
135 // calc diff from from_snap_id -> to_snap_id
136 interval_set
<uint64_t> diff
;
139 librados::snap_t clone_end_snap_id
;
// Outputs: changed object ranges (diff), plus end_size, end_exists,
// clone_end_snap_id and a flag requesting the whole object be reported.
141 calc_snap_set_diff(cct
, m_snap_set
, m_diff_context
.from_snap_id
,
142 m_diff_context
.end_snap_id
, &diff
, &end_size
,
143 &end_exists
, &clone_end_snap_id
, &whole_object
);
145 ldout(cct
, 1) << "object " << m_oid
<< ": need to provide full object"
148 ldout(cct
, 20) << " diff " << diff
<< " end_exists=" << end_exists
// Nothing changed in this object and no whole-object report was requested.
150 if (diff
.empty() && !whole_object
) {
// Diffing from the beginning and the object is absent at the end snapshot:
// fall back to the parent image's extents.
151 if (m_diff_context
.from_snap_id
== 0 && !end_exists
) {
152 compute_parent_overlap(diffs
);
// Either the caller asked for whole-object granularity or calc_snap_set_diff
// demanded it.
155 } else if (m_diff_context
.whole_object
|| whole_object
) {
156 // provide the full object extents to the callback
157 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
158 q
!= m_object_extents
.end(); ++q
) {
// NOTE(review): this adds m_offset to q->offset, whereas the partial-diff
// branch below builds its logical offset from r->first (a buffer_extents
// entry) and uses q->offset only as a position compared against `diff`.
// Confirm both branches report offsets on the same basis.
159 diffs
->push_back(boost::make_tuple(m_offset
+ q
->offset
, q
->length
,
// Partial diff: map changed object ranges back through each buffer extent.
165 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
166 q
!= m_object_extents
.end(); ++q
) {
167 ldout(cct
, 20) << "diff_iterate object " << m_oid
<< " extent "
168 << q
->offset
<< "~" << q
->length
<< " from "
169 << q
->buffer_extents
<< dendl
;
// opos tracks the current position inside the object extent.
170 uint64_t opos
= q
->offset
;
171 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
172 q
->buffer_extents
.begin();
173 r
!= q
->buffer_extents
.end(); ++r
) {
// Range [opos, opos + r->second) restricted to the parts that changed.
174 interval_set
<uint64_t> overlap
; // object extents
175 overlap
.insert(opos
, r
->second
);
176 overlap
.intersection_of(diff
);
177 ldout(cct
, 20) << " opos " << opos
178 << " buf " << r
->first
<< "~" << r
->second
179 << " overlap " << overlap
<< dendl
;
180 for (interval_set
<uint64_t>::iterator s
= overlap
.begin();
181 s
!= overlap
.end(); ++s
) {
// Offset of the changed range relative to the start of this buffer extent,
// then rebased onto m_offset plus the buffer extent's own offset.
182 uint64_t su_off
= s
.get_start() - opos
;
183 uint64_t logical_off
= m_offset
+ r
->first
+ su_off
;
184 ldout(cct
, 20) << " overlap extent " << s
.get_start() << "~"
185 << s
.get_len() << " logical " << logical_off
<< "~"
186 << s
.get_len() << dendl
;
187 diffs
->push_back(boost::make_tuple(logical_off
, s
.get_len(),
// Sanity check: the buffer extents must have covered the object extent
// exactly (presumably opos is advanced by r->second on a line not shown).
192 ceph_assert(opos
== q
->offset
+ q
->length
);
// Appends to *diffs the portions of this object's buffer extents that
// intersect the parent image's diff.
//   diffs - output list of extents.
// Does nothing unless the diff starts at snap id 0 and a parent diff was
// collected.
// NOTE(review): the iterator increment of the innermost loop, the third tuple
// element of push_back and all closing braces are missing from this extract.
196 void compute_parent_overlap(Diffs
*diffs
) {
197 if (m_diff_context
.from_snap_id
== 0 &&
198 !m_diff_context
.parent_diff
.empty()) {
199 // report parent diff instead
200 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
201 q
!= m_object_extents
.end(); ++q
) {
202 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
203 q
->buffer_extents
.begin();
204 r
!= q
->buffer_extents
.end(); ++r
) {
// Range of this buffer extent rebased by m_offset, clipped to the ranges
// recorded in parent_diff.
205 interval_set
<uint64_t> o
;
206 o
.insert(m_offset
+ r
->first
, r
->second
);
207 o
.intersection_of(m_diff_context
.parent_diff
);
208 ldout(m_cct
, 20) << " reporting parent overlap " << o
<< dendl
;
// Each surviving interval becomes one reported extent.
209 for (interval_set
<uint64_t>::iterator s
= o
.begin(); s
!= o
.end();
211 diffs
->push_back(boost::make_tuple(s
.get_start(), s
.get_len(),
// Diff callback that accumulates reported extents into an interval set.
//   off, len - extent being reported
//   exists   - third callback argument; its handling is on a line not visible
//              in this extract (see the original comment below)
//   arg      - must point to an interval_set<uint64_t>; it is cast without any
//              check
// execute() passes &diff_context.parent_diff as `arg` when diffing the parent.
// NOTE(review): the return statement(s) are not visible in this extract.
220 int simple_diff_cb(uint64_t off
, size_t len
, int exists
, void *arg
) {
221 // it's possible for a discard to create a hole in the parent image -- ignore
223 interval_set
<uint64_t> *diff
= static_cast<interval_set
<uint64_t> *>(arg
);
224 diff
->insert(off
, len
);
229 } // anonymous namespace
// Entry point: validates the image, flushes outstanding writes, refreshes the
// image if needed, clips the requested range and then runs a DiffIterate
// command over it.
//   ictx                 - image to diff
//   from_snap_namespace, fromsnapname - starting snapshot; forwarded unchanged
//                          to the DiffIterate command
//   off, len             - range to examine; len may be shortened by clip_io()
//   include_parent       - forwarded to the command
//   whole_object         - forwarded to the command
//   cb                   - callback taking (uint64_t, size_t, int, void *)
// NOTE(review): the trailing `arg` parameter, every error check on `r`, the
// remaining arguments of create_and_start(), the dispatch of `req`, the scope
// braces around the flush and the final return are missing from this extract.
231 template <typename I
>
232 int DiffIterate
<I
>::diff_iterate(I
*ictx
,
233 const cls::rbd::SnapshotNamespace
& from_snap_namespace
,
234 const char *fromsnapname
,
235 uint64_t off
, uint64_t len
,
236 bool include_parent
, bool whole_object
,
237 int (*cb
)(uint64_t, size_t, int, void *),
240 ldout(ictx
->cct
, 20) << "diff_iterate " << ictx
<< " off = " << off
241 << " len = " << len
<< dendl
;
// An image without a valid data IoCtx cannot be diffed; the handling itself
// is on lines not shown.
243 if (!ictx
->data_ctx
.is_valid()) {
247 // ensure previous writes are visible to listsnaps
248 C_SaferCond flush_ctx
;
// Build the internal flush request while holding owner_lock shared.
250 std::shared_lock owner_locker
{ictx
->owner_lock
};
251 auto aio_comp
= io::AioCompletion::create_and_start(&flush_ctx
, ictx
,
253 auto req
= io::ImageDispatchSpec
<I
>::create_flush_request(
254 *ictx
, aio_comp
, io::FLUSH_SOURCE_INTERNAL
, {});
// Block until the flush has completed.
258 int r
= flush_ctx
.wait();
263 r
= ictx
->state
->refresh_if_required();
// clip_io() is called with image_lock held shared; it receives &len and may
// adjust it.
268 ictx
->image_lock
.lock_shared();
269 r
= clip_io(ictx
, off
, &len
);
270 ictx
->image_lock
.unlock_shared();
// Delegate the actual work to a DiffIterate instance.
275 DiffIterate
command(*ictx
, from_snap_namespace
, fromsnapname
, off
, len
,
276 include_parent
, whole_object
, cb
, arg
);
277 r
= command
.execute();
// Runs the diff described by this DiffIterate instance, starting at m_offset
// for m_length bytes, and reports extents through m_callback.
// Visible phases:
//   1. resolve the snapshot ids and image sizes under image_lock
//   2. optionally build an object-map based "fast diff" state (whole-object)
//   3. optionally collect the parent image's diff first
//   4. walk the range one stripe period at a time, either answering from the
//      fast-diff state or scheduling a C_DiffObject per object
//   5. wait for the throttle to drain
// NOTE(review): this extract omits many original lines, among them the
// declarations of `r` and `end_size`, the bodies of the snapshot-id checks, the
// DiffRequest send/wait logic, the outer loop header, the remaining
// arguments of several calls, error handling after callbacks and all returns.
281 template <typename I
>
282 int DiffIterate
<I
>::execute() {
283 CephContext
* cct
= m_image_ctx
.cct
;
285 ceph_assert(m_image_ctx
.data_ctx
.is_valid());
287 librados::IoCtx head_ctx
;
288 librados::snap_t from_snap_id
= 0;
289 librados::snap_t end_snap_id
;
290 uint64_t from_size
= 0;
// Snapshot lookup and size queries are done with image_lock held shared.
293 std::shared_lock image_locker
{m_image_ctx
.image_lock
};
// Work on a duplicate so the image's own data_ctx is left untouched.
294 head_ctx
.dup(m_image_ctx
.data_ctx
);
// A starting snapshot name is optional; without one from_snap_id stays 0.
295 if (m_from_snap_name
) {
296 from_snap_id
= m_image_ctx
.get_snap_id(m_from_snap_namespace
, m_from_snap_name
);
297 from_size
= m_image_ctx
.get_image_size(from_snap_id
);
// The diff ends at the snapshot the image is currently opened at.
299 end_snap_id
= m_image_ctx
.snap_id
;
300 end_size
= m_image_ctx
.get_image_size(end_snap_id
);
// Three special cases for the starting snapshot are tested; what each one
// does is on lines not visible here.
303 if (from_snap_id
== CEPH_NOSNAP
) {
306 if (from_snap_id
== end_snap_id
) {
310 if (from_snap_id
>= end_snap_id
) {
// Fast diff is only attempted for whole-object requests.
315 bool fast_diff_enabled
= false;
316 BitVector
<2> object_diff_state
;
317 if (m_whole_object
) {
319 auto req
= object_map::DiffRequest
<I
>::create(&m_image_ctx
, from_snap_id
,
321 &object_diff_state
, &ctx
);
326 ldout(cct
, 5) << "fast diff disabled" << dendl
;
328 ldout(cct
, 5) << "fast diff enabled" << dendl
;
329 fast_diff_enabled
= true;
333 // we must list snaps via the head, not end snap
334 head_ctx
.snap_set_read(CEPH_SNAPDIR
);
336 ldout(cct
, 5) << "diff_iterate from " << from_snap_id
<< " to "
337 << end_snap_id
<< " size from " << from_size
338 << " to " << end_size
<< dendl
;
340 // check parent overlap only if we are comparing to the beginning of time
341 DiffContext
diff_context(m_image_ctx
, m_callback
, m_callback_arg
,
342 m_whole_object
, from_snap_id
, end_snap_id
);
343 if (m_include_parent
&& from_snap_id
== 0) {
344 std::shared_lock image_locker
{m_image_ctx
.image_lock
};
345 uint64_t overlap
= 0;
346 m_image_ctx
.get_parent_overlap(m_image_ctx
.snap_id
, &overlap
);
// Only recurse when a parent exists and overlaps this image.
348 if (m_image_ctx
.parent
&& overlap
> 0) {
349 ldout(cct
, 10) << " first getting parent diff" << dendl
;
// The nested run stores its extents in diff_context.parent_diff.
350 DiffIterate
diff_parent(*m_image_ctx
.parent
, {},
352 m_include_parent
, m_whole_object
,
354 &diff_context
.parent_diff
);
355 r
= diff_parent
.execute();
// Iteration state: `off` is the current position, `left` the bytes remaining.
362 uint64_t period
= m_image_ctx
.get_stripe_period();
363 uint64_t off
= m_offset
;
364 uint64_t left
= m_length
;
// Process at most up to the end of the current stripe period per pass.
367 uint64_t period_off
= off
- (off
% period
);
368 uint64_t read_len
= min(period_off
+ period
- off
, left
);
// Map the range [off, off + read_len) onto objects.
371 map
<object_t
,vector
<ObjectExtent
> > object_extents
;
372 Striper::file_to_extents(cct
, m_image_ctx
.format_string
,
373 &m_image_ctx
.layout
, off
, read_len
, 0,
376 // get snap info for each object
377 for (map
<object_t
,vector
<ObjectExtent
> >::iterator p
=
378 object_extents
.begin();
379 p
!= object_extents
.end(); ++p
) {
380 ldout(cct
, 20) << "object " << p
->first
<< dendl
;
// Fast path: answer from the precomputed per-object state, no list_snaps.
382 if (fast_diff_enabled
) {
383 const uint64_t object_no
= p
->second
.front().objectno
;
384 if (object_diff_state
[object_no
] == OBJECT_DIFF_STATE_NONE
&&
385 from_snap_id
== 0 && !diff_context
.parent_diff
.empty()) {
386 // no data in child object -- report parent diff instead
387 for (auto& oe
: p
->second
) {
388 for (auto& be
: oe
.buffer_extents
) {
// Buffer extent rebased by `off`, clipped to the parent's recorded extents.
389 interval_set
<uint64_t> o
;
390 o
.insert(off
+ be
.first
, be
.second
);
391 o
.intersection_of(diff_context
.parent_diff
);
392 ldout(cct
, 20) << " reporting parent overlap " << o
<< dendl
;
393 for (auto e
= o
.begin(); e
!= o
.end(); ++e
) {
394 r
= m_callback(e
.get_start(), e
.get_len(), true,
402 } else if (object_diff_state
[object_no
] != OBJECT_DIFF_STATE_NONE
) {
// `updated` becomes the callback's third argument.
403 bool updated
= (object_diff_state
[object_no
] ==
404 OBJECT_DIFF_STATE_UPDATED
);
405 for (std::vector
<ObjectExtent
>::iterator q
= p
->second
.begin();
406 q
!= p
->second
.end(); ++q
) {
// NOTE(review): this reports `off + q->offset`, while the parent-overlap
// branch above reports `off + be.first` (buffer_extents). Confirm both use
// the same offset basis.
407 r
= m_callback(off
+ q
->offset
, q
->length
, updated
, m_callback_arg
);
// Slow path: schedule an asynchronous per-object list_snaps diff.
// NOTE(review): a raw `new` is used here; who deletes the object is not
// visible in this extract -- confirm.
414 C_DiffObject
*diff_object
= new C_DiffObject(m_image_ctx
, head_ctx
,
// Stop early if any in-flight operation has already failed.
420 if (diff_context
.throttle
.pending_error()) {
421 r
= diff_context
.throttle
.wait_for_ret();
// Drain all outstanding operations and pick up the overall result.
431 r
= diff_context
.throttle
.wait_for_ret();
439 } // namespace librbd
// Explicit instantiation of DiffIterate for librbd::ImageCtx, so the member
// templates defined in this file are emitted for that type.
441 template class librbd::api::DiffIterate
<librbd::ImageCtx
>;