1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ImageDispatchSpec.h"
12 #include "librbd/io/ImageRequestWQ.h"
13 #include "include/rados/librados.hpp"
14 #include "include/interval_set.h"
15 #include "common/errno.h"
16 #include "common/Throttle.h"
17 #include "osdc/Striper.h"
18 #include "librados/snap_set_diff.h"
19 #include <boost/tuple/tuple.hpp>
24 #define dout_subsys ceph_subsys_rbd
26 #define dout_prefix *_dout << "librbd::DiffIterate: "
// Per-object change classification used by the fast-diff path.
// diff_object_map() writes one of these values per object into a
// BitVector<2>, and execute() reads them back to decide whether and how to
// invoke the user callback.
33 enum ObjectDiffState
{
// no change recorded; execute() skips the callback for such objects
34 OBJECT_DIFF_STATE_NONE
= 0,
// execute() passes a true third argument to the callback for this state
35 OBJECT_DIFF_STATE_UPDATED
= 1,
// assigned when the current object map says OBJECT_NONEXISTENT but the
// previous map did not
36 OBJECT_DIFF_STATE_HOLE
= 2
// State shared by every per-object request of a single diff run.
// user callback invoked for each reported extent (see C_DiffObject::finish)
40 DiffIterate
<>::Callback callback
;
// lower bound of the snapshot range; 0 is treated by the callers in this file
// as "diff from the beginning" (parent overlap is only considered then)
43 uint64_t from_snap_id
;
// image extents collected from the parent image by execute(); consulted by
// C_DiffObject::compute_parent_overlap()
45 interval_set
<uint64_t> parent_diff
;
// C_DiffObject registers with this throttle before issuing its request and
// reports its result to it when finished
46 OrderedThrottle throttle
;
// Stores the callback, its argument and the snapshot range, and constructs
// the throttle from the "rbd_concurrent_management_ops" config value.
// NOTE(review): the meaning of the trailing `true` passed to the throttle is
// not visible here -- confirm against OrderedThrottle's constructor.
49 DiffContext(I
&image_ctx
, DiffIterate
<>::Callback callback
,
50 void *callback_arg
, bool _whole_object
, uint64_t _from_snap_id
,
51 uint64_t _end_snap_id
)
52 : callback(callback
), callback_arg(callback_arg
),
53 whole_object(_whole_object
), from_snap_id(_from_snap_id
),
54 end_snap_id(_end_snap_id
),
55 throttle(image_ctx
.config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"), true) {
// Context that issues a list_snaps read for a single object and, once it
// completes, turns the result into diff extents reported through the
// DiffContext callback.
59 class C_DiffObject
: public Context
{
// Captures what is needed to diff one object:
//   head_ctx       - IoCtx used to issue the list_snaps read (held by reference)
//   diff_context   - shared run state (held by reference)
//   oid            - name of the object to query
//   offset         - base image offset added to buffer-extent offsets when
//                    reporting diffs
//   object_extents - extents of this object to examine (copied)
62 C_DiffObject(I
&image_ctx
, librados::IoCtx
&head_ctx
,
63 DiffContext
&diff_context
, const std::string
&oid
,
64 uint64_t offset
, const std::vector
<ObjectExtent
> &object_extents
)
65 : m_cct(image_ctx
.cct
), m_head_ctx(head_ctx
),
66 m_diff_context(diff_context
), m_oid(oid
), m_offset(offset
),
67 m_object_extents(object_extents
), m_snap_ret(0) {
// register this context with the shared throttle; the throttle hands back the
// context that the rados completion will fire
71 C_OrderedThrottle
*ctx
= m_diff_context
.throttle
.start_op(this);
72 librados::AioCompletion
*rados_completion
=
73 util::create_rados_callback(ctx
);
// list_snaps fills m_snap_set and stores its own status in m_snap_ret
75 librados::ObjectReadOperation op
;
76 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
// issue the read asynchronously against the head IoCtx
78 int r
= m_head_ctx
.aio_operate(m_oid
, rados_completion
, &op
, NULL
);
// drop the local handle on the completion after the operation was submitted
80 rados_completion
->release();
// One reported extent. finish() forwards the three elements, in order, as the
// first three arguments of the user callback; the callbacks in this file name
// those parameters (off, len, exists).
84 typedef boost::tuple
<uint64_t, size_t, bool> Diff
;
85 typedef std::list
<Diff
> Diffs
;
// Completion handler for the list_snaps read.
// r is the completion result; m_snap_ret carries the status of the
// list_snaps op itself.
87 void finish(int r
) override
{
88 CephContext
*cct
= m_cct
;
// the completion succeeded but the list_snaps op reported an error
89 if (r
== 0 && m_snap_ret
< 0) {
// success: derive the diff extents from the returned snap set
95 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps complete" << dendl
;
96 compute_diffs(&diffs
);
// object does not exist: fall back to the parent's diff for these extents
97 } else if (r
== -ENOENT
) {
98 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps (not found)"
101 compute_parent_overlap(&diffs
);
// any other error is only logged here
103 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps failed: "
104 << cpp_strerror(r
) << dendl
;
// hand every collected extent to the user callback; its return value
// replaces r
108 for (Diffs::const_iterator d
= diffs
.begin(); d
!= diffs
.end(); ++d
) {
109 r
= m_diff_context
.callback(d
->get
<0>(), d
->get
<1>(), d
->get
<2>(),
110 m_diff_context
.callback_arg
);
// report the final result of this object to the shared throttle
116 m_diff_context
.throttle
.end_op(r
);
// IoCtx used for the list_snaps read; a reference, so it must outlive this
// context
121 librados::IoCtx
&m_head_ctx
;
// shared run state (callback, snapshot range, parent diff, throttle)
122 DiffContext
&m_diff_context
;
// private copy of the extents of this object that are being diffed
125 std::vector
<ObjectExtent
> m_object_extents
;
// filled in by the list_snaps op
127 librados::snap_set_t m_snap_set
;
// Converts the snap set returned by list_snaps into image-level diff extents
// appended to *diffs.
130 void compute_diffs(Diffs
*diffs
) {
131 CephContext
*cct
= m_cct
;
133 // calc diff from from_snap_id -> to_snap_id
134 interval_set
<uint64_t> diff
;
137 librados::snap_t clone_end_snap_id
;
// outputs: diff (changed object ranges), end_size, end_exists,
// clone_end_snap_id and a per-object whole_object flag
139 calc_snap_set_diff(cct
, m_snap_set
, m_diff_context
.from_snap_id
,
140 m_diff_context
.end_snap_id
, &diff
, &end_size
,
141 &end_exists
, &clone_end_snap_id
, &whole_object
);
143 ldout(cct
, 1) << "object " << m_oid
<< ": need to provide full object"
146 ldout(cct
, 20) << " diff " << diff
<< " end_exists=" << end_exists
// nothing changed in the object itself
148 if (diff
.empty() && !whole_object
) {
// diffing from the beginning and the object does not exist at the end
// snapshot: report whatever the parent contributes instead
149 if (m_diff_context
.from_snap_id
== 0 && !end_exists
) {
150 compute_parent_overlap(diffs
);
// either the caller asked for whole-object granularity or
// calc_snap_set_diff requested it for this object
153 } else if (m_diff_context
.whole_object
|| whole_object
) {
154 // provide the full object extents to the callback
155 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
156 q
!= m_object_extents
.end(); ++q
) {
157 diffs
->push_back(boost::make_tuple(m_offset
+ q
->offset
, q
->length
,
// otherwise intersect each requested extent with the computed diff
163 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
164 q
!= m_object_extents
.end(); ++q
) {
165 ldout(cct
, 20) << "diff_iterate object " << m_oid
<< " extent "
166 << q
->offset
<< "~" << q
->length
<< " from "
167 << q
->buffer_extents
<< dendl
;
// opos tracks the current position inside the object for this extent
168 uint64_t opos
= q
->offset
;
// each buffer extent is a (first, second) pair; second is used as a length
// and first as an offset relative to m_offset
169 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
170 q
->buffer_extents
.begin();
171 r
!= q
->buffer_extents
.end(); ++r
) {
172 interval_set
<uint64_t> overlap
; // object extents
173 overlap
.insert(opos
, r
->second
);
174 overlap
.intersection_of(diff
);
175 ldout(cct
, 20) << " opos " << opos
176 << " buf " << r
->first
<< "~" << r
->second
177 << " overlap " << overlap
<< dendl
;
178 for (interval_set
<uint64_t>::iterator s
= overlap
.begin();
179 s
!= overlap
.end(); ++s
) {
// distance of the changed range from the start of this buffer extent,
// translated into an image offset
180 uint64_t su_off
= s
.get_start() - opos
;
181 uint64_t logical_off
= m_offset
+ r
->first
+ su_off
;
182 ldout(cct
, 20) << " overlap extent " << s
.get_start() << "~"
183 << s
.get_len() << " logical " << logical_off
<< "~"
184 << s
.get_len() << dendl
;
185 diffs
->push_back(boost::make_tuple(logical_off
, s
.get_len(),
// sanity check: the buffer extents must add up to exactly the object extent
// NOTE(review): presumably opos is advanced by r->second per buffer extent;
// confirm against the full loop body
190 ceph_assert(opos
== q
->offset
+ q
->length
);
// Appends to *diffs the parts of this object's extents that intersect the
// parent image's diff. Only applies when diffing from snap id 0 and a parent
// diff was collected.
194 void compute_parent_overlap(Diffs
*diffs
) {
195 if (m_diff_context
.from_snap_id
== 0 &&
196 !m_diff_context
.parent_diff
.empty()) {
197 // report parent diff instead
198 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
199 q
!= m_object_extents
.end(); ++q
) {
200 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
201 q
->buffer_extents
.begin();
202 r
!= q
->buffer_extents
.end(); ++r
) {
// build the image-level interval for this buffer extent and keep only the
// portion that is also present in the parent diff
203 interval_set
<uint64_t> o
;
204 o
.insert(m_offset
+ r
->first
, r
->second
);
205 o
.intersection_of(m_diff_context
.parent_diff
);
206 ldout(m_cct
, 20) << " reporting parent overlap " << o
<< dendl
;
// the intervals in o are already image offsets, so they are reported as-is
207 for (interval_set
<uint64_t>::iterator s
= o
.begin(); s
!= o
.end();
209 diffs
->push_back(boost::make_tuple(s
.get_start(), s
.get_len(),
// Diff callback that accumulates reported extents into an interval_set.
//   off, len - extent being reported
//   exists   - flag supplied by the diff machinery for the extent
//   arg      - must point to an interval_set<uint64_t>; receives (off, len)
// execute() uses a parent_diff interval_set as the sink when diffing the
// parent image.
218 int simple_diff_cb(uint64_t off
, size_t len
, int exists
, void *arg
) {
219 // it's possible for a discard to create a hole in the parent image -- ignore
221 interval_set
<uint64_t> *diff
= static_cast<interval_set
<uint64_t> *>(arg
);
222 diff
->insert(off
, len
);
227 } // anonymous namespace
// Entry point: flushes outstanding writes, refreshes the image if required,
// clips the requested range and then runs a DiffIterate over it.
//   ictx                 - image to diff
//   from_snap_namespace,
//   fromsnapname         - identify the starting snapshot (forwarded as-is)
//   off, len             - image range; len may be adjusted by clip_io()
//   include_parent,
//   whole_object         - forwarded to the DiffIterate instance
//   cb                   - callback receiving (offset, length, flag, arg)
229 template <typename I
>
230 int DiffIterate
<I
>::diff_iterate(I
*ictx
,
231 const cls::rbd::SnapshotNamespace
& from_snap_namespace
,
232 const char *fromsnapname
,
233 uint64_t off
, uint64_t len
,
234 bool include_parent
, bool whole_object
,
235 int (*cb
)(uint64_t, size_t, int, void *),
238 ldout(ictx
->cct
, 20) << "diff_iterate " << ictx
<< " off = " << off
239 << " len = " << len
<< dendl
;
241 // ensure previous writes are visible to listsnaps
242 C_SaferCond flush_ctx
;
// the flush request is created while holding owner_lock for read
244 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
245 auto aio_comp
= io::AioCompletion::create_and_start(&flush_ctx
, ictx
,
247 auto req
= io::ImageDispatchSpec
<I
>::create_flush_request(
248 *ictx
, aio_comp
, io::FLUSH_SOURCE_INTERNAL
, {});
// block until the flush has completed
252 int r
= flush_ctx
.wait();
257 r
= ictx
->state
->refresh_if_required();
// clip_io() is called with snap_lock held for read; the lock is taken and
// released manually around the call
262 ictx
->snap_lock
.get_read();
263 r
= clip_io(ictx
, off
, &len
);
264 ictx
->snap_lock
.put_read();
// run the actual diff over the (possibly clipped) range
269 DiffIterate
command(*ictx
, from_snap_namespace
, fromsnapname
, off
, len
,
270 include_parent
, whole_object
, cb
, arg
);
271 r
= command
.execute();
// Runs the diff over [m_offset, m_offset + m_length): resolves the snapshot
// range, optionally computes a fast diff from object maps, optionally
// collects the parent's diff, then walks the range one stripe period at a
// time and reports per-object differences through the callback.
275 template <typename I
>
276 int DiffIterate
<I
>::execute() {
277 CephContext
* cct
= m_image_ctx
.cct
;
279 librados::IoCtx head_ctx
;
280 librados::snap_t from_snap_id
= 0;
281 librados::snap_t end_snap_id
;
282 uint64_t from_size
= 0;
// snapshot metadata is read with md_lock and snap_lock held for read
285 RWLock::RLocker
md_locker(m_image_ctx
.md_lock
);
286 RWLock::RLocker
snap_locker(m_image_ctx
.snap_lock
);
// work on a private duplicate of the data IoCtx; its snap read context is
// changed further below
287 head_ctx
.dup(m_image_ctx
.data_ctx
);
// with a starting snapshot name, resolve its id and image size; otherwise
// from_snap_id stays 0
288 if (m_from_snap_name
) {
289 from_snap_id
= m_image_ctx
.get_snap_id(m_from_snap_namespace
, m_from_snap_name
);
290 from_size
= m_image_ctx
.get_image_size(from_snap_id
);
// the end of the range is whatever snapshot the image is currently set to
292 end_snap_id
= m_image_ctx
.snap_id
;
293 end_size
= m_image_ctx
.get_image_size(end_snap_id
);
// validation of the resolved snapshot range (unresolved start, identical
// start/end, start not before end)
296 if (from_snap_id
== CEPH_NOSNAP
) {
299 if (from_snap_id
== end_snap_id
) {
303 if (from_snap_id
>= end_snap_id
) {
// fast diff: only attempted for whole-object requests on images that have
// the FAST_DIFF feature bit set
308 bool fast_diff_enabled
= false;
309 BitVector
<2> object_diff_state
;
311 RWLock::RLocker
snap_locker(m_image_ctx
.snap_lock
);
312 if (m_whole_object
&& (m_image_ctx
.features
& RBD_FEATURE_FAST_DIFF
) != 0) {
313 r
= diff_object_map(from_snap_id
, end_snap_id
, &object_diff_state
);
315 ldout(cct
, 5) << "fast diff disabled" << dendl
;
317 ldout(cct
, 5) << "fast diff enabled" << dendl
;
318 fast_diff_enabled
= true;
323 // we must list snaps via the head, not end snap
324 head_ctx
.snap_set_read(CEPH_SNAPDIR
);
326 ldout(cct
, 5) << "diff_iterate from " << from_snap_id
<< " to "
327 << end_snap_id
<< " size from " << from_size
328 << " to " << end_size
<< dendl
;
330 // check parent overlap only if we are comparing to the beginning of time
331 DiffContext
diff_context(m_image_ctx
, m_callback
, m_callback_arg
,
332 m_whole_object
, from_snap_id
, end_snap_id
);
333 if (m_include_parent
&& from_snap_id
== 0) {
334 RWLock::RLocker
l(m_image_ctx
.snap_lock
);
335 RWLock::RLocker
l2(m_image_ctx
.parent_lock
);
336 uint64_t overlap
= 0;
337 m_image_ctx
.get_parent_overlap(m_image_ctx
.snap_id
, &overlap
);
// diff the parent first; its extents are collected into
// diff_context.parent_diff for later use by C_DiffObject
339 if (m_image_ctx
.parent
&& overlap
> 0) {
340 ldout(cct
, 10) << " first getting parent diff" << dendl
;
341 DiffIterate
diff_parent(*m_image_ctx
.parent
, {},
343 m_include_parent
, m_whole_object
,
345 &diff_context
.parent_diff
);
346 r
= diff_parent
.execute();
// walk the requested range in chunks that never cross a stripe period
353 uint64_t period
= m_image_ctx
.get_stripe_period();
354 uint64_t off
= m_offset
;
355 uint64_t left
= m_length
;
// period_off is off rounded down to a period boundary; read_len is the
// remainder of that period, capped by what is left
358 uint64_t period_off
= off
- (off
% period
);
359 uint64_t read_len
= min(period_off
+ period
- off
, left
);
// map the image range onto per-object extents
362 map
<object_t
,vector
<ObjectExtent
> > object_extents
;
363 Striper::file_to_extents(cct
, m_image_ctx
.format_string
,
364 &m_image_ctx
.layout
, off
, read_len
, 0,
367 // get snap info for each object
368 for (map
<object_t
,vector
<ObjectExtent
> >::iterator p
=
369 object_extents
.begin();
370 p
!= object_extents
.end(); ++p
) {
371 ldout(cct
, 20) << "object " << p
->first
<< dendl
;
// fast path: answer from the precomputed per-object state without touching
// the object itself
373 if (fast_diff_enabled
) {
374 const uint64_t object_no
= p
->second
.front().objectno
;
375 if (object_diff_state
[object_no
] != OBJECT_DIFF_STATE_NONE
) {
// UPDATED is reported with a true flag; any other non-NONE state with false
376 bool updated
= (object_diff_state
[object_no
] ==
377 OBJECT_DIFF_STATE_UPDATED
);
378 for (std::vector
<ObjectExtent
>::iterator q
= p
->second
.begin();
379 q
!= p
->second
.end(); ++q
) {
380 r
= m_callback(off
+ q
->offset
, q
->length
, updated
, m_callback_arg
);
// slow path: a C_DiffObject is allocated to list the object's snapshots
// NOTE(review): ownership/deletion of diff_object is not visible in these
// lines -- confirm it is released by the Context machinery
387 C_DiffObject
*diff_object
= new C_DiffObject(m_image_ctx
, head_ctx
,
// stop early if a previously issued request already failed
393 if (diff_context
.throttle
.pending_error()) {
394 r
= diff_context
.throttle
.wait_for_ret();
// wait for all outstanding per-object requests and pick up their result
404 r
= diff_context
.throttle
.wait_for_ret();
// Computes a per-object diff state between two snapshots by comparing the
// object maps of successive snapshots.
//   from_snap_id      - starting snapshot; 0 is treated as "from the start"
//   to_snap_id        - ending snapshot
//   object_diff_state - output, one ObjectDiffState value per object
// The caller must hold m_image_ctx.snap_lock (asserted below).
411 template <typename I
>
412 int DiffIterate
<I
>::diff_object_map(uint64_t from_snap_id
, uint64_t to_snap_id
,
413 BitVector
<2>* object_diff_state
) {
414 ceph_assert(m_image_ctx
.snap_lock
.is_locked());
415 CephContext
* cct
= m_image_ctx
.cct
;
// when diffing from the start, from_snap_id is replaced by
// m_image_ctx.snaps.back() if any snapshots exist, or by CEPH_NOSNAP
417 bool diff_from_start
= (from_snap_id
== 0);
418 if (from_snap_id
== 0) {
419 if (!m_image_ctx
.snaps
.empty()) {
420 from_snap_id
= m_image_ctx
.snaps
.back();
422 from_snap_id
= CEPH_NOSNAP
;
426 object_diff_state
->clear();
427 uint64_t current_snap_id
= from_snap_id
;
428 uint64_t next_snap_id
= to_snap_id
;
// object map of the previously processed snapshot, and whether it has been
// populated yet
429 BitVector
<2> prev_object_map
;
430 bool prev_object_map_valid
= false;
// size of the image at the snapshot being processed; defaults to the
// current image size and is overridden from snap_info for real snapshots
432 uint64_t current_size
= m_image_ctx
.size
;
433 if (current_snap_id
!= CEPH_NOSNAP
) {
434 std::map
<librados::snap_t
, SnapInfo
>::const_iterator snap_it
=
435 m_image_ctx
.snap_info
.find(current_snap_id
);
436 ceph_assert(snap_it
!= m_image_ctx
.snap_info
.end());
437 current_size
= snap_it
->second
.size
;
// pick the snapshot to process next, or CEPH_NOSNAP when snap_info has no
// further entry
// NOTE(review): the statement advancing snap_it is not visible here
440 if (snap_it
!= m_image_ctx
.snap_info
.end()) {
441 next_snap_id
= snap_it
->first
;
443 next_snap_id
= CEPH_NOSNAP
;
// fast diff cannot be used when the image flags mark it invalid
448 int r
= m_image_ctx
.get_flags(from_snap_id
, &flags
);
450 lderr(cct
) << "diff_object_map: failed to retrieve image flags" << dendl
;
453 if ((flags
& RBD_FLAG_FAST_DIFF_INVALID
) != 0) {
454 ldout(cct
, 1) << "diff_object_map: cannot perform fast diff on invalid "
455 << "object map" << dendl
;
// load the object map for the snapshot being processed
459 BitVector
<2> object_map
;
460 std::string
oid(ObjectMap
<>::object_map_name(m_image_ctx
.id
,
462 r
= cls_client::object_map_load(&m_image_ctx
.md_ctx
, oid
, &object_map
);
464 lderr(cct
) << "diff_object_map: failed to load object map " << oid
468 ldout(cct
, 20) << "diff_object_map: loaded object map " << oid
<< dendl
;
// the loaded map must cover at least the number of objects implied by the
// layout
470 uint64_t num_objs
= Striper::get_num_objects(m_image_ctx
.layout
,
472 if (object_map
.size() < num_objs
) {
473 ldout(cct
, 1) << "diff_object_map: object map too small: "
474 << object_map
.size() << " < " << num_objs
<< dendl
;
// normalise both the loaded map and the output to num_objs entries
477 object_map
.resize(num_objs
);
478 object_diff_state
->resize(object_map
.size());
// first pass: objects present in both the previous and the current map
480 uint64_t overlap
= std::min(object_map
.size(), prev_object_map
.size());
481 auto it
= object_map
.begin();
482 auto overlap_end_it
= it
+ overlap
;
483 auto pre_it
= prev_object_map
.begin();
484 auto diff_it
= object_diff_state
->begin();
486 for (; it
!= overlap_end_it
; ++it
, ++pre_it
, ++diff_it
, ++i
) {
487 ldout(cct
, 20) << __func__
<< ": object state: " << i
<< " "
488 << static_cast<uint32_t>(*pre_it
)
489 << "->" << static_cast<uint32_t>(*it
) << dendl
;
// object is gone now but existed before: record a hole
490 if (*it
== OBJECT_NONEXISTENT
) {
491 if (*pre_it
!= OBJECT_NONEXISTENT
) {
492 *diff_it
= OBJECT_DIFF_STATE_HOLE
;
// an object currently marked OBJECT_EXISTS is recorded as updated; the
// remaining part of this condition excludes the EXISTS -> EXISTS_CLEAN
// transition
494 } else if (*it
== OBJECT_EXISTS
||
496 !(*pre_it
== OBJECT_EXISTS
&&
497 *it
== OBJECT_EXISTS_CLEAN
))) {
498 *diff_it
= OBJECT_DIFF_STATE_UPDATED
;
501 ldout(cct
, 20) << "diff_object_map: computed overlap diffs" << dendl
;
// second pass: objects beyond the end of the previous map (image grew);
// only done when diffing from the start or once a previous map exists
502 auto end_it
= object_map
.end();
503 if (object_map
.size() > prev_object_map
.size() &&
504 (diff_from_start
|| prev_object_map_valid
)) {
505 for (; it
!= end_it
; ++it
,++diff_it
, ++i
) {
506 ldout(cct
, 20) << __func__
<< ": object state: " << i
<< " "
507 << "->" << static_cast<uint32_t>(*it
) << dendl
;
// new objects are either absent (no diff) or reported as updated
508 if (*it
== OBJECT_NONEXISTENT
) {
509 *diff_it
= OBJECT_DIFF_STATE_NONE
;
511 *diff_it
= OBJECT_DIFF_STATE_UPDATED
;
515 ldout(cct
, 20) << "diff_object_map: computed resize diffs" << dendl
;
// done once there is no further snapshot or the next one lies past
// to_snap_id
517 if (current_snap_id
== next_snap_id
|| next_snap_id
> to_snap_id
) {
// otherwise advance to the next snapshot, keeping this map for comparison
520 current_snap_id
= next_snap_id
;
521 prev_object_map
= object_map
;
522 prev_object_map_valid
= true;
528 } // namespace librbd
530 template class librbd::api::DiffIterate
<librbd::ImageCtx
>;