]>
git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/api/DiffIterate.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ImageDispatchSpec.h"
12 #include "librbd/io/ImageRequestWQ.h"
13 #include "librbd/object_map/DiffRequest.h"
14 #include "include/rados/librados.hpp"
15 #include "include/interval_set.h"
16 #include "common/errno.h"
17 #include "common/Cond.h"
18 #include "common/Throttle.h"
19 #include "osdc/Striper.h"
20 #include "librados/snap_set_diff.h"
21 #include <boost/tuple/tuple.hpp>
26 #define dout_subsys ceph_subsys_rbd
28 #define dout_prefix *_dout << "librbd::DiffIterate: "
36 DiffIterate
<>::Callback callback
;
39 uint64_t from_snap_id
;
41 interval_set
<uint64_t> parent_diff
;
42 OrderedThrottle throttle
;
45 DiffContext(I
&image_ctx
, DiffIterate
<>::Callback callback
,
46 void *callback_arg
, bool _whole_object
, uint64_t _from_snap_id
,
47 uint64_t _end_snap_id
)
48 : callback(callback
), callback_arg(callback_arg
),
49 whole_object(_whole_object
), from_snap_id(_from_snap_id
),
50 end_snap_id(_end_snap_id
),
51 throttle(image_ctx
.config
.template get_val
<uint64_t>("rbd_concurrent_management_ops"), true) {
55 class C_DiffObject
: public Context
{
58 C_DiffObject(I
&image_ctx
, librados::IoCtx
&head_ctx
,
59 DiffContext
&diff_context
, const std::string
&oid
,
60 uint64_t offset
, const std::vector
<ObjectExtent
> &object_extents
)
61 : m_cct(image_ctx
.cct
), m_head_ctx(head_ctx
),
62 m_diff_context(diff_context
), m_oid(oid
), m_offset(offset
),
63 m_object_extents(object_extents
), m_snap_ret(0) {
67 C_OrderedThrottle
*ctx
= m_diff_context
.throttle
.start_op(this);
68 librados::AioCompletion
*rados_completion
=
69 util::create_rados_callback(ctx
);
71 librados::ObjectReadOperation op
;
72 op
.list_snaps(&m_snap_set
, &m_snap_ret
);
74 int r
= m_head_ctx
.aio_operate(m_oid
, rados_completion
, &op
, NULL
);
76 rados_completion
->release();
80 typedef boost::tuple
<uint64_t, size_t, bool> Diff
;
81 typedef std::list
<Diff
> Diffs
;
83 void finish(int r
) override
{
84 CephContext
*cct
= m_cct
;
85 if (r
== 0 && m_snap_ret
< 0) {
91 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps complete" << dendl
;
92 compute_diffs(&diffs
);
93 } else if (r
== -ENOENT
) {
94 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps (not found)"
97 compute_parent_overlap(&diffs
);
99 ldout(cct
, 20) << "object " << m_oid
<< ": list_snaps failed: "
100 << cpp_strerror(r
) << dendl
;
104 for (Diffs::const_iterator d
= diffs
.begin(); d
!= diffs
.end(); ++d
) {
105 r
= m_diff_context
.callback(d
->get
<0>(), d
->get
<1>(), d
->get
<2>(),
106 m_diff_context
.callback_arg
);
112 m_diff_context
.throttle
.end_op(r
);
117 librados::IoCtx
&m_head_ctx
;
118 DiffContext
&m_diff_context
;
121 std::vector
<ObjectExtent
> m_object_extents
;
123 librados::snap_set_t m_snap_set
;
126 void compute_diffs(Diffs
*diffs
) {
127 CephContext
*cct
= m_cct
;
129 // calc diff from from_snap_id -> to_snap_id
130 interval_set
<uint64_t> diff
;
133 librados::snap_t clone_end_snap_id
;
135 calc_snap_set_diff(cct
, m_snap_set
, m_diff_context
.from_snap_id
,
136 m_diff_context
.end_snap_id
, &diff
, &end_size
,
137 &end_exists
, &clone_end_snap_id
, &whole_object
);
139 ldout(cct
, 1) << "object " << m_oid
<< ": need to provide full object"
142 ldout(cct
, 20) << " diff " << diff
<< " end_exists=" << end_exists
144 if (diff
.empty() && !whole_object
) {
145 if (m_diff_context
.from_snap_id
== 0 && !end_exists
) {
146 compute_parent_overlap(diffs
);
149 } else if (m_diff_context
.whole_object
|| whole_object
) {
150 // provide the full object extents to the callback
151 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
152 q
!= m_object_extents
.end(); ++q
) {
153 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
154 q
->buffer_extents
.begin();
155 r
!= q
->buffer_extents
.end(); ++r
) {
156 diffs
->push_back(boost::make_tuple(m_offset
+ r
->first
, r
->second
,
163 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
164 q
!= m_object_extents
.end(); ++q
) {
165 ldout(cct
, 20) << "diff_iterate object " << m_oid
<< " extent "
166 << q
->offset
<< "~" << q
->length
<< " from "
167 << q
->buffer_extents
<< dendl
;
168 uint64_t opos
= q
->offset
;
169 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
170 q
->buffer_extents
.begin();
171 r
!= q
->buffer_extents
.end(); ++r
) {
172 interval_set
<uint64_t> overlap
; // object extents
173 overlap
.insert(opos
, r
->second
);
174 overlap
.intersection_of(diff
);
175 ldout(cct
, 20) << " opos " << opos
176 << " buf " << r
->first
<< "~" << r
->second
177 << " overlap " << overlap
<< dendl
;
178 for (interval_set
<uint64_t>::iterator s
= overlap
.begin();
179 s
!= overlap
.end(); ++s
) {
180 uint64_t su_off
= s
.get_start() - opos
;
181 uint64_t logical_off
= m_offset
+ r
->first
+ su_off
;
182 ldout(cct
, 20) << " overlap extent " << s
.get_start() << "~"
183 << s
.get_len() << " logical " << logical_off
<< "~"
184 << s
.get_len() << dendl
;
185 diffs
->push_back(boost::make_tuple(logical_off
, s
.get_len(),
190 ceph_assert(opos
== q
->offset
+ q
->length
);
194 void compute_parent_overlap(Diffs
*diffs
) {
195 if (m_diff_context
.from_snap_id
== 0 &&
196 !m_diff_context
.parent_diff
.empty()) {
197 // report parent diff instead
198 for (vector
<ObjectExtent
>::iterator q
= m_object_extents
.begin();
199 q
!= m_object_extents
.end(); ++q
) {
200 for (vector
<pair
<uint64_t,uint64_t> >::iterator r
=
201 q
->buffer_extents
.begin();
202 r
!= q
->buffer_extents
.end(); ++r
) {
203 interval_set
<uint64_t> o
;
204 o
.insert(m_offset
+ r
->first
, r
->second
);
205 o
.intersection_of(m_diff_context
.parent_diff
);
206 ldout(m_cct
, 20) << " reporting parent overlap " << o
<< dendl
;
207 for (interval_set
<uint64_t>::iterator s
= o
.begin(); s
!= o
.end();
209 diffs
->push_back(boost::make_tuple(s
.get_start(), s
.get_len(),
218 int simple_diff_cb(uint64_t off
, size_t len
, int exists
, void *arg
) {
219 // it's possible for a discard to create a hole in the parent image -- ignore
221 interval_set
<uint64_t> *diff
= static_cast<interval_set
<uint64_t> *>(arg
);
222 diff
->insert(off
, len
);
227 } // anonymous namespace
229 template <typename I
>
230 int DiffIterate
<I
>::diff_iterate(I
*ictx
,
231 const cls::rbd::SnapshotNamespace
& from_snap_namespace
,
232 const char *fromsnapname
,
233 uint64_t off
, uint64_t len
,
234 bool include_parent
, bool whole_object
,
235 int (*cb
)(uint64_t, size_t, int, void *),
238 ldout(ictx
->cct
, 20) << "diff_iterate " << ictx
<< " off = " << off
239 << " len = " << len
<< dendl
;
241 if (!ictx
->data_ctx
.is_valid()) {
245 // ensure previous writes are visible to listsnaps
246 C_SaferCond flush_ctx
;
248 std::shared_lock owner_locker
{ictx
->owner_lock
};
249 auto aio_comp
= io::AioCompletion::create_and_start(&flush_ctx
, ictx
,
251 auto req
= io::ImageDispatchSpec
<I
>::create_flush_request(
252 *ictx
, aio_comp
, io::FLUSH_SOURCE_INTERNAL
, {});
256 int r
= flush_ctx
.wait();
261 r
= ictx
->state
->refresh_if_required();
266 ictx
->image_lock
.lock_shared();
267 r
= clip_io(ictx
, off
, &len
);
268 ictx
->image_lock
.unlock_shared();
273 DiffIterate
command(*ictx
, from_snap_namespace
, fromsnapname
, off
, len
,
274 include_parent
, whole_object
, cb
, arg
);
275 r
= command
.execute();
279 template <typename I
>
280 int DiffIterate
<I
>::execute() {
281 CephContext
* cct
= m_image_ctx
.cct
;
283 ceph_assert(m_image_ctx
.data_ctx
.is_valid());
285 librados::IoCtx head_ctx
;
286 librados::snap_t from_snap_id
= 0;
287 librados::snap_t end_snap_id
;
288 uint64_t from_size
= 0;
291 std::shared_lock image_locker
{m_image_ctx
.image_lock
};
292 head_ctx
.dup(m_image_ctx
.data_ctx
);
293 if (m_from_snap_name
) {
294 from_snap_id
= m_image_ctx
.get_snap_id(m_from_snap_namespace
, m_from_snap_name
);
295 from_size
= m_image_ctx
.get_image_size(from_snap_id
);
297 end_snap_id
= m_image_ctx
.snap_id
;
298 end_size
= m_image_ctx
.get_image_size(end_snap_id
);
301 if (from_snap_id
== CEPH_NOSNAP
) {
304 if (from_snap_id
== end_snap_id
) {
308 if (from_snap_id
>= end_snap_id
) {
313 bool fast_diff_enabled
= false;
314 BitVector
<2> object_diff_state
;
315 if (m_whole_object
) {
317 auto req
= object_map::DiffRequest
<I
>::create(&m_image_ctx
, from_snap_id
,
319 &object_diff_state
, &ctx
);
324 ldout(cct
, 5) << "fast diff disabled" << dendl
;
326 ldout(cct
, 5) << "fast diff enabled" << dendl
;
327 fast_diff_enabled
= true;
331 // we must list snaps via the head, not end snap
332 head_ctx
.snap_set_read(CEPH_SNAPDIR
);
334 ldout(cct
, 5) << "diff_iterate from " << from_snap_id
<< " to "
335 << end_snap_id
<< " size from " << from_size
336 << " to " << end_size
<< dendl
;
338 // check parent overlap only if we are comparing to the beginning of time
339 DiffContext
diff_context(m_image_ctx
, m_callback
, m_callback_arg
,
340 m_whole_object
, from_snap_id
, end_snap_id
);
341 if (m_include_parent
&& from_snap_id
== 0) {
342 std::shared_lock image_locker
{m_image_ctx
.image_lock
};
343 uint64_t overlap
= 0;
344 m_image_ctx
.get_parent_overlap(m_image_ctx
.snap_id
, &overlap
);
346 if (m_image_ctx
.parent
&& overlap
> 0) {
347 ldout(cct
, 10) << " first getting parent diff" << dendl
;
348 DiffIterate
diff_parent(*m_image_ctx
.parent
, {},
350 m_include_parent
, m_whole_object
,
352 &diff_context
.parent_diff
);
353 r
= diff_parent
.execute();
360 uint64_t period
= m_image_ctx
.get_stripe_period();
361 uint64_t off
= m_offset
;
362 uint64_t left
= m_length
;
365 uint64_t period_off
= off
- (off
% period
);
366 uint64_t read_len
= min(period_off
+ period
- off
, left
);
369 map
<object_t
,vector
<ObjectExtent
> > object_extents
;
370 Striper::file_to_extents(cct
, m_image_ctx
.format_string
,
371 &m_image_ctx
.layout
, off
, read_len
, 0,
374 // get snap info for each object
375 for (map
<object_t
,vector
<ObjectExtent
> >::iterator p
=
376 object_extents
.begin();
377 p
!= object_extents
.end(); ++p
) {
378 ldout(cct
, 20) << "object " << p
->first
<< dendl
;
380 if (fast_diff_enabled
) {
381 const uint64_t object_no
= p
->second
.front().objectno
;
382 uint8_t diff_state
= object_diff_state
[object_no
];
383 if (diff_state
== object_map::DIFF_STATE_HOLE
&&
384 from_snap_id
== 0 && !diff_context
.parent_diff
.empty()) {
385 // no data in child object -- report parent diff instead
386 for (auto& oe
: p
->second
) {
387 for (auto& be
: oe
.buffer_extents
) {
388 interval_set
<uint64_t> o
;
389 o
.insert(off
+ be
.first
, be
.second
);
390 o
.intersection_of(diff_context
.parent_diff
);
391 ldout(cct
, 20) << " reporting parent overlap " << o
<< dendl
;
392 for (auto e
= o
.begin(); e
!= o
.end(); ++e
) {
393 r
= m_callback(e
.get_start(), e
.get_len(), true,
401 } else if (diff_state
== object_map::DIFF_STATE_HOLE_UPDATED
||
402 diff_state
== object_map::DIFF_STATE_DATA_UPDATED
) {
403 bool updated
= (diff_state
== object_map::DIFF_STATE_DATA_UPDATED
);
404 for (std::vector
<ObjectExtent
>::iterator q
= p
->second
.begin();
405 q
!= p
->second
.end(); ++q
) {
406 for (auto& be
: q
->buffer_extents
) {
407 r
= m_callback(off
+ be
.first
, be
.second
, updated
,
416 C_DiffObject
*diff_object
= new C_DiffObject(m_image_ctx
, head_ctx
,
422 if (diff_context
.throttle
.pending_error()) {
423 r
= diff_context
.throttle
.wait_for_ret();
433 r
= diff_context
.throttle
.wait_for_ret();
441 } // namespace librbd
443 template class librbd::api::DiffIterate
<librbd::ImageCtx
>;