]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/api/DiffIterate.cc
7e47c7ec3a82ec6d0879fdae3314a86ec224bfd1
[ceph.git] / ceph / src / librbd / api / DiffIterate.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ImageDispatchSpec.h"
12 #include "librbd/io/ImageRequestWQ.h"
13 #include "librbd/object_map/DiffRequest.h"
14 #include "include/rados/librados.hpp"
15 #include "include/interval_set.h"
16 #include "common/errno.h"
17 #include "common/Cond.h"
18 #include "common/Throttle.h"
19 #include "osdc/Striper.h"
20 #include "librados/snap_set_diff.h"
21 #include <boost/tuple/tuple.hpp>
22 #include <list>
23 #include <map>
24 #include <vector>
25
26 #define dout_subsys ceph_subsys_rbd
27 #undef dout_prefix
28 #define dout_prefix *_dout << "librbd::DiffIterate: "
29
30 namespace librbd {
31 namespace api {
32
33 namespace {
34
// Per-object state values compared against the entries of the fast-diff
// object_diff_state bit vector in DiffIterate::execute().
enum ObjectDiffState {
  OBJECT_DIFF_STATE_NONE    = 0,  // object is skipped (or parent diff is used)
  OBJECT_DIFF_STATE_UPDATED = 1,  // extents are reported with exists == true
  OBJECT_DIFF_STATE_HOLE    = 2,  // extents are reported with exists == false
};
40
41 struct DiffContext {
42 DiffIterate<>::Callback callback;
43 void *callback_arg;
44 bool whole_object;
45 uint64_t from_snap_id;
46 uint64_t end_snap_id;
47 interval_set<uint64_t> parent_diff;
48 OrderedThrottle throttle;
49
50 template <typename I>
51 DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
52 void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
53 uint64_t _end_snap_id)
54 : callback(callback), callback_arg(callback_arg),
55 whole_object(_whole_object), from_snap_id(_from_snap_id),
56 end_snap_id(_end_snap_id),
57 throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
58 }
59 };
60
61 class C_DiffObject : public Context {
62 public:
63 template <typename I>
64 C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
65 DiffContext &diff_context, const std::string &oid,
66 uint64_t offset, const std::vector<ObjectExtent> &object_extents)
67 : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
68 m_diff_context(diff_context), m_oid(oid), m_offset(offset),
69 m_object_extents(object_extents), m_snap_ret(0) {
70 }
71
72 void send() {
73 C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
74 librados::AioCompletion *rados_completion =
75 util::create_rados_callback(ctx);
76
77 librados::ObjectReadOperation op;
78 op.list_snaps(&m_snap_set, &m_snap_ret);
79
80 int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
81 ceph_assert(r == 0);
82 rados_completion->release();
83 }
84
85 protected:
86 typedef boost::tuple<uint64_t, size_t, bool> Diff;
87 typedef std::list<Diff> Diffs;
88
89 void finish(int r) override {
90 CephContext *cct = m_cct;
91 if (r == 0 && m_snap_ret < 0) {
92 r = m_snap_ret;
93 }
94
95 Diffs diffs;
96 if (r == 0) {
97 ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
98 compute_diffs(&diffs);
99 } else if (r == -ENOENT) {
100 ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
101 << dendl;
102 r = 0;
103 compute_parent_overlap(&diffs);
104 } else {
105 ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
106 << cpp_strerror(r) << dendl;
107 }
108
109 if (r == 0) {
110 for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
111 r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
112 m_diff_context.callback_arg);
113 if (r < 0) {
114 break;
115 }
116 }
117 }
118 m_diff_context.throttle.end_op(r);
119 }
120
121 private:
122 CephContext *m_cct;
123 librados::IoCtx &m_head_ctx;
124 DiffContext &m_diff_context;
125 std::string m_oid;
126 uint64_t m_offset;
127 std::vector<ObjectExtent> m_object_extents;
128
129 librados::snap_set_t m_snap_set;
130 int m_snap_ret;
131
132 void compute_diffs(Diffs *diffs) {
133 CephContext *cct = m_cct;
134
135 // calc diff from from_snap_id -> to_snap_id
136 interval_set<uint64_t> diff;
137 uint64_t end_size;
138 bool end_exists;
139 librados::snap_t clone_end_snap_id;
140 bool whole_object;
141 calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
142 m_diff_context.end_snap_id, &diff, &end_size,
143 &end_exists, &clone_end_snap_id, &whole_object);
144 if (whole_object) {
145 ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
146 << dendl;
147 }
148 ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
149 << dendl;
150 if (diff.empty() && !whole_object) {
151 if (m_diff_context.from_snap_id == 0 && !end_exists) {
152 compute_parent_overlap(diffs);
153 }
154 return;
155 } else if (m_diff_context.whole_object || whole_object) {
156 // provide the full object extents to the callback
157 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
158 q != m_object_extents.end(); ++q) {
159 diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
160 end_exists));
161 }
162 return;
163 }
164
165 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
166 q != m_object_extents.end(); ++q) {
167 ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
168 << q->offset << "~" << q->length << " from "
169 << q->buffer_extents << dendl;
170 uint64_t opos = q->offset;
171 for (vector<pair<uint64_t,uint64_t> >::iterator r =
172 q->buffer_extents.begin();
173 r != q->buffer_extents.end(); ++r) {
174 interval_set<uint64_t> overlap; // object extents
175 overlap.insert(opos, r->second);
176 overlap.intersection_of(diff);
177 ldout(cct, 20) << " opos " << opos
178 << " buf " << r->first << "~" << r->second
179 << " overlap " << overlap << dendl;
180 for (interval_set<uint64_t>::iterator s = overlap.begin();
181 s != overlap.end(); ++s) {
182 uint64_t su_off = s.get_start() - opos;
183 uint64_t logical_off = m_offset + r->first + su_off;
184 ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
185 << s.get_len() << " logical " << logical_off << "~"
186 << s.get_len() << dendl;
187 diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
188 end_exists));
189 }
190 opos += r->second;
191 }
192 ceph_assert(opos == q->offset + q->length);
193 }
194 }
195
196 void compute_parent_overlap(Diffs *diffs) {
197 if (m_diff_context.from_snap_id == 0 &&
198 !m_diff_context.parent_diff.empty()) {
199 // report parent diff instead
200 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
201 q != m_object_extents.end(); ++q) {
202 for (vector<pair<uint64_t,uint64_t> >::iterator r =
203 q->buffer_extents.begin();
204 r != q->buffer_extents.end(); ++r) {
205 interval_set<uint64_t> o;
206 o.insert(m_offset + r->first, r->second);
207 o.intersection_of(m_diff_context.parent_diff);
208 ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
209 for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
210 ++s) {
211 diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
212 true));
213 }
214 }
215 }
216 }
217 }
218 };
219
220 int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
221 // it's possible for a discard to create a hole in the parent image -- ignore
222 if (exists) {
223 interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
224 diff->insert(off, len);
225 }
226 return 0;
227 }
228
229 } // anonymous namespace
230
231 template <typename I>
232 int DiffIterate<I>::diff_iterate(I *ictx,
233 const cls::rbd::SnapshotNamespace& from_snap_namespace,
234 const char *fromsnapname,
235 uint64_t off, uint64_t len,
236 bool include_parent, bool whole_object,
237 int (*cb)(uint64_t, size_t, int, void *),
238 void *arg)
239 {
240 ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
241 << " len = " << len << dendl;
242
243 if (!ictx->data_ctx.is_valid()) {
244 return -ENODEV;
245 }
246
247 // ensure previous writes are visible to listsnaps
248 C_SaferCond flush_ctx;
249 {
250 std::shared_lock owner_locker{ictx->owner_lock};
251 auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
252 io::AIO_TYPE_FLUSH);
253 auto req = io::ImageDispatchSpec<I>::create_flush_request(
254 *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
255 req->send();
256 delete req;
257 }
258 int r = flush_ctx.wait();
259 if (r < 0) {
260 return r;
261 }
262
263 r = ictx->state->refresh_if_required();
264 if (r < 0) {
265 return r;
266 }
267
268 ictx->image_lock.lock_shared();
269 r = clip_io(ictx, off, &len);
270 ictx->image_lock.unlock_shared();
271 if (r < 0) {
272 return r;
273 }
274
275 DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
276 include_parent, whole_object, cb, arg);
277 r = command.execute();
278 return r;
279 }
280
281 template <typename I>
282 int DiffIterate<I>::execute() {
283 CephContext* cct = m_image_ctx.cct;
284
285 ceph_assert(m_image_ctx.data_ctx.is_valid());
286
287 librados::IoCtx head_ctx;
288 librados::snap_t from_snap_id = 0;
289 librados::snap_t end_snap_id;
290 uint64_t from_size = 0;
291 uint64_t end_size;
292 {
293 std::shared_lock image_locker{m_image_ctx.image_lock};
294 head_ctx.dup(m_image_ctx.data_ctx);
295 if (m_from_snap_name) {
296 from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
297 from_size = m_image_ctx.get_image_size(from_snap_id);
298 }
299 end_snap_id = m_image_ctx.snap_id;
300 end_size = m_image_ctx.get_image_size(end_snap_id);
301 }
302
303 if (from_snap_id == CEPH_NOSNAP) {
304 return -ENOENT;
305 }
306 if (from_snap_id == end_snap_id) {
307 // no diff.
308 return 0;
309 }
310 if (from_snap_id >= end_snap_id) {
311 return -EINVAL;
312 }
313
314 int r;
315 bool fast_diff_enabled = false;
316 BitVector<2> object_diff_state;
317 if (m_whole_object) {
318 C_SaferCond ctx;
319 auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
320 end_snap_id,
321 &object_diff_state, &ctx);
322 req->send();
323
324 r = ctx.wait();
325 if (r < 0) {
326 ldout(cct, 5) << "fast diff disabled" << dendl;
327 } else {
328 ldout(cct, 5) << "fast diff enabled" << dendl;
329 fast_diff_enabled = true;
330 }
331 }
332
333 // we must list snaps via the head, not end snap
334 head_ctx.snap_set_read(CEPH_SNAPDIR);
335
336 ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
337 << end_snap_id << " size from " << from_size
338 << " to " << end_size << dendl;
339
340 // check parent overlap only if we are comparing to the beginning of time
341 DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
342 m_whole_object, from_snap_id, end_snap_id);
343 if (m_include_parent && from_snap_id == 0) {
344 std::shared_lock image_locker{m_image_ctx.image_lock};
345 uint64_t overlap = 0;
346 m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
347 r = 0;
348 if (m_image_ctx.parent && overlap > 0) {
349 ldout(cct, 10) << " first getting parent diff" << dendl;
350 DiffIterate diff_parent(*m_image_ctx.parent, {},
351 nullptr, 0, overlap,
352 m_include_parent, m_whole_object,
353 &simple_diff_cb,
354 &diff_context.parent_diff);
355 r = diff_parent.execute();
356 }
357 if (r < 0) {
358 return r;
359 }
360 }
361
362 uint64_t period = m_image_ctx.get_stripe_period();
363 uint64_t off = m_offset;
364 uint64_t left = m_length;
365
366 while (left > 0) {
367 uint64_t period_off = off - (off % period);
368 uint64_t read_len = min(period_off + period - off, left);
369
370 // map to extents
371 map<object_t,vector<ObjectExtent> > object_extents;
372 Striper::file_to_extents(cct, m_image_ctx.format_string,
373 &m_image_ctx.layout, off, read_len, 0,
374 object_extents, 0);
375
376 // get snap info for each object
377 for (map<object_t,vector<ObjectExtent> >::iterator p =
378 object_extents.begin();
379 p != object_extents.end(); ++p) {
380 ldout(cct, 20) << "object " << p->first << dendl;
381
382 if (fast_diff_enabled) {
383 const uint64_t object_no = p->second.front().objectno;
384 if (object_diff_state[object_no] == OBJECT_DIFF_STATE_NONE &&
385 from_snap_id == 0 && !diff_context.parent_diff.empty()) {
386 // no data in child object -- report parent diff instead
387 for (auto& oe : p->second) {
388 for (auto& be : oe.buffer_extents) {
389 interval_set<uint64_t> o;
390 o.insert(off + be.first, be.second);
391 o.intersection_of(diff_context.parent_diff);
392 ldout(cct, 20) << " reporting parent overlap " << o << dendl;
393 for (auto e = o.begin(); e != o.end(); ++e) {
394 r = m_callback(e.get_start(), e.get_len(), true,
395 m_callback_arg);
396 if (r < 0) {
397 return r;
398 }
399 }
400 }
401 }
402 } else if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
403 bool updated = (object_diff_state[object_no] ==
404 OBJECT_DIFF_STATE_UPDATED);
405 for (std::vector<ObjectExtent>::iterator q = p->second.begin();
406 q != p->second.end(); ++q) {
407 r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
408 if (r < 0) {
409 return r;
410 }
411 }
412 }
413 } else {
414 C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
415 diff_context,
416 p->first.name, off,
417 p->second);
418 diff_object->send();
419
420 if (diff_context.throttle.pending_error()) {
421 r = diff_context.throttle.wait_for_ret();
422 return r;
423 }
424 }
425 }
426
427 left -= read_len;
428 off += read_len;
429 }
430
431 r = diff_context.throttle.wait_for_ret();
432 if (r < 0) {
433 return r;
434 }
435 return 0;
436 }
437
438 } // namespace api
439 } // namespace librbd
440
441 template class librbd::api::DiffIterate<librbd::ImageCtx>;