]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/api/DiffIterate.cc
import ceph 14.2.5
[ceph.git] / ceph / src / librbd / api / DiffIterate.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/api/DiffIterate.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/ImageState.h"
7#include "librbd/ObjectMap.h"
8#include "librbd/Utils.h"
9#include "librbd/internal.h"
11fdf7f2
TL
10#include "librbd/io/AioCompletion.h"
11#include "librbd/io/ImageDispatchSpec.h"
12#include "librbd/io/ImageRequestWQ.h"
7c673cae
FG
13#include "include/rados/librados.hpp"
14#include "include/interval_set.h"
15#include "common/errno.h"
16#include "common/Throttle.h"
11fdf7f2 17#include "osdc/Striper.h"
7c673cae
FG
18#include "librados/snap_set_diff.h"
19#include <boost/tuple/tuple.hpp>
20#include <list>
21#include <map>
22#include <vector>
23
24#define dout_subsys ceph_subsys_rbd
25#undef dout_prefix
26#define dout_prefix *_dout << "librbd::DiffIterate: "
27
28namespace librbd {
29namespace api {
30
31namespace {
32
// Per-object outcome of comparing object maps; the values are written into
// and read back from the 2-bit entries of the fast-diff state vector.
enum ObjectDiffState {
  // nothing is reported to the callback for this object
  OBJECT_DIFF_STATE_NONE    = 0,
  // object is reported to the callback as existing data
  OBJECT_DIFF_STATE_UPDATED = 1,
  // object is reported to the callback with its "exists" flag cleared
  OBJECT_DIFF_STATE_HOLE    = 2
};
38
39struct DiffContext {
40 DiffIterate<>::Callback callback;
41 void *callback_arg;
42 bool whole_object;
43 uint64_t from_snap_id;
44 uint64_t end_snap_id;
45 interval_set<uint64_t> parent_diff;
46 OrderedThrottle throttle;
47
48 template <typename I>
49 DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
50 void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
51 uint64_t _end_snap_id)
52 : callback(callback), callback_arg(callback_arg),
53 whole_object(_whole_object), from_snap_id(_from_snap_id),
54 end_snap_id(_end_snap_id),
11fdf7f2 55 throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
7c673cae
FG
56 }
57};
58
59class C_DiffObject : public Context {
60public:
61 template <typename I>
62 C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
63 DiffContext &diff_context, const std::string &oid,
64 uint64_t offset, const std::vector<ObjectExtent> &object_extents)
65 : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
66 m_diff_context(diff_context), m_oid(oid), m_offset(offset),
67 m_object_extents(object_extents), m_snap_ret(0) {
68 }
69
70 void send() {
71 C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
72 librados::AioCompletion *rados_completion =
73 util::create_rados_callback(ctx);
74
75 librados::ObjectReadOperation op;
76 op.list_snaps(&m_snap_set, &m_snap_ret);
77
78 int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
11fdf7f2 79 ceph_assert(r == 0);
7c673cae
FG
80 rados_completion->release();
81 }
82
83protected:
84 typedef boost::tuple<uint64_t, size_t, bool> Diff;
85 typedef std::list<Diff> Diffs;
86
87 void finish(int r) override {
88 CephContext *cct = m_cct;
89 if (r == 0 && m_snap_ret < 0) {
90 r = m_snap_ret;
91 }
92
93 Diffs diffs;
94 if (r == 0) {
95 ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
96 compute_diffs(&diffs);
97 } else if (r == -ENOENT) {
98 ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
99 << dendl;
100 r = 0;
101 compute_parent_overlap(&diffs);
102 } else {
103 ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
104 << cpp_strerror(r) << dendl;
105 }
106
107 if (r == 0) {
108 for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
109 r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
110 m_diff_context.callback_arg);
111 if (r < 0) {
112 break;
113 }
114 }
115 }
116 m_diff_context.throttle.end_op(r);
117 }
118
119private:
120 CephContext *m_cct;
121 librados::IoCtx &m_head_ctx;
122 DiffContext &m_diff_context;
123 std::string m_oid;
124 uint64_t m_offset;
125 std::vector<ObjectExtent> m_object_extents;
126
127 librados::snap_set_t m_snap_set;
128 int m_snap_ret;
129
130 void compute_diffs(Diffs *diffs) {
131 CephContext *cct = m_cct;
132
133 // calc diff from from_snap_id -> to_snap_id
134 interval_set<uint64_t> diff;
135 uint64_t end_size;
136 bool end_exists;
137 librados::snap_t clone_end_snap_id;
94b18763 138 bool whole_object;
7c673cae
FG
139 calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
140 m_diff_context.end_snap_id, &diff, &end_size,
94b18763
FG
141 &end_exists, &clone_end_snap_id, &whole_object);
142 if (whole_object) {
143 ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
144 << dendl;
145 }
7c673cae
FG
146 ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
147 << dendl;
94b18763 148 if (diff.empty() && !whole_object) {
7c673cae
FG
149 if (m_diff_context.from_snap_id == 0 && !end_exists) {
150 compute_parent_overlap(diffs);
151 }
152 return;
94b18763 153 } else if (m_diff_context.whole_object || whole_object) {
7c673cae
FG
154 // provide the full object extents to the callback
155 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
156 q != m_object_extents.end(); ++q) {
157 diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
158 end_exists));
159 }
160 return;
161 }
162
163 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
164 q != m_object_extents.end(); ++q) {
165 ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
166 << q->offset << "~" << q->length << " from "
167 << q->buffer_extents << dendl;
168 uint64_t opos = q->offset;
169 for (vector<pair<uint64_t,uint64_t> >::iterator r =
170 q->buffer_extents.begin();
171 r != q->buffer_extents.end(); ++r) {
172 interval_set<uint64_t> overlap; // object extents
173 overlap.insert(opos, r->second);
174 overlap.intersection_of(diff);
175 ldout(cct, 20) << " opos " << opos
176 << " buf " << r->first << "~" << r->second
177 << " overlap " << overlap << dendl;
178 for (interval_set<uint64_t>::iterator s = overlap.begin();
179 s != overlap.end(); ++s) {
180 uint64_t su_off = s.get_start() - opos;
181 uint64_t logical_off = m_offset + r->first + su_off;
182 ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
183 << s.get_len() << " logical " << logical_off << "~"
184 << s.get_len() << dendl;
185 diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
186 end_exists));
187 }
188 opos += r->second;
189 }
11fdf7f2 190 ceph_assert(opos == q->offset + q->length);
7c673cae
FG
191 }
192 }
193
194 void compute_parent_overlap(Diffs *diffs) {
195 if (m_diff_context.from_snap_id == 0 &&
196 !m_diff_context.parent_diff.empty()) {
197 // report parent diff instead
198 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
199 q != m_object_extents.end(); ++q) {
200 for (vector<pair<uint64_t,uint64_t> >::iterator r =
201 q->buffer_extents.begin();
202 r != q->buffer_extents.end(); ++r) {
203 interval_set<uint64_t> o;
204 o.insert(m_offset + r->first, r->second);
205 o.intersection_of(m_diff_context.parent_diff);
206 ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
207 for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
208 ++s) {
209 diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
210 true));
211 }
212 }
213 }
214 }
215 }
216};
217
218int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
219 // it's possible for a discard to create a hole in the parent image -- ignore
220 if (exists) {
221 interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
222 diff->insert(off, len);
223 }
224 return 0;
225}
226
227} // anonymous namespace
228
229template <typename I>
230int DiffIterate<I>::diff_iterate(I *ictx,
231 const cls::rbd::SnapshotNamespace& from_snap_namespace,
232 const char *fromsnapname,
233 uint64_t off, uint64_t len,
234 bool include_parent, bool whole_object,
235 int (*cb)(uint64_t, size_t, int, void *),
236 void *arg)
237{
238 ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
239 << " len = " << len << dendl;
240
eafe8130
TL
241 if (!ictx->data_ctx.is_valid()) {
242 return -ENODEV;
243 }
244
7c673cae 245 // ensure previous writes are visible to listsnaps
11fdf7f2 246 C_SaferCond flush_ctx;
7c673cae
FG
247 {
248 RWLock::RLocker owner_locker(ictx->owner_lock);
494da23a
TL
249 auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
250 io::AIO_TYPE_FLUSH);
11fdf7f2
TL
251 auto req = io::ImageDispatchSpec<I>::create_flush_request(
252 *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
253 req->send();
254 delete req;
255 }
256 int r = flush_ctx.wait();
257 if (r < 0) {
258 return r;
7c673cae
FG
259 }
260
11fdf7f2 261 r = ictx->state->refresh_if_required();
7c673cae
FG
262 if (r < 0) {
263 return r;
264 }
265
266 ictx->snap_lock.get_read();
267 r = clip_io(ictx, off, &len);
268 ictx->snap_lock.put_read();
269 if (r < 0) {
270 return r;
271 }
272
273 DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
274 include_parent, whole_object, cb, arg);
275 r = command.execute();
276 return r;
277}
278
279template <typename I>
280int DiffIterate<I>::execute() {
281 CephContext* cct = m_image_ctx.cct;
282
eafe8130
TL
283 ceph_assert(m_image_ctx.data_ctx.is_valid());
284
7c673cae
FG
285 librados::IoCtx head_ctx;
286 librados::snap_t from_snap_id = 0;
287 librados::snap_t end_snap_id;
288 uint64_t from_size = 0;
289 uint64_t end_size;
290 {
291 RWLock::RLocker md_locker(m_image_ctx.md_lock);
292 RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
293 head_ctx.dup(m_image_ctx.data_ctx);
294 if (m_from_snap_name) {
295 from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
296 from_size = m_image_ctx.get_image_size(from_snap_id);
297 }
298 end_snap_id = m_image_ctx.snap_id;
299 end_size = m_image_ctx.get_image_size(end_snap_id);
300 }
301
302 if (from_snap_id == CEPH_NOSNAP) {
303 return -ENOENT;
304 }
305 if (from_snap_id == end_snap_id) {
306 // no diff.
307 return 0;
308 }
309 if (from_snap_id >= end_snap_id) {
310 return -EINVAL;
311 }
312
313 int r;
314 bool fast_diff_enabled = false;
315 BitVector<2> object_diff_state;
316 {
317 RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
318 if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
319 r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
320 if (r < 0) {
321 ldout(cct, 5) << "fast diff disabled" << dendl;
322 } else {
323 ldout(cct, 5) << "fast diff enabled" << dendl;
324 fast_diff_enabled = true;
325 }
326 }
327 }
328
329 // we must list snaps via the head, not end snap
330 head_ctx.snap_set_read(CEPH_SNAPDIR);
331
332 ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
333 << end_snap_id << " size from " << from_size
334 << " to " << end_size << dendl;
335
336 // check parent overlap only if we are comparing to the beginning of time
337 DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
338 m_whole_object, from_snap_id, end_snap_id);
339 if (m_include_parent && from_snap_id == 0) {
340 RWLock::RLocker l(m_image_ctx.snap_lock);
341 RWLock::RLocker l2(m_image_ctx.parent_lock);
342 uint64_t overlap = 0;
343 m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
344 r = 0;
345 if (m_image_ctx.parent && overlap > 0) {
346 ldout(cct, 10) << " first getting parent diff" << dendl;
347 DiffIterate diff_parent(*m_image_ctx.parent, {},
348 nullptr, 0, overlap,
349 m_include_parent, m_whole_object,
350 &simple_diff_cb,
351 &diff_context.parent_diff);
352 r = diff_parent.execute();
353 }
354 if (r < 0) {
355 return r;
356 }
357 }
358
359 uint64_t period = m_image_ctx.get_stripe_period();
360 uint64_t off = m_offset;
361 uint64_t left = m_length;
362
363 while (left > 0) {
364 uint64_t period_off = off - (off % period);
365 uint64_t read_len = min(period_off + period - off, left);
366
367 // map to extents
368 map<object_t,vector<ObjectExtent> > object_extents;
369 Striper::file_to_extents(cct, m_image_ctx.format_string,
370 &m_image_ctx.layout, off, read_len, 0,
371 object_extents, 0);
372
373 // get snap info for each object
374 for (map<object_t,vector<ObjectExtent> >::iterator p =
375 object_extents.begin();
376 p != object_extents.end(); ++p) {
377 ldout(cct, 20) << "object " << p->first << dendl;
378
379 if (fast_diff_enabled) {
380 const uint64_t object_no = p->second.front().objectno;
381 if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
382 bool updated = (object_diff_state[object_no] ==
383 OBJECT_DIFF_STATE_UPDATED);
384 for (std::vector<ObjectExtent>::iterator q = p->second.begin();
385 q != p->second.end(); ++q) {
386 r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
387 if (r < 0) {
388 return r;
389 }
390 }
391 }
392 } else {
393 C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
394 diff_context,
395 p->first.name, off,
396 p->second);
397 diff_object->send();
398
399 if (diff_context.throttle.pending_error()) {
400 r = diff_context.throttle.wait_for_ret();
401 return r;
402 }
403 }
404 }
405
406 left -= read_len;
407 off += read_len;
408 }
409
410 r = diff_context.throttle.wait_for_ret();
411 if (r < 0) {
412 return r;
413 }
414 return 0;
415}
416
417template <typename I>
418int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
419 BitVector<2>* object_diff_state) {
11fdf7f2 420 ceph_assert(m_image_ctx.snap_lock.is_locked());
7c673cae
FG
421 CephContext* cct = m_image_ctx.cct;
422
423 bool diff_from_start = (from_snap_id == 0);
424 if (from_snap_id == 0) {
425 if (!m_image_ctx.snaps.empty()) {
426 from_snap_id = m_image_ctx.snaps.back();
427 } else {
428 from_snap_id = CEPH_NOSNAP;
429 }
430 }
431
432 object_diff_state->clear();
433 uint64_t current_snap_id = from_snap_id;
434 uint64_t next_snap_id = to_snap_id;
435 BitVector<2> prev_object_map;
436 bool prev_object_map_valid = false;
437 while (true) {
438 uint64_t current_size = m_image_ctx.size;
439 if (current_snap_id != CEPH_NOSNAP) {
440 std::map<librados::snap_t, SnapInfo>::const_iterator snap_it =
441 m_image_ctx.snap_info.find(current_snap_id);
11fdf7f2 442 ceph_assert(snap_it != m_image_ctx.snap_info.end());
7c673cae
FG
443 current_size = snap_it->second.size;
444
445 ++snap_it;
446 if (snap_it != m_image_ctx.snap_info.end()) {
447 next_snap_id = snap_it->first;
448 } else {
449 next_snap_id = CEPH_NOSNAP;
450 }
451 }
452
453 uint64_t flags;
454 int r = m_image_ctx.get_flags(from_snap_id, &flags);
455 if (r < 0) {
456 lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl;
457 return r;
458 }
459 if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
460 ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid "
461 << "object map" << dendl;
462 return -EINVAL;
463 }
464
465 BitVector<2> object_map;
466 std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
467 current_snap_id));
468 r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
469 if (r < 0) {
470 lderr(cct) << "diff_object_map: failed to load object map " << oid
471 << dendl;
472 return r;
473 }
474 ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl;
475
476 uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
477 current_size);
478 if (object_map.size() < num_objs) {
479 ldout(cct, 1) << "diff_object_map: object map too small: "
480 << object_map.size() << " < " << num_objs << dendl;
481 return -EINVAL;
482 }
483 object_map.resize(num_objs);
11fdf7f2 484 object_diff_state->resize(object_map.size());
7c673cae 485
11fdf7f2
TL
486 uint64_t overlap = std::min(object_map.size(), prev_object_map.size());
487 auto it = object_map.begin();
488 auto overlap_end_it = it + overlap;
489 auto pre_it = prev_object_map.begin();
490 auto diff_it = object_diff_state->begin();
491 uint64_t i = 0;
492 for (; it != overlap_end_it; ++it, ++pre_it, ++diff_it, ++i) {
7c673cae 493 ldout(cct, 20) << __func__ << ": object state: " << i << " "
11fdf7f2
TL
494 << static_cast<uint32_t>(*pre_it)
495 << "->" << static_cast<uint32_t>(*it) << dendl;
496 if (*it == OBJECT_NONEXISTENT) {
497 if (*pre_it != OBJECT_NONEXISTENT) {
498 *diff_it = OBJECT_DIFF_STATE_HOLE;
7c673cae 499 }
11fdf7f2
TL
500 } else if (*it == OBJECT_EXISTS ||
501 (*pre_it != *it &&
502 !(*pre_it == OBJECT_EXISTS &&
503 *it == OBJECT_EXISTS_CLEAN))) {
504 *diff_it = OBJECT_DIFF_STATE_UPDATED;
7c673cae
FG
505 }
506 }
507 ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl;
11fdf7f2 508 auto end_it = object_map.end();
7c673cae
FG
509 if (object_map.size() > prev_object_map.size() &&
510 (diff_from_start || prev_object_map_valid)) {
11fdf7f2 511 for (; it != end_it; ++it,++diff_it, ++i) {
7c673cae 512 ldout(cct, 20) << __func__ << ": object state: " << i << " "
11fdf7f2
TL
513 << "->" << static_cast<uint32_t>(*it) << dendl;
514 if (*it == OBJECT_NONEXISTENT) {
515 *diff_it = OBJECT_DIFF_STATE_NONE;
7c673cae 516 } else {
11fdf7f2 517 *diff_it = OBJECT_DIFF_STATE_UPDATED;
7c673cae
FG
518 }
519 }
520 }
521 ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl;
522
523 if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) {
524 break;
525 }
526 current_snap_id = next_snap_id;
527 prev_object_map = object_map;
528 prev_object_map_valid = true;
529 }
530 return 0;
531}
532
533} // namespace api
534} // namespace librbd
535
536template class librbd::api::DiffIterate<librbd::ImageCtx>;