]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/api/DiffIterate.cc
import ceph 15.2.16
[ceph.git] / ceph / src / librbd / api / DiffIterate.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ImageDispatchSpec.h"
12 #include "librbd/io/ImageRequestWQ.h"
13 #include "librbd/object_map/DiffRequest.h"
14 #include "include/rados/librados.hpp"
15 #include "include/interval_set.h"
16 #include "common/errno.h"
17 #include "common/Cond.h"
18 #include "common/Throttle.h"
19 #include "osdc/Striper.h"
20 #include "librados/snap_set_diff.h"
21 #include <boost/tuple/tuple.hpp>
22 #include <list>
23 #include <map>
24 #include <vector>
25
26 #define dout_subsys ceph_subsys_rbd
27 #undef dout_prefix
28 #define dout_prefix *_dout << "librbd::DiffIterate: "
29
30 namespace librbd {
31 namespace api {
32
33 namespace {
34
35 struct DiffContext {
36 DiffIterate<>::Callback callback;
37 void *callback_arg;
38 bool whole_object;
39 uint64_t from_snap_id;
40 uint64_t end_snap_id;
41 interval_set<uint64_t> parent_diff;
42 OrderedThrottle throttle;
43
44 template <typename I>
45 DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
46 void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
47 uint64_t _end_snap_id)
48 : callback(callback), callback_arg(callback_arg),
49 whole_object(_whole_object), from_snap_id(_from_snap_id),
50 end_snap_id(_end_snap_id),
51 throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
52 }
53 };
54
55 class C_DiffObject : public Context {
56 public:
57 template <typename I>
58 C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
59 DiffContext &diff_context, const std::string &oid,
60 uint64_t offset, const std::vector<ObjectExtent> &object_extents)
61 : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
62 m_diff_context(diff_context), m_oid(oid), m_offset(offset),
63 m_object_extents(object_extents), m_snap_ret(0) {
64 }
65
66 void send() {
67 C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
68 librados::AioCompletion *rados_completion =
69 util::create_rados_callback(ctx);
70
71 librados::ObjectReadOperation op;
72 op.list_snaps(&m_snap_set, &m_snap_ret);
73
74 int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
75 ceph_assert(r == 0);
76 rados_completion->release();
77 }
78
79 protected:
80 typedef boost::tuple<uint64_t, size_t, bool> Diff;
81 typedef std::list<Diff> Diffs;
82
83 void finish(int r) override {
84 CephContext *cct = m_cct;
85 if (r == 0 && m_snap_ret < 0) {
86 r = m_snap_ret;
87 }
88
89 Diffs diffs;
90 if (r == 0) {
91 ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
92 compute_diffs(&diffs);
93 } else if (r == -ENOENT) {
94 ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
95 << dendl;
96 r = 0;
97 compute_parent_overlap(&diffs);
98 } else {
99 ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
100 << cpp_strerror(r) << dendl;
101 }
102
103 if (r == 0) {
104 for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
105 r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
106 m_diff_context.callback_arg);
107 if (r < 0) {
108 break;
109 }
110 }
111 }
112 m_diff_context.throttle.end_op(r);
113 }
114
115 private:
116 CephContext *m_cct;
117 librados::IoCtx &m_head_ctx;
118 DiffContext &m_diff_context;
119 std::string m_oid;
120 uint64_t m_offset;
121 std::vector<ObjectExtent> m_object_extents;
122
123 librados::snap_set_t m_snap_set;
124 int m_snap_ret;
125
126 void compute_diffs(Diffs *diffs) {
127 CephContext *cct = m_cct;
128
129 // calc diff from from_snap_id -> to_snap_id
130 interval_set<uint64_t> diff;
131 uint64_t end_size;
132 bool end_exists;
133 librados::snap_t clone_end_snap_id;
134 bool whole_object;
135 calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
136 m_diff_context.end_snap_id, &diff, &end_size,
137 &end_exists, &clone_end_snap_id, &whole_object);
138 if (whole_object) {
139 ldout(cct, 1) << "object " << m_oid << ": need to provide full object"
140 << dendl;
141 }
142 ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
143 << dendl;
144 if (diff.empty() && !whole_object) {
145 if (m_diff_context.from_snap_id == 0 && !end_exists) {
146 compute_parent_overlap(diffs);
147 }
148 return;
149 } else if (m_diff_context.whole_object || whole_object) {
150 // provide the full object extents to the callback
151 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
152 q != m_object_extents.end(); ++q) {
153 for (vector<pair<uint64_t,uint64_t> >::iterator r =
154 q->buffer_extents.begin();
155 r != q->buffer_extents.end(); ++r) {
156 diffs->push_back(boost::make_tuple(m_offset + r->first, r->second,
157 end_exists));
158 }
159 }
160 return;
161 }
162
163 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
164 q != m_object_extents.end(); ++q) {
165 ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
166 << q->offset << "~" << q->length << " from "
167 << q->buffer_extents << dendl;
168 uint64_t opos = q->offset;
169 for (vector<pair<uint64_t,uint64_t> >::iterator r =
170 q->buffer_extents.begin();
171 r != q->buffer_extents.end(); ++r) {
172 interval_set<uint64_t> overlap; // object extents
173 overlap.insert(opos, r->second);
174 overlap.intersection_of(diff);
175 ldout(cct, 20) << " opos " << opos
176 << " buf " << r->first << "~" << r->second
177 << " overlap " << overlap << dendl;
178 for (interval_set<uint64_t>::iterator s = overlap.begin();
179 s != overlap.end(); ++s) {
180 uint64_t su_off = s.get_start() - opos;
181 uint64_t logical_off = m_offset + r->first + su_off;
182 ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
183 << s.get_len() << " logical " << logical_off << "~"
184 << s.get_len() << dendl;
185 diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
186 end_exists));
187 }
188 opos += r->second;
189 }
190 ceph_assert(opos == q->offset + q->length);
191 }
192 }
193
194 void compute_parent_overlap(Diffs *diffs) {
195 if (m_diff_context.from_snap_id == 0 &&
196 !m_diff_context.parent_diff.empty()) {
197 // report parent diff instead
198 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
199 q != m_object_extents.end(); ++q) {
200 for (vector<pair<uint64_t,uint64_t> >::iterator r =
201 q->buffer_extents.begin();
202 r != q->buffer_extents.end(); ++r) {
203 interval_set<uint64_t> o;
204 o.insert(m_offset + r->first, r->second);
205 o.intersection_of(m_diff_context.parent_diff);
206 ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
207 for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
208 ++s) {
209 diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
210 true));
211 }
212 }
213 }
214 }
215 }
216 };
217
218 int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
219 // it's possible for a discard to create a hole in the parent image -- ignore
220 if (exists) {
221 interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
222 diff->insert(off, len);
223 }
224 return 0;
225 }
226
227 } // anonymous namespace
228
229 template <typename I>
230 int DiffIterate<I>::diff_iterate(I *ictx,
231 const cls::rbd::SnapshotNamespace& from_snap_namespace,
232 const char *fromsnapname,
233 uint64_t off, uint64_t len,
234 bool include_parent, bool whole_object,
235 int (*cb)(uint64_t, size_t, int, void *),
236 void *arg)
237 {
238 ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
239 << " len = " << len << dendl;
240
241 if (!ictx->data_ctx.is_valid()) {
242 return -ENODEV;
243 }
244
245 // ensure previous writes are visible to listsnaps
246 C_SaferCond flush_ctx;
247 {
248 std::shared_lock owner_locker{ictx->owner_lock};
249 auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
250 io::AIO_TYPE_FLUSH);
251 auto req = io::ImageDispatchSpec<I>::create_flush_request(
252 *ictx, aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
253 req->send();
254 delete req;
255 }
256 int r = flush_ctx.wait();
257 if (r < 0) {
258 return r;
259 }
260
261 r = ictx->state->refresh_if_required();
262 if (r < 0) {
263 return r;
264 }
265
266 ictx->image_lock.lock_shared();
267 r = clip_io(ictx, off, &len);
268 ictx->image_lock.unlock_shared();
269 if (r < 0) {
270 return r;
271 }
272
273 DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
274 include_parent, whole_object, cb, arg);
275 r = command.execute();
276 return r;
277 }
278
279 template <typename I>
280 int DiffIterate<I>::execute() {
281 CephContext* cct = m_image_ctx.cct;
282
283 ceph_assert(m_image_ctx.data_ctx.is_valid());
284
285 librados::IoCtx head_ctx;
286 librados::snap_t from_snap_id = 0;
287 librados::snap_t end_snap_id;
288 uint64_t from_size = 0;
289 uint64_t end_size;
290 {
291 std::shared_lock image_locker{m_image_ctx.image_lock};
292 head_ctx.dup(m_image_ctx.data_ctx);
293 if (m_from_snap_name) {
294 from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
295 from_size = m_image_ctx.get_image_size(from_snap_id);
296 }
297 end_snap_id = m_image_ctx.snap_id;
298 end_size = m_image_ctx.get_image_size(end_snap_id);
299 }
300
301 if (from_snap_id == CEPH_NOSNAP) {
302 return -ENOENT;
303 }
304 if (from_snap_id == end_snap_id) {
305 // no diff.
306 return 0;
307 }
308 if (from_snap_id >= end_snap_id) {
309 return -EINVAL;
310 }
311
312 int r;
313 bool fast_diff_enabled = false;
314 BitVector<2> object_diff_state;
315 if (m_whole_object) {
316 C_SaferCond ctx;
317 auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
318 end_snap_id,
319 &object_diff_state, &ctx);
320 req->send();
321
322 r = ctx.wait();
323 if (r < 0) {
324 ldout(cct, 5) << "fast diff disabled" << dendl;
325 } else {
326 ldout(cct, 5) << "fast diff enabled" << dendl;
327 fast_diff_enabled = true;
328 }
329 }
330
331 // we must list snaps via the head, not end snap
332 head_ctx.snap_set_read(CEPH_SNAPDIR);
333
334 ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
335 << end_snap_id << " size from " << from_size
336 << " to " << end_size << dendl;
337
338 // check parent overlap only if we are comparing to the beginning of time
339 DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
340 m_whole_object, from_snap_id, end_snap_id);
341 if (m_include_parent && from_snap_id == 0) {
342 std::shared_lock image_locker{m_image_ctx.image_lock};
343 uint64_t overlap = 0;
344 m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
345 r = 0;
346 if (m_image_ctx.parent && overlap > 0) {
347 ldout(cct, 10) << " first getting parent diff" << dendl;
348 DiffIterate diff_parent(*m_image_ctx.parent, {},
349 nullptr, 0, overlap,
350 m_include_parent, m_whole_object,
351 &simple_diff_cb,
352 &diff_context.parent_diff);
353 r = diff_parent.execute();
354 }
355 if (r < 0) {
356 return r;
357 }
358 }
359
360 uint64_t period = m_image_ctx.get_stripe_period();
361 uint64_t off = m_offset;
362 uint64_t left = m_length;
363
364 while (left > 0) {
365 uint64_t period_off = off - (off % period);
366 uint64_t read_len = min(period_off + period - off, left);
367
368 // map to extents
369 map<object_t,vector<ObjectExtent> > object_extents;
370 Striper::file_to_extents(cct, m_image_ctx.format_string,
371 &m_image_ctx.layout, off, read_len, 0,
372 object_extents, 0);
373
374 // get snap info for each object
375 for (map<object_t,vector<ObjectExtent> >::iterator p =
376 object_extents.begin();
377 p != object_extents.end(); ++p) {
378 ldout(cct, 20) << "object " << p->first << dendl;
379
380 if (fast_diff_enabled) {
381 const uint64_t object_no = p->second.front().objectno;
382 uint8_t diff_state = object_diff_state[object_no];
383 if (diff_state == object_map::DIFF_STATE_HOLE &&
384 from_snap_id == 0 && !diff_context.parent_diff.empty()) {
385 // no data in child object -- report parent diff instead
386 for (auto& oe : p->second) {
387 for (auto& be : oe.buffer_extents) {
388 interval_set<uint64_t> o;
389 o.insert(off + be.first, be.second);
390 o.intersection_of(diff_context.parent_diff);
391 ldout(cct, 20) << " reporting parent overlap " << o << dendl;
392 for (auto e = o.begin(); e != o.end(); ++e) {
393 r = m_callback(e.get_start(), e.get_len(), true,
394 m_callback_arg);
395 if (r < 0) {
396 return r;
397 }
398 }
399 }
400 }
401 } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ||
402 diff_state == object_map::DIFF_STATE_DATA_UPDATED) {
403 bool updated = (diff_state == object_map::DIFF_STATE_DATA_UPDATED);
404 for (std::vector<ObjectExtent>::iterator q = p->second.begin();
405 q != p->second.end(); ++q) {
406 for (auto& be : q->buffer_extents) {
407 r = m_callback(off + be.first, be.second, updated,
408 m_callback_arg);
409 if (r < 0) {
410 return r;
411 }
412 }
413 }
414 }
415 } else {
416 C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
417 diff_context,
418 p->first.name, off,
419 p->second);
420 diff_object->send();
421
422 if (diff_context.throttle.pending_error()) {
423 r = diff_context.throttle.wait_for_ret();
424 return r;
425 }
426 }
427 }
428
429 left -= read_len;
430 off += read_len;
431 }
432
433 r = diff_context.throttle.wait_for_ret();
434 if (r < 0) {
435 return r;
436 }
437 return 0;
438 }
439
440 } // namespace api
441 } // namespace librbd
442
443 template class librbd::api::DiffIterate<librbd::ImageCtx>;