]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/api/DiffIterate.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / librbd / api / DiffIterate.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/api/DiffIterate.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/ImageState.h"
7#include "librbd/ObjectMap.h"
8#include "librbd/Utils.h"
9#include "librbd/internal.h"
11fdf7f2
TL
10#include "librbd/io/AioCompletion.h"
11#include "librbd/io/ImageDispatchSpec.h"
9f95a23c 12#include "librbd/object_map/DiffRequest.h"
7c673cae
FG
13#include "include/rados/librados.hpp"
14#include "include/interval_set.h"
15#include "common/errno.h"
9f95a23c 16#include "common/Cond.h"
7c673cae 17#include "common/Throttle.h"
11fdf7f2 18#include "osdc/Striper.h"
7c673cae
FG
19#include <boost/tuple/tuple.hpp>
20#include <list>
21#include <map>
22#include <vector>
23
24#define dout_subsys ceph_subsys_rbd
25#undef dout_prefix
26#define dout_prefix *_dout << "librbd::DiffIterate: "
27
28namespace librbd {
29namespace api {
30
31namespace {
32
7c673cae
FG
33struct DiffContext {
34 DiffIterate<>::Callback callback;
35 void *callback_arg;
36 bool whole_object;
f67539c2 37 bool include_parent;
7c673cae
FG
38 uint64_t from_snap_id;
39 uint64_t end_snap_id;
7c673cae
FG
40 OrderedThrottle throttle;
41
42 template <typename I>
43 DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
f67539c2
TL
44 void *callback_arg, bool _whole_object, bool _include_parent,
45 uint64_t _from_snap_id, uint64_t _end_snap_id)
7c673cae 46 : callback(callback), callback_arg(callback_arg),
f67539c2
TL
47 whole_object(_whole_object), include_parent(_include_parent),
48 from_snap_id(_from_snap_id), end_snap_id(_end_snap_id),
11fdf7f2 49 throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) {
7c673cae
FG
50 }
51};
52
f67539c2 53template <typename I>
7c673cae
FG
54class C_DiffObject : public Context {
55public:
f67539c2
TL
56 C_DiffObject(I &image_ctx, DiffContext &diff_context, uint64_t image_offset,
57 uint64_t image_length)
58 : m_image_ctx(image_ctx), m_cct(image_ctx.cct),
59 m_diff_context(diff_context), m_image_offset(image_offset),
60 m_image_length(image_length) {
7c673cae
FG
61 }
62
63 void send() {
f67539c2
TL
64 Context* ctx = m_diff_context.throttle.start_op(this);
65 auto aio_comp = io::AioCompletion::create_and_start(
66 ctx, util::get_image_ctx(&m_image_ctx), io::AIO_TYPE_GENERIC);
20effc67
TL
67 int list_snaps_flags = 0;
68 if (!m_diff_context.include_parent || m_diff_context.from_snap_id != 0) {
69 list_snaps_flags |= io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT;
70 }
71 if (m_diff_context.whole_object) {
72 list_snaps_flags |= io::LIST_SNAPS_FLAG_WHOLE_OBJECT;
73 }
f67539c2
TL
74 auto req = io::ImageDispatchSpec::create_list_snaps(
75 m_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
1e59de90 76 aio_comp, {{m_image_offset, m_image_length}}, io::ImageArea::DATA,
f67539c2 77 {m_diff_context.from_snap_id, m_diff_context.end_snap_id},
20effc67 78 list_snaps_flags, &m_snapshot_delta, {});
f67539c2 79 req->send();
7c673cae
FG
80 }
81
82protected:
83 typedef boost::tuple<uint64_t, size_t, bool> Diff;
84 typedef std::list<Diff> Diffs;
85
86 void finish(int r) override {
87 CephContext *cct = m_cct;
7c673cae 88
f67539c2
TL
89 if (r < 0) {
90 ldout(cct, 20) << "list_snaps failed: " << m_image_offset << "~"
91 << m_image_length << ": " << cpp_strerror(r) << dendl;
7c673cae
FG
92 }
93
f67539c2
TL
94 Diffs diffs;
95 ldout(cct, 20) << "image extent " << m_image_offset << "~"
96 << m_image_length << ": list_snaps complete" << dendl;
97
98 compute_diffs(&diffs);
99 for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
100 r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
101 m_diff_context.callback_arg);
102 if (r < 0) {
103 break;
7c673cae
FG
104 }
105 }
106 m_diff_context.throttle.end_op(r);
107 }
108
109private:
f67539c2 110 I& m_image_ctx;
7c673cae 111 CephContext *m_cct;
7c673cae 112 DiffContext &m_diff_context;
f67539c2
TL
113 uint64_t m_image_offset;
114 uint64_t m_image_length;
7c673cae 115
f67539c2 116 io::SnapshotDelta m_snapshot_delta;
7c673cae
FG
117
118 void compute_diffs(Diffs *diffs) {
119 CephContext *cct = m_cct;
120
f67539c2
TL
121 // merge per-snapshot deltas into an aggregate
122 io::SparseExtents aggregate_snapshot_extents;
123 for (auto& [key, snapshot_extents] : m_snapshot_delta) {
124 for (auto& snapshot_extent : snapshot_extents) {
125 auto state = snapshot_extent.get_val().state;
126
127 // ignore DNE object (and parent)
128 if ((state == io::SPARSE_EXTENT_STATE_DNE) ||
129 (key == io::INITIAL_WRITE_READ_SNAP_IDS &&
130 state == io::SPARSE_EXTENT_STATE_ZEROED)) {
131 continue;
7c673cae 132 }
f67539c2
TL
133
134 aggregate_snapshot_extents.insert(
135 snapshot_extent.get_off(), snapshot_extent.get_len(),
136 {state, snapshot_extent.get_len()});
7c673cae 137 }
7c673cae 138 }
7c673cae 139
f67539c2
TL
140 // build delta callback set
141 for (auto& snapshot_extent : aggregate_snapshot_extents) {
142 ldout(cct, 20) << "off=" << snapshot_extent.get_off() << ", "
143 << "len=" << snapshot_extent.get_len() << ", "
144 << "state=" << snapshot_extent.get_val().state << dendl;
145 diffs->emplace_back(
146 snapshot_extent.get_off(), snapshot_extent.get_len(),
147 snapshot_extent.get_val().state == io::SPARSE_EXTENT_STATE_DATA);
7c673cae
FG
148 }
149 }
150};
151
20effc67
TL
152int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
153 // it's possible for a discard to create a hole in the parent image -- ignore
154 if (exists) {
155 interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
156 diff->insert(off, len);
157 }
158 return 0;
159}
160
7c673cae
FG
161} // anonymous namespace
162
163template <typename I>
164int DiffIterate<I>::diff_iterate(I *ictx,
165 const cls::rbd::SnapshotNamespace& from_snap_namespace,
166 const char *fromsnapname,
167 uint64_t off, uint64_t len,
168 bool include_parent, bool whole_object,
169 int (*cb)(uint64_t, size_t, int, void *),
170 void *arg)
171{
172 ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
173 << " len = " << len << dendl;
174
eafe8130
TL
175 if (!ictx->data_ctx.is_valid()) {
176 return -ENODEV;
177 }
178
7c673cae 179 // ensure previous writes are visible to listsnaps
11fdf7f2 180 C_SaferCond flush_ctx;
7c673cae 181 {
9f95a23c 182 std::shared_lock owner_locker{ictx->owner_lock};
494da23a
TL
183 auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx,
184 io::AIO_TYPE_FLUSH);
f67539c2
TL
185 auto req = io::ImageDispatchSpec::create_flush(
186 *ictx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START,
187 aio_comp, io::FLUSH_SOURCE_INTERNAL, {});
11fdf7f2 188 req->send();
11fdf7f2
TL
189 }
190 int r = flush_ctx.wait();
191 if (r < 0) {
192 return r;
7c673cae
FG
193 }
194
11fdf7f2 195 r = ictx->state->refresh_if_required();
7c673cae
FG
196 if (r < 0) {
197 return r;
198 }
199
9f95a23c 200 ictx->image_lock.lock_shared();
1e59de90 201 r = clip_io(ictx, off, &len, io::ImageArea::DATA);
9f95a23c 202 ictx->image_lock.unlock_shared();
7c673cae
FG
203 if (r < 0) {
204 return r;
205 }
206
207 DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
208 include_parent, whole_object, cb, arg);
209 r = command.execute();
210 return r;
211}
212
213template <typename I>
214int DiffIterate<I>::execute() {
215 CephContext* cct = m_image_ctx.cct;
216
eafe8130
TL
217 ceph_assert(m_image_ctx.data_ctx.is_valid());
218
7c673cae
FG
219 librados::snap_t from_snap_id = 0;
220 librados::snap_t end_snap_id;
221 uint64_t from_size = 0;
222 uint64_t end_size;
223 {
9f95a23c 224 std::shared_lock image_locker{m_image_ctx.image_lock};
7c673cae 225 if (m_from_snap_name) {
f67539c2
TL
226 from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace,
227 m_from_snap_name);
7c673cae
FG
228 from_size = m_image_ctx.get_image_size(from_snap_id);
229 }
230 end_snap_id = m_image_ctx.snap_id;
231 end_size = m_image_ctx.get_image_size(end_snap_id);
232 }
233
234 if (from_snap_id == CEPH_NOSNAP) {
235 return -ENOENT;
236 }
237 if (from_snap_id == end_snap_id) {
238 // no diff.
239 return 0;
240 }
241 if (from_snap_id >= end_snap_id) {
242 return -EINVAL;
243 }
244
245 int r;
246 bool fast_diff_enabled = false;
247 BitVector<2> object_diff_state;
20effc67 248 interval_set<uint64_t> parent_diff;
9f95a23c
TL
249 if (m_whole_object) {
250 C_SaferCond ctx;
251 auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
252 end_snap_id,
253 &object_diff_state, &ctx);
254 req->send();
255
256 r = ctx.wait();
257 if (r < 0) {
258 ldout(cct, 5) << "fast diff disabled" << dendl;
259 } else {
260 ldout(cct, 5) << "fast diff enabled" << dendl;
261 fast_diff_enabled = true;
20effc67
TL
262
263 // check parent overlap only if we are comparing to the beginning of time
264 if (m_include_parent && from_snap_id == 0) {
265 std::shared_lock image_locker{m_image_ctx.image_lock};
1e59de90
TL
266 uint64_t raw_overlap = 0;
267 m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &raw_overlap);
268 auto overlap = m_image_ctx.reduce_parent_overlap(raw_overlap, false);
269 if (overlap.first > 0 && overlap.second == io::ImageArea::DATA) {
20effc67 270 ldout(cct, 10) << " first getting parent diff" << dendl;
1e59de90
TL
271 DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0,
272 overlap.first, true, true, &simple_diff_cb,
273 &parent_diff);
20effc67
TL
274 r = diff_parent.execute();
275 if (r < 0) {
276 return r;
277 }
278 }
279 }
7c673cae
FG
280 }
281 }
282
7c673cae
FG
283 ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
284 << end_snap_id << " size from " << from_size
285 << " to " << end_size << dendl;
7c673cae 286 DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
20effc67 287 m_whole_object, m_include_parent, from_snap_id,
f67539c2 288 end_snap_id);
7c673cae
FG
289
290 uint64_t period = m_image_ctx.get_stripe_period();
291 uint64_t off = m_offset;
292 uint64_t left = m_length;
293
294 while (left > 0) {
295 uint64_t period_off = off - (off % period);
20effc67 296 uint64_t read_len = std::min(period_off + period - off, left);
7c673cae 297
f67539c2
TL
298 if (fast_diff_enabled) {
299 // map to extents
20effc67 300 std::map<object_t,std::vector<ObjectExtent> > object_extents;
f67539c2
TL
301 Striper::file_to_extents(cct, m_image_ctx.format_string,
302 &m_image_ctx.layout, off, read_len, 0,
303 object_extents, 0);
7c673cae 304
1d09f67e
TL
305 // get diff info for each object and merge adjacent stripe units
306 // into an aggregate (this also sorts them)
307 io::SparseExtents aggregate_sparse_extents;
f67539c2 308 for (auto& [object, extents] : object_extents) {
f67539c2 309 const uint64_t object_no = extents.front().objectno;
cd265ab1 310 uint8_t diff_state = object_diff_state[object_no];
1d09f67e
TL
311 ldout(cct, 20) << "object " << object << ": diff_state="
312 << (int)diff_state << dendl;
313
cd265ab1 314 if (diff_state == object_map::DIFF_STATE_HOLE &&
20effc67 315 from_snap_id == 0 && !parent_diff.empty()) {
92f5a8d4 316 // no data in child object -- report parent diff instead
f67539c2 317 for (auto& oe : extents) {
92f5a8d4
TL
318 for (auto& be : oe.buffer_extents) {
319 interval_set<uint64_t> o;
320 o.insert(off + be.first, be.second);
20effc67 321 o.intersection_of(parent_diff);
92f5a8d4
TL
322 ldout(cct, 20) << " reporting parent overlap " << o << dendl;
323 for (auto e = o.begin(); e != o.end(); ++e) {
1d09f67e
TL
324 aggregate_sparse_extents.insert(e.get_start(), e.get_len(),
325 {io::SPARSE_EXTENT_STATE_DATA,
326 e.get_len()});
92f5a8d4
TL
327 }
328 }
329 }
cd265ab1
TL
330 } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ||
331 diff_state == object_map::DIFF_STATE_DATA_UPDATED) {
1d09f67e
TL
332 auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ?
333 io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA);
f67539c2 334 for (auto& oe : extents) {
20effc67 335 for (auto& be : oe.buffer_extents) {
1d09f67e
TL
336 aggregate_sparse_extents.insert(off + be.first, be.second,
337 {state, be.second});
7c673cae
FG
338 }
339 }
340 }
f67539c2 341 }
1d09f67e
TL
342
343 for (const auto& se : aggregate_sparse_extents) {
344 ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len()
345 << ", state=" << se.get_val().state << dendl;
346 r = m_callback(se.get_off(), se.get_len(),
347 se.get_val().state == io::SPARSE_EXTENT_STATE_DATA,
348 m_callback_arg);
349 if (r < 0) {
350 return r;
351 }
352 }
353 } else {
f67539c2
TL
354 auto diff_object = new C_DiffObject<I>(m_image_ctx, diff_context, off,
355 read_len);
356 diff_object->send();
357
358 if (diff_context.throttle.pending_error()) {
359 r = diff_context.throttle.wait_for_ret();
360 return r;
7c673cae
FG
361 }
362 }
363
364 left -= read_len;
365 off += read_len;
366 }
367
368 r = diff_context.throttle.wait_for_ret();
369 if (r < 0) {
370 return r;
371 }
372 return 0;
373}
374
7c673cae
FG
375} // namespace api
376} // namespace librbd
377
378template class librbd::api::DiffIterate<librbd::ImageCtx>;