]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/api/DiffIterate.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/ImageState.h" | |
7 | #include "librbd/ObjectMap.h" | |
8 | #include "librbd/Utils.h" | |
9 | #include "librbd/internal.h" | |
11fdf7f2 TL |
10 | #include "librbd/io/AioCompletion.h" |
11 | #include "librbd/io/ImageDispatchSpec.h" | |
9f95a23c | 12 | #include "librbd/object_map/DiffRequest.h" |
7c673cae FG |
13 | #include "include/rados/librados.hpp" |
14 | #include "include/interval_set.h" | |
15 | #include "common/errno.h" | |
9f95a23c | 16 | #include "common/Cond.h" |
7c673cae | 17 | #include "common/Throttle.h" |
11fdf7f2 | 18 | #include "osdc/Striper.h" |
7c673cae FG |
19 | #include <boost/tuple/tuple.hpp> |
20 | #include <list> | |
21 | #include <map> | |
22 | #include <vector> | |
23 | ||
24 | #define dout_subsys ceph_subsys_rbd | |
25 | #undef dout_prefix | |
26 | #define dout_prefix *_dout << "librbd::DiffIterate: " | |
27 | ||
28 | namespace librbd { | |
29 | namespace api { | |
30 | ||
31 | namespace { | |
32 | ||
7c673cae FG |
33 | struct DiffContext { |
34 | DiffIterate<>::Callback callback; | |
35 | void *callback_arg; | |
36 | bool whole_object; | |
f67539c2 | 37 | bool include_parent; |
7c673cae FG |
38 | uint64_t from_snap_id; |
39 | uint64_t end_snap_id; | |
7c673cae FG |
40 | OrderedThrottle throttle; |
41 | ||
42 | template <typename I> | |
43 | DiffContext(I &image_ctx, DiffIterate<>::Callback callback, | |
f67539c2 TL |
44 | void *callback_arg, bool _whole_object, bool _include_parent, |
45 | uint64_t _from_snap_id, uint64_t _end_snap_id) | |
7c673cae | 46 | : callback(callback), callback_arg(callback_arg), |
f67539c2 TL |
47 | whole_object(_whole_object), include_parent(_include_parent), |
48 | from_snap_id(_from_snap_id), end_snap_id(_end_snap_id), | |
11fdf7f2 | 49 | throttle(image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"), true) { |
7c673cae FG |
50 | } |
51 | }; | |
52 | ||
f67539c2 | 53 | template <typename I> |
7c673cae FG |
54 | class C_DiffObject : public Context { |
55 | public: | |
f67539c2 TL |
56 | C_DiffObject(I &image_ctx, DiffContext &diff_context, uint64_t image_offset, |
57 | uint64_t image_length) | |
58 | : m_image_ctx(image_ctx), m_cct(image_ctx.cct), | |
59 | m_diff_context(diff_context), m_image_offset(image_offset), | |
60 | m_image_length(image_length) { | |
7c673cae FG |
61 | } |
62 | ||
63 | void send() { | |
f67539c2 TL |
64 | Context* ctx = m_diff_context.throttle.start_op(this); |
65 | auto aio_comp = io::AioCompletion::create_and_start( | |
66 | ctx, util::get_image_ctx(&m_image_ctx), io::AIO_TYPE_GENERIC); | |
20effc67 TL |
67 | int list_snaps_flags = 0; |
68 | if (!m_diff_context.include_parent || m_diff_context.from_snap_id != 0) { | |
69 | list_snaps_flags |= io::LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT; | |
70 | } | |
71 | if (m_diff_context.whole_object) { | |
72 | list_snaps_flags |= io::LIST_SNAPS_FLAG_WHOLE_OBJECT; | |
73 | } | |
f67539c2 TL |
74 | auto req = io::ImageDispatchSpec::create_list_snaps( |
75 | m_image_ctx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, | |
1e59de90 | 76 | aio_comp, {{m_image_offset, m_image_length}}, io::ImageArea::DATA, |
f67539c2 | 77 | {m_diff_context.from_snap_id, m_diff_context.end_snap_id}, |
20effc67 | 78 | list_snaps_flags, &m_snapshot_delta, {}); |
f67539c2 | 79 | req->send(); |
7c673cae FG |
80 | } |
81 | ||
82 | protected: | |
83 | typedef boost::tuple<uint64_t, size_t, bool> Diff; | |
84 | typedef std::list<Diff> Diffs; | |
85 | ||
86 | void finish(int r) override { | |
87 | CephContext *cct = m_cct; | |
7c673cae | 88 | |
f67539c2 TL |
89 | if (r < 0) { |
90 | ldout(cct, 20) << "list_snaps failed: " << m_image_offset << "~" | |
91 | << m_image_length << ": " << cpp_strerror(r) << dendl; | |
7c673cae FG |
92 | } |
93 | ||
f67539c2 TL |
94 | Diffs diffs; |
95 | ldout(cct, 20) << "image extent " << m_image_offset << "~" | |
96 | << m_image_length << ": list_snaps complete" << dendl; | |
97 | ||
98 | compute_diffs(&diffs); | |
99 | for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) { | |
100 | r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(), | |
101 | m_diff_context.callback_arg); | |
102 | if (r < 0) { | |
103 | break; | |
7c673cae FG |
104 | } |
105 | } | |
106 | m_diff_context.throttle.end_op(r); | |
107 | } | |
108 | ||
109 | private: | |
f67539c2 | 110 | I& m_image_ctx; |
7c673cae | 111 | CephContext *m_cct; |
7c673cae | 112 | DiffContext &m_diff_context; |
f67539c2 TL |
113 | uint64_t m_image_offset; |
114 | uint64_t m_image_length; | |
7c673cae | 115 | |
f67539c2 | 116 | io::SnapshotDelta m_snapshot_delta; |
7c673cae FG |
117 | |
118 | void compute_diffs(Diffs *diffs) { | |
119 | CephContext *cct = m_cct; | |
120 | ||
f67539c2 TL |
121 | // merge per-snapshot deltas into an aggregate |
122 | io::SparseExtents aggregate_snapshot_extents; | |
123 | for (auto& [key, snapshot_extents] : m_snapshot_delta) { | |
124 | for (auto& snapshot_extent : snapshot_extents) { | |
125 | auto state = snapshot_extent.get_val().state; | |
126 | ||
127 | // ignore DNE object (and parent) | |
128 | if ((state == io::SPARSE_EXTENT_STATE_DNE) || | |
129 | (key == io::INITIAL_WRITE_READ_SNAP_IDS && | |
130 | state == io::SPARSE_EXTENT_STATE_ZEROED)) { | |
131 | continue; | |
7c673cae | 132 | } |
f67539c2 TL |
133 | |
134 | aggregate_snapshot_extents.insert( | |
135 | snapshot_extent.get_off(), snapshot_extent.get_len(), | |
136 | {state, snapshot_extent.get_len()}); | |
7c673cae | 137 | } |
7c673cae | 138 | } |
7c673cae | 139 | |
f67539c2 TL |
140 | // build delta callback set |
141 | for (auto& snapshot_extent : aggregate_snapshot_extents) { | |
142 | ldout(cct, 20) << "off=" << snapshot_extent.get_off() << ", " | |
143 | << "len=" << snapshot_extent.get_len() << ", " | |
144 | << "state=" << snapshot_extent.get_val().state << dendl; | |
145 | diffs->emplace_back( | |
146 | snapshot_extent.get_off(), snapshot_extent.get_len(), | |
147 | snapshot_extent.get_val().state == io::SPARSE_EXTENT_STATE_DATA); | |
7c673cae FG |
148 | } |
149 | } | |
150 | }; | |
151 | ||
20effc67 TL |
152 | int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) { |
153 | // it's possible for a discard to create a hole in the parent image -- ignore | |
154 | if (exists) { | |
155 | interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg); | |
156 | diff->insert(off, len); | |
157 | } | |
158 | return 0; | |
159 | } | |
160 | ||
7c673cae FG |
161 | } // anonymous namespace |
162 | ||
163 | template <typename I> | |
164 | int DiffIterate<I>::diff_iterate(I *ictx, | |
165 | const cls::rbd::SnapshotNamespace& from_snap_namespace, | |
166 | const char *fromsnapname, | |
167 | uint64_t off, uint64_t len, | |
168 | bool include_parent, bool whole_object, | |
169 | int (*cb)(uint64_t, size_t, int, void *), | |
170 | void *arg) | |
171 | { | |
172 | ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off | |
173 | << " len = " << len << dendl; | |
174 | ||
eafe8130 TL |
175 | if (!ictx->data_ctx.is_valid()) { |
176 | return -ENODEV; | |
177 | } | |
178 | ||
7c673cae | 179 | // ensure previous writes are visible to listsnaps |
11fdf7f2 | 180 | C_SaferCond flush_ctx; |
7c673cae | 181 | { |
9f95a23c | 182 | std::shared_lock owner_locker{ictx->owner_lock}; |
494da23a TL |
183 | auto aio_comp = io::AioCompletion::create_and_start(&flush_ctx, ictx, |
184 | io::AIO_TYPE_FLUSH); | |
f67539c2 TL |
185 | auto req = io::ImageDispatchSpec::create_flush( |
186 | *ictx, io::IMAGE_DISPATCH_LAYER_INTERNAL_START, | |
187 | aio_comp, io::FLUSH_SOURCE_INTERNAL, {}); | |
11fdf7f2 | 188 | req->send(); |
11fdf7f2 TL |
189 | } |
190 | int r = flush_ctx.wait(); | |
191 | if (r < 0) { | |
192 | return r; | |
7c673cae FG |
193 | } |
194 | ||
11fdf7f2 | 195 | r = ictx->state->refresh_if_required(); |
7c673cae FG |
196 | if (r < 0) { |
197 | return r; | |
198 | } | |
199 | ||
9f95a23c | 200 | ictx->image_lock.lock_shared(); |
1e59de90 | 201 | r = clip_io(ictx, off, &len, io::ImageArea::DATA); |
9f95a23c | 202 | ictx->image_lock.unlock_shared(); |
7c673cae FG |
203 | if (r < 0) { |
204 | return r; | |
205 | } | |
206 | ||
207 | DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len, | |
208 | include_parent, whole_object, cb, arg); | |
209 | r = command.execute(); | |
210 | return r; | |
211 | } | |
212 | ||
213 | template <typename I> | |
214 | int DiffIterate<I>::execute() { | |
215 | CephContext* cct = m_image_ctx.cct; | |
216 | ||
eafe8130 TL |
217 | ceph_assert(m_image_ctx.data_ctx.is_valid()); |
218 | ||
7c673cae FG |
219 | librados::snap_t from_snap_id = 0; |
220 | librados::snap_t end_snap_id; | |
221 | uint64_t from_size = 0; | |
222 | uint64_t end_size; | |
223 | { | |
9f95a23c | 224 | std::shared_lock image_locker{m_image_ctx.image_lock}; |
7c673cae | 225 | if (m_from_snap_name) { |
f67539c2 TL |
226 | from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, |
227 | m_from_snap_name); | |
7c673cae FG |
228 | from_size = m_image_ctx.get_image_size(from_snap_id); |
229 | } | |
230 | end_snap_id = m_image_ctx.snap_id; | |
231 | end_size = m_image_ctx.get_image_size(end_snap_id); | |
232 | } | |
233 | ||
234 | if (from_snap_id == CEPH_NOSNAP) { | |
235 | return -ENOENT; | |
236 | } | |
237 | if (from_snap_id == end_snap_id) { | |
238 | // no diff. | |
239 | return 0; | |
240 | } | |
241 | if (from_snap_id >= end_snap_id) { | |
242 | return -EINVAL; | |
243 | } | |
244 | ||
245 | int r; | |
246 | bool fast_diff_enabled = false; | |
247 | BitVector<2> object_diff_state; | |
20effc67 | 248 | interval_set<uint64_t> parent_diff; |
9f95a23c TL |
249 | if (m_whole_object) { |
250 | C_SaferCond ctx; | |
251 | auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id, | |
252 | end_snap_id, | |
253 | &object_diff_state, &ctx); | |
254 | req->send(); | |
255 | ||
256 | r = ctx.wait(); | |
257 | if (r < 0) { | |
258 | ldout(cct, 5) << "fast diff disabled" << dendl; | |
259 | } else { | |
260 | ldout(cct, 5) << "fast diff enabled" << dendl; | |
261 | fast_diff_enabled = true; | |
20effc67 TL |
262 | |
263 | // check parent overlap only if we are comparing to the beginning of time | |
264 | if (m_include_parent && from_snap_id == 0) { | |
265 | std::shared_lock image_locker{m_image_ctx.image_lock}; | |
1e59de90 TL |
266 | uint64_t raw_overlap = 0; |
267 | m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &raw_overlap); | |
268 | auto overlap = m_image_ctx.reduce_parent_overlap(raw_overlap, false); | |
269 | if (overlap.first > 0 && overlap.second == io::ImageArea::DATA) { | |
20effc67 | 270 | ldout(cct, 10) << " first getting parent diff" << dendl; |
1e59de90 TL |
271 | DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0, |
272 | overlap.first, true, true, &simple_diff_cb, | |
273 | &parent_diff); | |
20effc67 TL |
274 | r = diff_parent.execute(); |
275 | if (r < 0) { | |
276 | return r; | |
277 | } | |
278 | } | |
279 | } | |
7c673cae FG |
280 | } |
281 | } | |
282 | ||
7c673cae FG |
283 | ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to " |
284 | << end_snap_id << " size from " << from_size | |
285 | << " to " << end_size << dendl; | |
7c673cae | 286 | DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg, |
20effc67 | 287 | m_whole_object, m_include_parent, from_snap_id, |
f67539c2 | 288 | end_snap_id); |
7c673cae FG |
289 | |
290 | uint64_t period = m_image_ctx.get_stripe_period(); | |
291 | uint64_t off = m_offset; | |
292 | uint64_t left = m_length; | |
293 | ||
294 | while (left > 0) { | |
295 | uint64_t period_off = off - (off % period); | |
20effc67 | 296 | uint64_t read_len = std::min(period_off + period - off, left); |
7c673cae | 297 | |
f67539c2 TL |
298 | if (fast_diff_enabled) { |
299 | // map to extents | |
20effc67 | 300 | std::map<object_t,std::vector<ObjectExtent> > object_extents; |
f67539c2 TL |
301 | Striper::file_to_extents(cct, m_image_ctx.format_string, |
302 | &m_image_ctx.layout, off, read_len, 0, | |
303 | object_extents, 0); | |
7c673cae | 304 | |
1d09f67e TL |
305 | // get diff info for each object and merge adjacent stripe units |
306 | // into an aggregate (this also sorts them) | |
307 | io::SparseExtents aggregate_sparse_extents; | |
f67539c2 | 308 | for (auto& [object, extents] : object_extents) { |
f67539c2 | 309 | const uint64_t object_no = extents.front().objectno; |
cd265ab1 | 310 | uint8_t diff_state = object_diff_state[object_no]; |
1d09f67e TL |
311 | ldout(cct, 20) << "object " << object << ": diff_state=" |
312 | << (int)diff_state << dendl; | |
313 | ||
cd265ab1 | 314 | if (diff_state == object_map::DIFF_STATE_HOLE && |
20effc67 | 315 | from_snap_id == 0 && !parent_diff.empty()) { |
92f5a8d4 | 316 | // no data in child object -- report parent diff instead |
f67539c2 | 317 | for (auto& oe : extents) { |
92f5a8d4 TL |
318 | for (auto& be : oe.buffer_extents) { |
319 | interval_set<uint64_t> o; | |
320 | o.insert(off + be.first, be.second); | |
20effc67 | 321 | o.intersection_of(parent_diff); |
92f5a8d4 TL |
322 | ldout(cct, 20) << " reporting parent overlap " << o << dendl; |
323 | for (auto e = o.begin(); e != o.end(); ++e) { | |
1d09f67e TL |
324 | aggregate_sparse_extents.insert(e.get_start(), e.get_len(), |
325 | {io::SPARSE_EXTENT_STATE_DATA, | |
326 | e.get_len()}); | |
92f5a8d4 TL |
327 | } |
328 | } | |
329 | } | |
cd265ab1 TL |
330 | } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED || |
331 | diff_state == object_map::DIFF_STATE_DATA_UPDATED) { | |
1d09f67e TL |
332 | auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ? |
333 | io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA); | |
f67539c2 | 334 | for (auto& oe : extents) { |
20effc67 | 335 | for (auto& be : oe.buffer_extents) { |
1d09f67e TL |
336 | aggregate_sparse_extents.insert(off + be.first, be.second, |
337 | {state, be.second}); | |
7c673cae FG |
338 | } |
339 | } | |
340 | } | |
f67539c2 | 341 | } |
1d09f67e TL |
342 | |
343 | for (const auto& se : aggregate_sparse_extents) { | |
344 | ldout(cct, 20) << "off=" << se.get_off() << ", len=" << se.get_len() | |
345 | << ", state=" << se.get_val().state << dendl; | |
346 | r = m_callback(se.get_off(), se.get_len(), | |
347 | se.get_val().state == io::SPARSE_EXTENT_STATE_DATA, | |
348 | m_callback_arg); | |
349 | if (r < 0) { | |
350 | return r; | |
351 | } | |
352 | } | |
353 | } else { | |
f67539c2 TL |
354 | auto diff_object = new C_DiffObject<I>(m_image_ctx, diff_context, off, |
355 | read_len); | |
356 | diff_object->send(); | |
357 | ||
358 | if (diff_context.throttle.pending_error()) { | |
359 | r = diff_context.throttle.wait_for_ret(); | |
360 | return r; | |
7c673cae FG |
361 | } |
362 | } | |
363 | ||
364 | left -= read_len; | |
365 | off += read_len; | |
366 | } | |
367 | ||
368 | r = diff_context.throttle.wait_for_ret(); | |
369 | if (r < 0) { | |
370 | return r; | |
371 | } | |
372 | return 0; | |
373 | } | |
374 | ||
7c673cae FG |
375 | } // namespace api |
376 | } // namespace librbd | |
377 | ||
378 | template class librbd::api::DiffIterate<librbd::ImageCtx>; |