]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/operation/SparsifyRequest.cc
import ceph 14.2.5
[ceph.git] / ceph / src / librbd / operation / SparsifyRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/operation/SparsifyRequest.h"
5 #include "cls/rbd/cls_rbd_client.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "include/err.h"
9 #include "librbd/AsyncObjectThrottle.h"
10 #include "librbd/ExclusiveLock.h"
11 #include "librbd/ImageCtx.h"
12 #include "librbd/Types.h"
13 #include "librbd/io/ObjectRequest.h"
14 #include "osdc/Striper.h"
15 #include <boost/lambda/bind.hpp>
16 #include <boost/lambda/construct.hpp>
17
18 #define dout_subsys ceph_subsys_rbd
19
20 namespace librbd {
21 namespace operation {
22
23 namespace {
24
25 bool may_be_trimmed(const std::map<uint64_t,uint64_t> &extent_map,
26 const bufferlist &bl, size_t sparse_size,
27 uint64_t *new_end_ptr) {
28 if (extent_map.empty()) {
29 *new_end_ptr = 0;
30 return true;
31 }
32
33 uint64_t end = extent_map.rbegin()->first + extent_map.rbegin()->second;
34 uint64_t new_end = end;
35 uint64_t bl_off = bl.length();
36
37 for (auto it = extent_map.rbegin(); it != extent_map.rend(); it++) {
38 auto off = it->first;
39 auto len = it->second;
40
41 new_end = p2roundup<uint64_t>(off + len, sparse_size);
42
43 uint64_t extent_left = len;
44 uint64_t sub_len = len % sparse_size;
45 if (sub_len == 0) {
46 sub_len = sparse_size;
47 }
48 while (extent_left > 0) {
49 ceph_assert(bl_off >= sub_len);
50 bl_off -= sub_len;
51 bufferlist sub_bl;
52 sub_bl.substr_of(bl, bl_off, sub_len);
53 if (!sub_bl.is_zero()) {
54 break;
55 }
56 new_end -= sparse_size;
57 extent_left -= sub_len;
58 sub_len = sparse_size;
59 }
60 if (extent_left > 0) {
61 break;
62 }
63 }
64
65 if (new_end < end) {
66 *new_end_ptr = new_end;
67 return true;
68 }
69
70 return false;
71 }
72
73 } // anonymous namespace
74
75 using util::create_context_callback;
76 using util::create_rados_callback;
77
78 #undef dout_prefix
79 #define dout_prefix *_dout << "librbd::operation::SparsifyObject: " << this \
80 << " " << m_oid << " " << __func__ << ": "
81
82 template <typename I>
83 class C_SparsifyObject : public C_AsyncObjectThrottle<I> {
84 public:
85
86 /**
87 * @verbatim
88 *
89 * <start>
90 * |
91 * v (not supported)
92 * SPARSIFY * * * * * * * * * * * * > READ < * * * * * * * * * * (concurrent
93 * | | * update is
94 * | (object map disabled) | (can trim) * detected)
95 * |------------------------\ V *
96 * | | PRE UPDATE OBJECT MAP *
97 * | (object map enabled) | | (if needed) *
98 * v | V *
99 * PRE UPDATE OBJECT MAP | TRIM * * * * * * * * * * *
100 * | | |
101 * v | V
102 * CHECK EXISTS | POST UPDATE OBJECT MAP
103 * | | | (if needed)
104 * v | |
105 * POST UPDATE OBJECT MAP | |
106 * | | |
107 * v | |
108 * <finish> <------------------/<-------/
109 *
110 * @endverbatim
111 *
112 */
113
114 C_SparsifyObject(AsyncObjectThrottle<I> &throttle, I *image_ctx,
115 uint64_t object_no, size_t sparse_size)
116 : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_cct(image_ctx->cct),
117 m_object_no(object_no), m_sparse_size(sparse_size),
118 m_oid(image_ctx->get_object_name(object_no)) {
119 }
120
121 int send() override {
122 I &image_ctx = this->m_image_ctx;
123 ceph_assert(image_ctx.owner_lock.is_locked());
124
125 ldout(m_cct, 20) << dendl;
126
127 if (!image_ctx.data_ctx.is_valid()) {
128 lderr(m_cct) << "missing data pool" << dendl;
129 return -ENODEV;
130 }
131
132 if (image_ctx.exclusive_lock != nullptr &&
133 !image_ctx.exclusive_lock->is_lock_owner()) {
134 ldout(m_cct, 1) << "lost exclusive lock during sparsify" << dendl;
135 return -ERESTART;
136 }
137
138 {
139 RWLock::RLocker snap_locker(image_ctx.snap_lock);
140 if (image_ctx.object_map != nullptr &&
141 !image_ctx.object_map->object_may_exist(m_object_no)) {
142 // can skip because the object does not exist
143 return 1;
144 }
145
146 RWLock::RLocker parent_locker(image_ctx.parent_lock);
147 uint64_t overlap_objects = 0;
148 uint64_t overlap;
149 int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &overlap);
150 if (r == 0 && overlap > 0) {
151 overlap_objects = Striper::get_num_objects(image_ctx.layout, overlap);
152 }
153 m_remove_empty = (m_object_no >= overlap_objects);
154 }
155
156 send_sparsify();
157 return 0;
158 }
159
160 void send_sparsify() {
161 I &image_ctx = this->m_image_ctx;
162 ldout(m_cct, 20) << dendl;
163
164 librados::ObjectWriteOperation op;
165 cls_client::sparsify(&op, m_sparse_size, m_remove_empty);
166 auto comp = create_rados_callback<
167 C_SparsifyObject, &C_SparsifyObject::handle_sparsify>(this);
168 int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
169 ceph_assert(r == 0);
170 comp->release();
171 }
172
173 void handle_sparsify(int r) {
174 ldout(m_cct, 20) << "r=" << r << dendl;
175
176 if (r == -EOPNOTSUPP) {
177 m_trying_trim = true;
178 send_read();
179 return;
180 }
181
182 if (r == -ENOENT) {
183 finish_op(0);
184 return;
185 }
186
187 if (r < 0) {
188 lderr(m_cct) << "failed to sparsify: " << cpp_strerror(r) << dendl;
189 finish_op(r);
190 return;
191 }
192
193 send_pre_update_object_map();
194 }
195
196 void send_pre_update_object_map() {
197 I &image_ctx = this->m_image_ctx;
198
199 if (m_trying_trim) {
200 if (!m_remove_empty || m_new_end != 0 ||
201 !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
202 send_trim();
203 return;
204 }
205 } else if (!m_remove_empty ||
206 !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
207 finish_op(0);
208 return;
209 }
210
211 ldout(m_cct, 20) << dendl;
212
213 image_ctx.owner_lock.get_read();
214 image_ctx.snap_lock.get_read();
215 if (image_ctx.object_map == nullptr) {
216 // possible that exclusive lock was lost in background
217 lderr(m_cct) << "object map is not initialized" << dendl;
218
219 image_ctx.snap_lock.put_read();
220 image_ctx.owner_lock.put_read();
221 finish_op(-EINVAL);
222 return;
223 }
224
225 int r;
226 m_finish_op_ctx = image_ctx.exclusive_lock->start_op(&r);
227 if (m_finish_op_ctx == nullptr) {
228 lderr(m_cct) << "lost exclusive lock" << dendl;
229 image_ctx.snap_lock.put_read();
230 image_ctx.owner_lock.put_read();
231 finish_op(r);
232 return;
233 }
234
235 auto ctx = create_context_callback<
236 C_SparsifyObject<I>,
237 &C_SparsifyObject<I>::handle_pre_update_object_map>(this);
238
239 image_ctx.object_map_lock.get_write();
240 bool sent = image_ctx.object_map->template aio_update<
241 Context, &Context::complete>(CEPH_NOSNAP, m_object_no, OBJECT_PENDING,
242 OBJECT_EXISTS, {}, false, ctx);
243
244 // NOTE: state machine might complete before we reach here
245 image_ctx.object_map_lock.put_write();
246 image_ctx.snap_lock.put_read();
247 image_ctx.owner_lock.put_read();
248 if (!sent) {
249 finish_op(0);
250 }
251 }
252
253 void handle_pre_update_object_map(int r) {
254 ldout(m_cct, 20) << "r=" << r << dendl;
255
256 if (r < 0) {
257 lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
258 << dendl;
259 finish_op(r);
260 return;
261 }
262
263 if (m_trying_trim) {
264 send_trim();
265 } else {
266 send_check_exists();
267 }
268 }
269
270 void send_check_exists() {
271 I &image_ctx = this->m_image_ctx;
272
273 ldout(m_cct, 20) << dendl;
274
275 librados::ObjectReadOperation op;
276 op.stat(NULL, NULL, NULL);
277 m_bl.clear();
278 auto comp = create_rados_callback<
279 C_SparsifyObject, &C_SparsifyObject::handle_check_exists>(this);
280 int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
281 ceph_assert(r == 0);
282 comp->release();
283 }
284
285 void handle_check_exists(int r) {
286 ldout(m_cct, 20) << "r=" << r << dendl;
287
288 if (r < 0 && r != -ENOENT) {
289 lderr(m_cct) << "stat failed: " << cpp_strerror(r) << dendl;
290 finish_op(r);
291 return;
292 }
293
294 send_post_update_object_map(r == 0);
295 }
296
297 void send_post_update_object_map(bool exists) {
298 I &image_ctx = this->m_image_ctx;
299
300 ldout(m_cct, 20) << dendl;
301
302 auto ctx = create_context_callback<
303 C_SparsifyObject<I>,
304 &C_SparsifyObject<I>::handle_post_update_object_map>(this);
305 bool sent;
306 {
307 RWLock::RLocker owner_locker(image_ctx.owner_lock);
308 RWLock::RLocker snap_locker(image_ctx.snap_lock);
309
310 assert(image_ctx.exclusive_lock->is_lock_owner());
311 assert(image_ctx.object_map != nullptr);
312
313 RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
314
315 sent = image_ctx.object_map->template aio_update<
316 Context, &Context::complete>(CEPH_NOSNAP, m_object_no,
317 exists ? OBJECT_EXISTS : OBJECT_NONEXISTENT,
318 OBJECT_PENDING, {}, false, ctx);
319 }
320 if (!sent) {
321 ctx->complete(0);
322 }
323 }
324
325 void handle_post_update_object_map(int r) {
326 ldout(m_cct, 20) << "r=" << r << dendl;
327
328 if (r < 0) {
329 lderr(m_cct) << "failed to update object map: " << cpp_strerror(r)
330 << dendl;
331 finish_op(r);
332 return;
333 }
334
335 finish_op(0);
336 }
337
338 void send_read() {
339 I &image_ctx = this->m_image_ctx;
340
341 ldout(m_cct, 20) << dendl;
342
343 librados::ObjectReadOperation op;
344 m_bl.clear();
345 op.sparse_read(0, image_ctx.layout.object_size, &m_extent_map, &m_bl,
346 nullptr);
347 auto comp = create_rados_callback<
348 C_SparsifyObject, &C_SparsifyObject::handle_read>(this);
349 int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op, &m_bl);
350 ceph_assert(r == 0);
351 comp->release();
352 }
353
354 void handle_read(int r) {
355 ldout(m_cct, 20) << "r=" << r << dendl;
356
357 if (r < 0) {
358 if (r == -ENOENT) {
359 r = 0;
360 } else {
361 lderr(m_cct) << "failed to read object: " << cpp_strerror(r) << dendl;
362 }
363 finish_op(r);
364 return;
365 }
366
367 if (!may_be_trimmed(m_extent_map, m_bl, m_sparse_size, &m_new_end)) {
368 finish_op(0);
369 return;
370 }
371
372 send_pre_update_object_map();
373 }
374
375 void send_trim() {
376 I &image_ctx = this->m_image_ctx;
377
378 ldout(m_cct, 20) << dendl;
379
380 ceph_assert(m_new_end < image_ctx.layout.object_size);
381
382 librados::ObjectWriteOperation op;
383 m_bl.clear();
384 m_bl.append_zero(image_ctx.layout.object_size - m_new_end);
385 op.cmpext(m_new_end, m_bl, nullptr);
386 if (m_new_end == 0 && m_remove_empty) {
387 op.remove();
388 } else {
389 op.truncate(m_new_end);
390 }
391
392 auto comp = create_rados_callback<
393 C_SparsifyObject, &C_SparsifyObject::handle_trim>(this);
394 int r = image_ctx.data_ctx.aio_operate(m_oid, comp, &op);
395 ceph_assert(r == 0);
396 comp->release();
397 }
398
399 void handle_trim(int r) {
400 I &image_ctx = this->m_image_ctx;
401
402 ldout(m_cct, 20) << "r=" << r << dendl;
403
404 if (r <= -MAX_ERRNO) {
405 m_finish_op_ctx->complete(0);
406 m_finish_op_ctx = nullptr;
407 send_read();
408 return;
409 }
410
411 if (r < 0 && r != -ENOENT) {
412 lderr(m_cct) << "failed to trim: " << cpp_strerror(r) << dendl;
413 finish_op(r);
414 return;
415 }
416
417 if (!m_remove_empty || m_new_end != 0 ||
418 !image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
419 finish_op(0);
420 return;
421 }
422
423 send_post_update_object_map(false);
424 }
425
426 void finish_op(int r) {
427 ldout(m_cct, 20) << "r=" << r << dendl;
428
429 if (m_finish_op_ctx != nullptr) {
430 m_finish_op_ctx->complete(0);
431 }
432 this->complete(r);
433 }
434
435 private:
436 CephContext *m_cct;
437 uint64_t m_object_no;
438 size_t m_sparse_size;
439 std::string m_oid;
440
441 bool m_remove_empty = false;
442 bool m_trying_trim = false;
443 bufferlist m_bl;
444 std::map<uint64_t,uint64_t> m_extent_map;
445 uint64_t m_new_end = 0;
446 Context *m_finish_op_ctx = nullptr;
447 };
448
449 #undef dout_prefix
450 #define dout_prefix *_dout << "librbd::operation::SparsifyRequest: " << this \
451 << " " << __func__ << ": "
452
453 template <typename I>
454 bool SparsifyRequest<I>::should_complete(int r) {
455 I &image_ctx = this->m_image_ctx;
456 CephContext *cct = image_ctx.cct;
457 ldout(cct, 5) << "r=" << r << dendl;
458 if (r < 0) {
459 lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
460 }
461 return true;
462 }
463
464 template <typename I>
465 void SparsifyRequest<I>::send_op() {
466 sparsify_objects();
467 }
468
469 template <typename I>
470 void SparsifyRequest<I>::sparsify_objects() {
471 I &image_ctx = this->m_image_ctx;
472 ceph_assert(image_ctx.owner_lock.is_locked());
473
474 CephContext *cct = image_ctx.cct;
475 ldout(cct, 5) << dendl;
476
477 assert(image_ctx.owner_lock.is_locked());
478
479 uint64_t objects = 0;
480 {
481 RWLock::RLocker snap_locker(image_ctx.snap_lock);
482 objects = image_ctx.get_object_count(CEPH_NOSNAP);
483 }
484
485 auto ctx = create_context_callback<
486 SparsifyRequest<I>,
487 &SparsifyRequest<I>::handle_sparsify_objects>(this);
488 typename AsyncObjectThrottle<I>::ContextFactory context_factory(
489 boost::lambda::bind(boost::lambda::new_ptr<C_SparsifyObject<I> >(),
490 boost::lambda::_1, &image_ctx, boost::lambda::_2, m_sparse_size));
491 AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
492 this, image_ctx, context_factory, ctx, &m_prog_ctx, 0, objects);
493 throttle->start_ops(
494 image_ctx.config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
495 }
496
497 template <typename I>
498 void SparsifyRequest<I>::handle_sparsify_objects(int r) {
499 I &image_ctx = this->m_image_ctx;
500 CephContext *cct = image_ctx.cct;
501 ldout(cct, 5) << "r=" << r << dendl;
502
503 if (r == -ERESTART) {
504 ldout(cct, 5) << "sparsify operation interrupted" << dendl;
505 this->complete(r);
506 return;
507 } else if (r < 0) {
508 lderr(cct) << "sparsify encountered an error: " << cpp_strerror(r) << dendl;
509 this->complete(r);
510 return;
511 }
512
513 this->complete(0);
514 }
515
516 } // namespace operation
517 } // namespace librbd
518
// Explicit instantiation of the request for the librbd::ImageCtx image type.
template class librbd::operation::SparsifyRequest<librbd::ImageCtx>;