]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/io/CopyupRequest.cc
94b80ecca1e86e8f95e79209db9e9dbe62f2ae35
[ceph.git] / ceph / src / librbd / io / CopyupRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include "librbd/io/CopyupRequest.h"
#include "common/ceph_context.h"
#include "common/ceph_mutex.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/WorkQueue.h"
#include "librbd/AsyncObjectThrottle.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ObjectMap.h"
#include "librbd/Utils.h"
#include "librbd/deep_copy/ObjectCopyRequest.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/io/ReadResult.h"

#include <boost/bind.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/lambda/construct.hpp>

#include <utility>
24
25 #define dout_subsys ceph_subsys_rbd
26 #undef dout_prefix
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": " \
29 << data_object_name(m_image_ctx, m_object_no) << " "
30
31 namespace librbd {
32 namespace io {
33
34 using librbd::util::data_object_name;
35
36 namespace {
37
// Throttle work unit that updates the object map for a single snap id of
// one object.  One instance exists per entry in *snap_ids; an entry of
// CEPH_NOSNAP updates the HEAD object map, any other entry updates that
// snapshot's object map.  Per AsyncObjectThrottle convention, send()
// returns 0 when an async update is in-flight and 1 when there is
// nothing to do for this entry.
template <typename I>
class C_UpdateObjectMap : public C_AsyncObjectThrottle<I> {
public:
  C_UpdateObjectMap(AsyncObjectThrottle<I> &throttle, I *image_ctx,
                    uint64_t object_no, uint8_t head_object_map_state,
                    const std::vector<uint64_t> *snap_ids,
                    bool first_snap_is_clean, const ZTracer::Trace &trace,
                    size_t snap_id_idx)
    : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_object_no(object_no),
      m_head_object_map_state(head_object_map_state), m_snap_ids(*snap_ids),
      m_first_snap_is_clean(first_snap_is_clean), m_trace(trace),
      m_snap_id_idx(snap_id_idx)
  {
  }

  int send() override {
    auto& image_ctx = this->m_image_ctx;
    ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
    if (image_ctx.exclusive_lock == nullptr) {
      // exclusive lock is gone -- no object map update possible
      return 1;
    }
    ceph_assert(image_ctx.exclusive_lock->is_lock_owner());

    std::shared_lock image_locker{image_ctx.image_lock};
    if (image_ctx.object_map == nullptr) {
      // object map feature disabled/detached -- nothing to update
      return 1;
    }

    uint64_t snap_id = m_snap_ids[m_snap_id_idx];
    if (snap_id == CEPH_NOSNAP) {
      return update_head();
    } else {
      return update_snapshot(snap_id);
    }
  }

  int update_head() {
    auto& image_ctx = this->m_image_ctx;
    ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));

    // NOTE(review): aio_update returning false is treated as "no update
    // was needed" (immediate completion via return 1) -- confirm against
    // ObjectMap::aio_update semantics
    bool sent = image_ctx.object_map->template aio_update<Context>(
      CEPH_NOSNAP, m_object_no, m_head_object_map_state, {}, m_trace, false,
      this);
    return (sent ? 0 : 1);
  }

  int update_snapshot(uint64_t snap_id) {
    auto& image_ctx = this->m_image_ctx;
    ceph_assert(ceph_mutex_is_locked(image_ctx.image_lock));

    uint8_t state = OBJECT_EXISTS;
    if (image_ctx.test_features(RBD_FEATURE_FAST_DIFF, image_ctx.image_lock) &&
        (m_snap_id_idx > 0 || m_first_snap_is_clean)) {
      // first snapshot should be exists+dirty since it contains
      // the copyup data -- later snapshots inherit the data.
      state = OBJECT_EXISTS_CLEAN;
    }

    // unlike update_head(), the code expects a snapshot update to always
    // be dispatched (note the differing boolean argument and the assert)
    bool sent = image_ctx.object_map->template aio_update<Context>(
      snap_id, m_object_no, state, {}, m_trace, true, this);
    ceph_assert(sent);
    return 0;
  }

private:
  uint64_t m_object_no;
  uint8_t m_head_object_map_state;          // desired HEAD object-map state
  const std::vector<uint64_t> &m_snap_ids;  // shared snap id list (not owned)
  bool m_first_snap_is_clean;
  const ZTracer::Trace &m_trace;
  size_t m_snap_id_idx;                     // index of this unit's snap id
};
110
111 } // anonymous namespace
112
113 template <typename I>
114 CopyupRequest<I>::CopyupRequest(I *ictx, uint64_t objectno,
115 Extents &&image_extents,
116 const ZTracer::Trace &parent_trace)
117 : m_image_ctx(ictx), m_object_no(objectno), m_image_extents(image_extents),
118 m_trace(util::create_trace(*m_image_ctx, "copy-up", parent_trace))
119 {
120 ceph_assert(m_image_ctx->data_ctx.is_valid());
121 m_async_op.start_op(*util::get_image_ctx(m_image_ctx));
122 }
123
template <typename I>
CopyupRequest<I>::~CopyupRequest() {
  // all attached write requests must have been drained by
  // complete_requests() before destruction
  ceph_assert(m_pending_requests.empty());
  m_async_op.finish_op();
}
129
130 template <typename I>
131 void CopyupRequest<I>::append_request(AbstractObjectWriteRequest<I> *req) {
132 std::lock_guard locker{m_lock};
133
134 auto cct = m_image_ctx->cct;
135 ldout(cct, 20) << "object_request=" << req << ", "
136 << "append=" << m_append_request_permitted << dendl;
137 if (m_append_request_permitted) {
138 m_pending_requests.push_back(req);
139 } else {
140 m_restart_requests.push_back(req);
141 }
142 }
143
// Entry point: start the state machine by reading the backing data from
// the parent image (which may instead dispatch a deep-copy).
template <typename I>
void CopyupRequest<I>::send() {
  read_from_parent();
}
148
template <typename I>
void CopyupRequest<I>::read_from_parent() {
  auto cct = m_image_ctx->cct;
  std::shared_lock image_locker{m_image_ctx->image_lock};

  if (m_image_ctx->parent == nullptr) {
    ldout(cct, 5) << "parent detached" << dendl;

    // no parent to read: queue the completion handler with -ENOENT so
    // the normal handler path still runs ("no parent data")
    m_image_ctx->op_work_queue->queue(
      util::create_context_callback<
        CopyupRequest<I>, &CopyupRequest<I>::handle_read_from_parent>(this),
      -ENOENT);
    return;
  } else if (is_deep_copy()) {
    // image is a migration destination: deep-copy instead of reading
    deep_copy();
    return;
  }

  auto comp = AioCompletion::create_and_start<
    CopyupRequest<I>,
    &CopyupRequest<I>::handle_read_from_parent>(
      this, util::get_image_ctx(m_image_ctx->parent), AIO_TYPE_READ);

  ldout(cct, 20) << "completion=" << comp << ", "
                 << "extents=" << m_image_extents
                 << dendl;
  // m_image_extents is moved into the read request below and must not be
  // used afterwards; results land in m_copyup_data (+ extent map when
  // sparse copyup is enabled)
  if (m_image_ctx->enable_sparse_copyup) {
    ImageRequest<I>::aio_read(
      m_image_ctx->parent, comp, std::move(m_image_extents),
      ReadResult{&m_copyup_extent_map, &m_copyup_data}, 0, m_trace);
  } else {
    ImageRequest<I>::aio_read(
      m_image_ctx->parent, comp, std::move(m_image_extents),
      ReadResult{&m_copyup_data}, 0, m_trace);
  }
}
185
template <typename I>
void CopyupRequest<I>::handle_read_from_parent(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << dendl;

  // image_lock then m_lock; every exit path below must release them in
  // reverse order
  m_image_ctx->image_lock.lock_shared();
  m_lock.lock();
  m_copyup_is_zero = m_copyup_data.is_zero();
  m_copyup_required = is_copyup_required();
  // from here on, newly arriving write requests are diverted to
  // m_restart_requests
  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.unlock();
    m_image_ctx->image_lock.unlock_shared();

    lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required) {
    m_lock.unlock();
    m_image_ctx->image_lock.unlock_shared();

    ldout(cct, 20) << "no-op, skipping" << dendl;
    finish(0);
    return;
  }

  // copyup() will affect snapshots only if parent data is not all
  // zeros.
  if (!m_copyup_is_zero) {
    // appended in reverse of m_image_ctx->snaps iteration order --
    // presumably yielding oldest-first snap ids; TODO confirm
    m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(),
                      m_image_ctx->snaps.rend());
  }

  m_lock.unlock();
  m_image_ctx->image_lock.unlock_shared();

  update_object_maps();
}
227
228 template <typename I>
229 void CopyupRequest<I>::deep_copy() {
230 auto cct = m_image_ctx->cct;
231 ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));
232 ceph_assert(m_image_ctx->parent != nullptr);
233
234 m_lock.lock();
235 m_flatten = is_copyup_required() ? true : m_image_ctx->migration_info.flatten;
236 m_lock.unlock();
237
238 ldout(cct, 20) << "flatten=" << m_flatten << dendl;
239
240 auto ctx = util::create_context_callback<
241 CopyupRequest<I>, &CopyupRequest<I>::handle_deep_copy>(this);
242 auto req = deep_copy::ObjectCopyRequest<I>::create(
243 m_image_ctx->parent, m_image_ctx, 0, 0,
244 m_image_ctx->migration_info.snap_map, m_object_no, m_flatten, nullptr, ctx);
245
246 req->send();
247 }
248
template <typename I>
void CopyupRequest<I>::handle_deep_copy(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << dendl;

  // image_lock then m_lock are held while inspecting state; each early
  // exit releases them in reverse order
  m_image_ctx->image_lock.lock_shared();
  m_lock.lock();
  m_copyup_required = is_copyup_required();
  if (r == -ENOENT && !m_flatten && m_copyup_required) {
    m_lock.unlock();
    m_image_ctx->image_lock.unlock_shared();

    // non-flatten deep-copy produced nothing but a copyup is still
    // required: re-run the whole state machine (deep_copy() will now
    // force flatten since is_copyup_required() holds)
    ldout(cct, 10) << "restart deep-copy with flatten" << dendl;
    send();
    return;
  }

  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.unlock();
    m_image_ctx->image_lock.unlock_shared();

    lderr(cct) << "error encountered during deep-copy: " << cpp_strerror(r)
               << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required && !is_update_object_map_required(r)) {
    m_lock.unlock();
    m_image_ctx->image_lock.unlock_shared();

    // -ENOENT (nothing to copy) is success for a no-op copyup
    if (r == -ENOENT) {
      r = 0;
    }

    ldout(cct, 20) << "skipping" << dendl;
    finish(r);
    return;
  }

  // For deep-copy, copyup() will never affect snapshots. However,
  // this state machine is responsible for updating object maps for
  // snapshots that have been created on destination image after
  // migration started.
  if (r != -ENOENT) {
    compute_deep_copy_snap_ids();
  }

  m_lock.unlock();
  m_image_ctx->image_lock.unlock_shared();

  update_object_maps();
}
304
template <typename I>
void CopyupRequest<I>::update_object_maps() {
  std::shared_lock owner_locker{m_image_ctx->owner_lock};
  std::shared_lock image_locker{m_image_ctx->image_lock};
  if (m_image_ctx->object_map == nullptr) {
    // object map feature disabled: go straight to the copyup I/O
    image_locker.unlock();
    owner_locker.unlock();

    copyup();
    return;
  }

  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << dendl;

  bool copy_on_read = m_pending_requests.empty();
  uint8_t head_object_map_state = OBJECT_EXISTS;
  if (copy_on_read && !m_snap_ids.empty() &&
      m_image_ctx->test_features(RBD_FEATURE_FAST_DIFF,
                                 m_image_ctx->image_lock)) {
    // HEAD is non-dirty since data is tied to first snapshot
    head_object_map_state = OBJECT_EXISTS_CLEAN;
  }

  auto r_it = m_pending_requests.rbegin();
  if (r_it != m_pending_requests.rend()) {
    // last write-op determines the final object map state
    head_object_map_state = (*r_it)->get_pre_write_object_map_state();
  }

  if ((*m_image_ctx->object_map)[m_object_no] != head_object_map_state) {
    // (maybe) need to update the HEAD object map state
    m_snap_ids.push_back(CEPH_NOSNAP);
  }
  image_locker.unlock();

  // NOTE(review): assumes the exclusive lock is still held (object map
  // implies exclusive lock); a lost lock would trip this assert
  ceph_assert(m_image_ctx->exclusive_lock->is_lock_owner());
  // factory builds one C_UpdateObjectMap per index into m_snap_ids;
  // throttle fan-out is capped by rbd_concurrent_management_ops
  typename AsyncObjectThrottle<I>::ContextFactory context_factory(
    boost::lambda::bind(boost::lambda::new_ptr<C_UpdateObjectMap<I>>(),
    boost::lambda::_1, m_image_ctx, m_object_no, head_object_map_state,
    &m_snap_ids, m_first_snap_is_clean, m_trace, boost::lambda::_2));
  auto ctx = util::create_context_callback<
    CopyupRequest<I>, &CopyupRequest<I>::handle_update_object_maps>(this);
  auto throttle = new AsyncObjectThrottle<I>(
    nullptr, *m_image_ctx, context_factory, ctx, nullptr, 0, m_snap_ids.size());
  throttle->start_ops(
    m_image_ctx->config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
}
353
354 template <typename I>
355 void CopyupRequest<I>::handle_update_object_maps(int r) {
356 auto cct = m_image_ctx->cct;
357 ldout(cct, 20) << "r=" << r << dendl;
358
359 if (r < 0) {
360 lderr(m_image_ctx->cct) << "failed to update object map: "
361 << cpp_strerror(r) << dendl;
362
363 finish(r);
364 return;
365 }
366
367 copyup();
368 }
369
template <typename I>
void CopyupRequest<I>::copyup() {
  auto cct = m_image_ctx->cct;
  // sample the snapshot context once so both RADOS ops below observe the
  // same snapshot set
  m_image_ctx->image_lock.lock_shared();
  auto snapc = m_image_ctx->snapc;
  m_image_ctx->image_lock.unlock_shared();

  m_lock.lock();
  if (!m_copyup_required) {
    m_lock.unlock();

    ldout(cct, 20) << "skipping copyup" << dendl;
    finish(0);
    return;
  }

  ldout(cct, 20) << dendl;

  bool copy_on_read = m_pending_requests.empty();
  // a dedicated copyup op (sent with an empty snapc) is needed whenever
  // snapshots exist and there is non-zero parent data to preserve
  bool deep_copyup = !snapc.snaps.empty() && !m_copyup_is_zero;
  if (m_copyup_is_zero) {
    // all-zero data need not be shipped to the OSD
    m_copyup_data.clear();
    m_copyup_extent_map.clear();
  }

  int r;
  librados::ObjectWriteOperation copyup_op;
  if (copy_on_read || deep_copyup) {
    if (m_image_ctx->enable_sparse_copyup) {
      cls_client::sparse_copyup(&copyup_op, m_copyup_extent_map, m_copyup_data);
    } else {
      cls_client::copyup(&copyup_op, m_copyup_data);
    }
    ObjectRequest<I>::add_write_hint(*m_image_ctx, &copyup_op);
    // one handle_copyup() callback expected for this op
    ++m_pending_copyups;
  }

  librados::ObjectWriteOperation write_op;
  if (!copy_on_read) {
    if (!deep_copyup) {
      // no separate copyup op above, so fold the copyup data into the
      // write op itself
      if (m_image_ctx->enable_sparse_copyup) {
        cls_client::sparse_copyup(&write_op, m_copyup_extent_map,
                                  m_copyup_data);
      } else {
        cls_client::copyup(&write_op, m_copyup_data);
      }
      ObjectRequest<I>::add_write_hint(*m_image_ctx, &write_op);
    }

    // merge all pending write ops into this single RADOS op
    for (auto req : m_pending_requests) {
      ldout(cct, 20) << "add_copyup_ops " << req << dendl;
      req->add_copyup_ops(&write_op);
    }

    if (write_op.size() > 0) {
      ++m_pending_copyups;
    }
  }
  m_lock.unlock();

  // issue librados ops at the end to simplify test cases
  std::string oid(data_object_name(m_image_ctx, m_object_no));
  std::vector<librados::snap_t> snaps;
  if (copyup_op.size() > 0) {
    // send only the copyup request with a blank snapshot context so that
    // all snapshots are detected from the parent for this object. If
    // this is a CoW request, a second request will be created for the
    // actual modification.
    ldout(cct, 20) << "copyup with empty snapshot context" << dendl;

    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      oid, comp, &copyup_op, 0, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }

  if (write_op.size() > 0) {
    // compare-and-write doesn't add any write ops (copyup+cmpext+write
    // can't be executed in the same RADOS op because, unless the object
    // was already present in the clone, cmpext wouldn't see it)
    ldout(cct, 20) << (!deep_copyup && write_op.size() > 2 ?
                         "copyup + ops" : !deep_copyup ? "copyup" : "ops")
                   << " with current snapshot context" << dendl;

    // this op carries the image's current snapshot context
    snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      oid, comp, &write_op, snapc.seq, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }
}
468
// Callback shared by both RADOS ops issued from copyup(); invoked once
// per op, teardown happens when the last one completes.
template <typename I>
void CopyupRequest<I>::handle_copyup(int r) {
  auto cct = m_image_ctx->cct;
  unsigned pending_copyups;
  {
    std::lock_guard locker{m_lock};
    ceph_assert(m_pending_copyups > 0);
    pending_copyups = --m_pending_copyups;
  }

  ldout(cct, 20) << "r=" << r << ", "
                 << "pending=" << pending_copyups << dendl;

  if (r < 0 && r != -ENOENT) {
    // -ENOENT is tolerated here; presumably the object/snapshot went away
    // mid-flight -- TODO confirm
    lderr(cct) << "failed to copyup object: " << cpp_strerror(r) << dendl;
    // fail attached requests now with r; restart requests also get r
    // (override_restart_retval=false)
    complete_requests(false, r);
  }

  if (pending_copyups == 0) {
    // last outstanding op: finish(0) completes any remaining attached
    // requests (already drained on the error path above) and deletes this
    finish(0);
  }
}
491
// Terminal state: complete/restart any remaining attached requests with r
// and self-destruct.  Must be the last call on this object.
template <typename I>
void CopyupRequest<I>::finish(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << dendl;

  complete_requests(true, r);
  delete this;
}
500
// Completes every attached write request with r, then completes the
// late-arriving (restart) requests.  When override_restart_retval is true
// the restart requests receive -ERESTART instead of r -- presumably
// prompting them to retry the copyup; confirm against
// AbstractObjectWriteRequest::handle_copyup.
template <typename I>
void CopyupRequest<I>::complete_requests(bool override_restart_retval, int r) {
  auto cct = m_image_ctx->cct;
  // drop this request from the image's copyup list first so restarted
  // requests don't re-attach to a dying request
  remove_from_list();

  // erase one-at-a-time so the list stays consistent if a callback
  // re-enters
  while (!m_pending_requests.empty()) {
    auto it = m_pending_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "completing request " << req << dendl;
    req->handle_copyup(r);
    m_pending_requests.erase(it);
  }

  if (override_restart_retval) {
    r = -ERESTART;
  }

  while (!m_restart_requests.empty()) {
    auto it = m_restart_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "restarting request " << req << dendl;
    req->handle_copyup(r);
    m_restart_requests.erase(it);
  }
}
526
template <typename I>
void CopyupRequest<I>::disable_append_requests() {
  ceph_assert(ceph_mutex_is_locked(m_lock));
  // newly arriving write requests will be queued on m_restart_requests
  // from now on (see append_request)
  m_append_request_permitted = false;
}
532
533 template <typename I>
534 void CopyupRequest<I>::remove_from_list() {
535 std::lock_guard copyup_list_locker{m_image_ctx->copyup_list_lock};
536
537 auto it = m_image_ctx->copyup_list.find(m_object_no);
538 if (it != m_image_ctx->copyup_list.end()) {
539 m_image_ctx->copyup_list.erase(it);
540 }
541 }
542
543 template <typename I>
544 bool CopyupRequest<I>::is_copyup_required() {
545 ceph_assert(ceph_mutex_is_locked(m_lock));
546
547 bool copy_on_read = m_pending_requests.empty();
548 if (copy_on_read) {
549 // always force a copyup if CoR enabled
550 return true;
551 }
552
553 if (!m_copyup_is_zero) {
554 return true;
555 }
556
557 for (auto req : m_pending_requests) {
558 if (!req->is_empty_write_op()) {
559 return true;
560 }
561 }
562 return false;
563 }
564
template <typename I>
bool CopyupRequest<I>::is_deep_copy() const {
  ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));
  // non-empty migration info means the parent data must be deep-copied
  return !m_image_ctx->migration_info.empty();
}
570
571 template <typename I>
572 bool CopyupRequest<I>::is_update_object_map_required(int r) {
573 ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));
574
575 if (r < 0) {
576 return false;
577 }
578
579 if (m_image_ctx->object_map == nullptr) {
580 return false;
581 }
582
583 if (m_image_ctx->migration_info.empty()) {
584 // migration might have completed while IO was in-flight,
585 // assume worst-case and perform an object map update
586 return true;
587 }
588
589 auto it = m_image_ctx->migration_info.snap_map.find(CEPH_NOSNAP);
590 ceph_assert(it != m_image_ctx->migration_info.snap_map.end());
591 return it->second[0] != CEPH_NOSNAP;
592 }
593
template <typename I>
void CopyupRequest<I>::compute_deep_copy_snap_ids() {
  ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));

  // don't copy ids for the snaps updated by object deep copy or
  // that don't overlap
  std::set<uint64_t> deep_copied;
  for (auto &it : m_image_ctx->migration_info.snap_map) {
    if (it.first != CEPH_NOSNAP) {
      // first destination snap id mapped from each source snap was
      // already handled by the deep-copy itself
      deep_copied.insert(it.second.front());
    }
  }

  // iterate m_image_ctx->snaps in reverse; side effect: seeing a
  // deep-copied snap marks the first remaining snapshot as clean
  std::copy_if(m_image_ctx->snaps.rbegin(), m_image_ctx->snaps.rend(),
               std::back_inserter(m_snap_ids),
               [this, cct=m_image_ctx->cct, &deep_copied](uint64_t snap_id) {
      if (deep_copied.count(snap_id)) {
        m_first_snap_is_clean = true;
        return false;
      }

      uint64_t parent_overlap = 0;
      int r = m_image_ctx->get_parent_overlap(snap_id, &parent_overlap);
      if (r < 0) {
        // best-effort: a failed lookup falls through as zero overlap
        ldout(cct, 5) << "failed getting parent overlap for snap_id: "
                      << snap_id << ": " << cpp_strerror(r) << dendl;
      }
      if (parent_overlap == 0) {
        return false;
      }
      // keep the snap only if this object intersects the parent overlap
      std::vector<std::pair<uint64_t, uint64_t>> extents;
      Striper::extent_to_file(cct, &m_image_ctx->layout,
                              m_object_no, 0,
                              m_image_ctx->layout.object_size,
                              extents);
      auto overlap = m_image_ctx->prune_parent_extents(
        extents, parent_overlap);
      return overlap > 0;
    });
}
634
635 } // namespace io
636 } // namespace librbd
637
638 template class librbd::io::CopyupRequest<librbd::ImageCtx>;