]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/io/CopyupRequest.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / librbd / io / CopyupRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include "librbd/io/CopyupRequest.h"
#include "common/ceph_context.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Mutex.h"
#include "common/WorkQueue.h"
#include "librbd/AsyncObjectThrottle.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ObjectMap.h"
#include "librbd/Utils.h"
#include "librbd/deep_copy/ObjectCopyRequest.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/io/ReadResult.h"

#include <boost/bind.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/lambda/construct.hpp>

#include <utility>
24
25 #define dout_subsys ceph_subsys_rbd
26 #undef dout_prefix
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": "
29
30 namespace librbd {
31 namespace io {
32
33 namespace {
34
// Throttled context that updates the object map state of one object for a
// single snapshot id (or the HEAD revision).  The surrounding
// AsyncObjectThrottle creates one instance per entry in *snap_ids.
template <typename I>
class C_UpdateObjectMap : public C_AsyncObjectThrottle<I> {
public:
  C_UpdateObjectMap(AsyncObjectThrottle<I> &throttle, I *image_ctx,
                    uint64_t object_no, uint8_t head_object_map_state,
                    const std::vector<uint64_t> *snap_ids,
                    bool first_snap_is_clean, const ZTracer::Trace &trace,
                    size_t snap_id_idx)
    : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_object_no(object_no),
      m_head_object_map_state(head_object_map_state), m_snap_ids(*snap_ids),
      m_first_snap_is_clean(first_snap_is_clean), m_trace(trace),
      m_snap_id_idx(snap_id_idx)
  {
  }

  // Returns 0 when an async update was issued (this context is completed
  // later) or 1 when the update can be skipped entirely.
  int send() override {
    auto& image_ctx = this->m_image_ctx;
    ceph_assert(image_ctx.owner_lock.is_locked());
    if (image_ctx.exclusive_lock == nullptr) {
      // exclusive lock no longer held -- skip the object map update
      return 1;
    }
    ceph_assert(image_ctx.exclusive_lock->is_lock_owner());

    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    if (image_ctx.object_map == nullptr) {
      // object-map feature disabled
      return 1;
    }

    // CEPH_NOSNAP entry designates the HEAD revision
    uint64_t snap_id = m_snap_ids[m_snap_id_idx];
    if (snap_id == CEPH_NOSNAP) {
      return update_head();
    } else {
      return update_snapshot(snap_id);
    }
  }

  int update_head() {
    auto& image_ctx = this->m_image_ctx;
    RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
    // aio_update may decline the update (returns false) if no state
    // change is required
    bool sent = image_ctx.object_map->template aio_update<Context>(
      CEPH_NOSNAP, m_object_no, m_head_object_map_state, {}, m_trace, false,
      this);
    return (sent ? 0 : 1);
  }

  int update_snapshot(uint64_t snap_id) {
    auto& image_ctx = this->m_image_ctx;
    uint8_t state = OBJECT_EXISTS;
    if (image_ctx.test_features(RBD_FEATURE_FAST_DIFF, image_ctx.snap_lock) &&
        (m_snap_id_idx > 0 || m_first_snap_is_clean)) {
      // first snapshot should be exists+dirty since it contains
      // the copyup data -- later snapshots inherit the data.
      state = OBJECT_EXISTS_CLEAN;
    }

    RWLock::RLocker object_map_locker(image_ctx.object_map_lock);
    bool sent = image_ctx.object_map->template aio_update<Context>(
      snap_id, m_object_no, state, {}, m_trace, true, this);
    ceph_assert(sent);
    return 0;
  }

private:
  uint64_t m_object_no;
  uint8_t m_head_object_map_state;
  // non-owning reference to the snap-id list owned by CopyupRequest
  const std::vector<uint64_t> &m_snap_ids;
  bool m_first_snap_is_clean;
  const ZTracer::Trace &m_trace;
  size_t m_snap_id_idx;
};
105
106 } // anonymous namespace
107
108 template <typename I>
109 CopyupRequest<I>::CopyupRequest(I *ictx, const std::string &oid,
110 uint64_t objectno, Extents &&image_extents,
111 const ZTracer::Trace &parent_trace)
112 : m_image_ctx(ictx), m_oid(oid), m_object_no(objectno),
113 m_image_extents(image_extents),
114 m_trace(util::create_trace(*m_image_ctx, "copy-up", parent_trace)),
115 m_lock("CopyupRequest", false, false)
116 {
117 m_async_op.start_op(*util::get_image_ctx(m_image_ctx));
118 }
119
template <typename I>
CopyupRequest<I>::~CopyupRequest() {
  // all attached write requests must have been drained via
  // complete_requests() before the request is destroyed
  ceph_assert(m_pending_requests.empty());
  m_async_op.finish_op();
}
125
126 template <typename I>
127 void CopyupRequest<I>::append_request(AbstractObjectWriteRequest<I> *req) {
128 Mutex::Locker locker(m_lock);
129
130 auto cct = m_image_ctx->cct;
131 ldout(cct, 20) << "oid=" << m_oid << ", "
132 << "object_request=" << req << ", "
133 << "append=" << m_append_request_permitted << dendl;
134 if (m_append_request_permitted) {
135 m_pending_requests.push_back(req);
136 } else {
137 m_restart_requests.push_back(req);
138 }
139 }
140
// Entry point of the state machine: fetch the object's backing data from
// the parent image (or start a deep-copy when migrating).
template <typename I>
void CopyupRequest<I>::send() {
  read_from_parent();
}
145
// Read the parent data backing this object.  Three paths:
//  * parent detached: complete asynchronously with -ENOENT so the normal
//    handle_read_from_parent() flow still runs,
//  * image mid-migration: delegate to the deep-copy state machine,
//  * otherwise: issue an image-level read against the parent.
template <typename I>
void CopyupRequest<I>::read_from_parent() {
  auto cct = m_image_ctx->cct;
  RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
  RWLock::RLocker parent_locker(m_image_ctx->parent_lock);

  if (m_image_ctx->parent == nullptr) {
    ldout(cct, 5) << "parent detached" << dendl;

    // preserve async semantics by queueing the callback with -ENOENT
    m_image_ctx->op_work_queue->queue(
      util::create_context_callback<
        CopyupRequest<I>, &CopyupRequest<I>::handle_read_from_parent>(this),
      -ENOENT);
    return;
  } else if (is_deep_copy()) {
    deep_copy();
    return;
  }

  auto comp = AioCompletion::create_and_start<
    CopyupRequest<I>,
    &CopyupRequest<I>::handle_read_from_parent>(
      this, util::get_image_ctx(m_image_ctx->parent), AIO_TYPE_READ);

  ldout(cct, 20) << "oid=" << m_oid << ", "
                 << "completion=" << comp << ", "
                 << "extents=" << m_image_extents
                 << dendl;
  // m_image_extents is consumed here; the read result lands in
  // m_copyup_data
  ImageRequest<I>::aio_read(m_image_ctx->parent, comp,
                            std::move(m_image_extents),
                            ReadResult{&m_copyup_data}, 0, m_trace);
}
178
// Completion of the parent read: decide whether a copyup is still
// required, freeze the set of piggy-backed requests, and collect the
// snapshot ids that will need object map updates.
template <typename I>
void CopyupRequest<I>::handle_read_from_parent(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  // lock order: snap_lock, then m_lock (released in reverse below)
  m_image_ctx->snap_lock.get_read();
  m_lock.Lock();
  m_copyup_is_zero = m_copyup_data.is_zero();
  m_copyup_required = is_copyup_required();
  // from here on, new object requests must restart rather than append
  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    ldout(cct, 20) << "no-op, skipping" << dendl;
    finish(0);
    return;
  }

  // copyup() will affect snapshots only if parent data is not all
  // zeros.
  if (!m_copyup_is_zero) {
    // reversed iteration order: the first processed snap id is the one
    // marked exists+dirty by C_UpdateObjectMap
    m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(),
                      m_image_ctx->snaps.rend());
  }

  m_lock.Unlock();
  m_image_ctx->snap_lock.put_read();

  update_object_maps();
}
220
// Copy this object from the migration source via the deep-copy object
// machinery.  Preconditions (asserted): snap_lock and parent_lock held,
// parent attached.
template <typename I>
void CopyupRequest<I>::deep_copy() {
  auto cct = m_image_ctx->cct;
  ceph_assert(m_image_ctx->snap_lock.is_locked());
  ceph_assert(m_image_ctx->parent_lock.is_locked());
  ceph_assert(m_image_ctx->parent != nullptr);

  m_lock.Lock();
  // force a flatten when attached writes require the object to exist
  // locally; otherwise follow the migration policy
  m_flatten = is_copyup_required() ? true : m_image_ctx->migration_info.flatten;
  m_lock.Unlock();

  ldout(cct, 20) << "oid=" << m_oid << ", flatten=" << m_flatten << dendl;

  auto ctx = util::create_context_callback<
    CopyupRequest<I>, &CopyupRequest<I>::handle_deep_copy>(this);
  auto req = deep_copy::ObjectCopyRequest<I>::create(
    m_image_ctx->parent, m_image_ctx, m_image_ctx->migration_info.snap_map,
    m_object_no, m_flatten, ctx);

  req->send();
}
242
// Completion of the deep-copy.  -ENOENT may trigger a retry with flatten
// enabled; otherwise determine whether copyup and/or object-map updates
// are still required.
template <typename I>
void CopyupRequest<I>::handle_deep_copy(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  // lock order: snap_lock, then m_lock (released in reverse below)
  m_image_ctx->snap_lock.get_read();
  m_lock.Lock();
  m_copyup_required = is_copyup_required();
  if (r == -ENOENT && !m_flatten && m_copyup_required) {
    // nothing was copied but attached writes need the object to exist:
    // restart the whole state machine with flatten enabled
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    ldout(cct, 10) << "restart deep-copy with flatten" << dendl;
    send();
    return;
  }

  // from here on, new object requests must restart rather than append
  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    lderr(cct) << "error encountered during deep-copy: " << cpp_strerror(r)
               << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required && !is_update_object_map_required(r)) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    if (r == -ENOENT) {
      // object absent in the source is not an error for the caller
      r = 0;
    }

    ldout(cct, 20) << "skipping" << dendl;
    finish(r);
    return;
  }

  // For deep-copy, copyup() will never affect snapshots. However,
  // this state machine is responsible for updating object maps for
  // snapshots that have been created on destination image after
  // migration started.
  if (r != -ENOENT) {
    compute_deep_copy_snap_ids();
  }

  m_lock.Unlock();
  m_image_ctx->snap_lock.put_read();

  update_object_maps();
}
298
// Update the object map for the HEAD revision and every snapshot in
// m_snap_ids (via a throttled fan-out of C_UpdateObjectMap contexts),
// then fall through to copyup().  Skips straight to copyup() when the
// object-map feature is disabled.
template <typename I>
void CopyupRequest<I>::update_object_maps() {
  RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
  RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
  if (m_image_ctx->object_map == nullptr) {
    // object-map feature disabled -- nothing to update
    snap_locker.unlock();
    owner_locker.unlock();

    copyup();
    return;
  }

  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << dendl;

  bool copy_on_read = m_pending_requests.empty();
  uint8_t head_object_map_state = OBJECT_EXISTS;
  if (copy_on_read && !m_snap_ids.empty() &&
      m_image_ctx->test_features(RBD_FEATURE_FAST_DIFF,
                                 m_image_ctx->snap_lock)) {
    // HEAD is non-dirty since data is tied to first snapshot
    head_object_map_state = OBJECT_EXISTS_CLEAN;
  }

  auto r_it = m_pending_requests.rbegin();
  if (r_it != m_pending_requests.rend()) {
    // last write-op determines the final object map state
    head_object_map_state = (*r_it)->get_pre_write_object_map_state();
  }

  RWLock::WLocker object_map_locker(m_image_ctx->object_map_lock);
  if ((*m_image_ctx->object_map)[m_object_no] != head_object_map_state) {
    // (maybe) need to update the HEAD object map state
    m_snap_ids.push_back(CEPH_NOSNAP);
  }
  object_map_locker.unlock();
  snap_locker.unlock();

  ceph_assert(m_image_ctx->exclusive_lock->is_lock_owner());
  // factory builds one C_UpdateObjectMap per snap-id index (_2)
  typename AsyncObjectThrottle<I>::ContextFactory context_factory(
    boost::lambda::bind(boost::lambda::new_ptr<C_UpdateObjectMap<I>>(),
      boost::lambda::_1, m_image_ctx, m_object_no, head_object_map_state,
      &m_snap_ids, m_first_snap_is_clean, m_trace, boost::lambda::_2));
  auto ctx = util::create_context_callback<
    CopyupRequest<I>, &CopyupRequest<I>::handle_update_object_maps>(this);
  // throttle deletes itself once all ops complete and invokes ctx
  auto throttle = new AsyncObjectThrottle<I>(
    nullptr, *m_image_ctx, context_factory, ctx, nullptr, 0, m_snap_ids.size());
  throttle->start_ops(
    m_image_ctx->config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
}
349
350 template <typename I>
351 void CopyupRequest<I>::handle_update_object_maps(int r) {
352 auto cct = m_image_ctx->cct;
353 ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;
354
355 if (r < 0) {
356 lderr(m_image_ctx->cct) << "failed to update object map: "
357 << cpp_strerror(r) << dendl;
358
359 finish(r);
360 return;
361 }
362
363 copyup();
364 }
365
// Issue the RADOS ops for the copyup.  Up to two ops are sent (each one
// bumps m_pending_copyups and completes via handle_copyup):
//  * a "copyup" class op with an empty snapshot context (copy-on-read or
//    deep-copyup case),
//  * a write op carrying the piggy-backed object requests under the
//    current snapshot context (and the copyup payload too, when no
//    separate deep-copyup op was needed).
template <typename I>
void CopyupRequest<I>::copyup() {
  auto cct = m_image_ctx->cct;
  m_image_ctx->snap_lock.get_read();
  auto snapc = m_image_ctx->snapc;
  m_image_ctx->snap_lock.put_read();

  m_lock.Lock();
  if (!m_copyup_required) {
    m_lock.Unlock();

    ldout(cct, 20) << "skipping copyup" << dendl;
    finish(0);
    return;
  }

  ldout(cct, 20) << "oid=" << m_oid << dendl;

  bool copy_on_read = m_pending_requests.empty();
  bool deep_copyup = !snapc.snaps.empty() && !m_copyup_is_zero;
  if (m_copyup_is_zero) {
    // all-zero parent data: send an empty copyup payload
    m_copyup_data.clear();
  }

  int r;
  librados::ObjectWriteOperation copyup_op;
  if (copy_on_read || deep_copyup) {
    copyup_op.exec("rbd", "copyup", m_copyup_data);
    ObjectRequest<I>::add_write_hint(*m_image_ctx, &copyup_op);
    ++m_pending_copyups;
  }

  librados::ObjectWriteOperation write_op;
  if (!copy_on_read) {
    if (!deep_copyup) {
      // no standalone copyup op was issued above -- bundle the copyup
      // payload into the write op instead
      write_op.exec("rbd", "copyup", m_copyup_data);
      ObjectRequest<I>::add_write_hint(*m_image_ctx, &write_op);
    }

    // merge all pending write ops into this single RADOS op
    for (auto req : m_pending_requests) {
      ldout(cct, 20) << "add_copyup_ops " << req << dendl;
      req->add_copyup_ops(&write_op);
    }

    if (write_op.size() > 0) {
      ++m_pending_copyups;
    }
  }
  m_lock.Unlock();

  // issue librados ops at the end to simplify test cases
  std::vector<librados::snap_t> snaps;
  if (copyup_op.size() > 0) {
    // send only the copyup request with a blank snapshot context so that
    // all snapshots are detected from the parent for this object. If
    // this is a CoW request, a second request will be created for the
    // actual modification.
    ldout(cct, 20) << "copyup with empty snapshot context" << dendl;

    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      m_oid, comp, &copyup_op, 0, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }

  if (write_op.size() > 0) {
    // compare-and-write doesn't add any write ops (copyup+cmpext+write
    // can't be executed in the same RADOS op because, unless the object
    // was already present in the clone, cmpext wouldn't see it)
    ldout(cct, 20) << (!deep_copyup && write_op.size() > 2 ?
                        "copyup + ops" : !deep_copyup ? "copyup" : "ops")
                   << " with current snapshot context" << dendl;

    snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      m_oid, comp, &write_op, snapc.seq, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }
}
453
// RADOS completion shared by both copyup/write ops.  The final
// outstanding completion finishes the request; -ENOENT is tolerated and
// not treated as a failure.
template <typename I>
void CopyupRequest<I>::handle_copyup(int r) {
  auto cct = m_image_ctx->cct;
  unsigned pending_copyups;
  {
    Mutex::Locker locker(m_lock);
    ceph_assert(m_pending_copyups > 0);
    pending_copyups = --m_pending_copyups;
  }

  ldout(cct, 20) << "oid=" << m_oid << ", " << "r=" << r << ", "
                 << "pending=" << pending_copyups << dendl;

  if (r < 0 && r != -ENOENT) {
    lderr(cct) << "failed to copyup object: " << cpp_strerror(r) << dendl;
    // propagate the error to all attached requests immediately (no
    // -ERESTART override); the later finish(0) then finds empty lists
    complete_requests(false, r);
  }

  if (pending_copyups == 0) {
    finish(0);
  }
}
476
// Terminal state: complete all attached object requests (restart queue
// gets -ERESTART via override_restart_retval=true) and self-destruct --
// the request is heap-allocated and owns its own lifetime.
template <typename I>
void CopyupRequest<I>::finish(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  complete_requests(true, r);
  delete this;
}
485
// Deregister this copyup from the image and complete every attached
// object request.  Pending (piggy-backed) requests receive r as-is;
// restart requests receive -ERESTART instead when
// override_restart_retval is set.
template <typename I>
void CopyupRequest<I>::complete_requests(bool override_restart_retval, int r) {
  auto cct = m_image_ctx->cct;
  remove_from_list();

  // pop-one-at-a-time keeps the list consistent across each callback
  while (!m_pending_requests.empty()) {
    auto it = m_pending_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "completing request " << req << dendl;
    req->handle_copyup(r);
    m_pending_requests.erase(it);
  }

  if (override_restart_retval) {
    r = -ERESTART;
  }

  while (!m_restart_requests.empty()) {
    auto it = m_restart_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "restarting request " << req << dendl;
    req->handle_copyup(r);
    m_restart_requests.erase(it);
  }
}
511
// Stop accepting piggy-backed object requests; later arrivals are queued
// for restart instead (see append_request()).  Caller must hold m_lock.
template <typename I>
void CopyupRequest<I>::disable_append_requests() {
  ceph_assert(m_lock.is_locked());
  m_append_request_permitted = false;
}
517
518 template <typename I>
519 void CopyupRequest<I>::remove_from_list() {
520 Mutex::Locker copyup_list_locker(m_image_ctx->copyup_list_lock);
521
522 auto it = m_image_ctx->copyup_list.find(m_object_no);
523 if (it != m_image_ctx->copyup_list.end()) {
524 m_image_ctx->copyup_list.erase(it);
525 }
526 }
527
528 template <typename I>
529 bool CopyupRequest<I>::is_copyup_required() {
530 ceph_assert(m_lock.is_locked());
531
532 bool copy_on_read = m_pending_requests.empty();
533 if (copy_on_read) {
534 // always force a copyup if CoR enabled
535 return true;
536 }
537
538 if (!m_copyup_is_zero) {
539 return true;
540 }
541
542 for (auto req : m_pending_requests) {
543 if (!req->is_empty_write_op()) {
544 return true;
545 }
546 }
547 return false;
548 }
549
// True while the image is mid-migration: parent data must then come from
// the deep-copy path rather than a plain parent read.  Caller must hold
// snap_lock.
template <typename I>
bool CopyupRequest<I>::is_deep_copy() const {
  ceph_assert(m_image_ctx->snap_lock.is_locked());
  return !m_image_ctx->migration_info.empty();
}
555
556 template <typename I>
557 bool CopyupRequest<I>::is_update_object_map_required(int r) {
558 ceph_assert(m_image_ctx->snap_lock.is_locked());
559
560 if (r < 0) {
561 return false;
562 }
563
564 if (m_image_ctx->object_map == nullptr) {
565 return false;
566 }
567
568 if (m_image_ctx->migration_info.empty()) {
569 // migration might have completed while IO was in-flight,
570 // assume worst-case and perform an object map update
571 return true;
572 }
573
574 auto it = m_image_ctx->migration_info.snap_map.find(CEPH_NOSNAP);
575 ceph_assert(it != m_image_ctx->migration_info.snap_map.end());
576 return it->second[0] != CEPH_NOSNAP;
577 }
578
// Populate m_snap_ids with the snapshots that still need object-map
// updates after a deep copy: snaps already written by the object
// deep-copy and snaps with no parent overlap for this object are
// excluded.  Caller must hold snap_lock.
template <typename I>
void CopyupRequest<I>::compute_deep_copy_snap_ids() {
  ceph_assert(m_image_ctx->snap_lock.is_locked());

  // don't copy ids for the snaps updated by object deep copy or
  // that don't overlap
  std::set<uint64_t> deep_copied;
  for (auto &it : m_image_ctx->migration_info.snap_map) {
    if (it.first != CEPH_NOSNAP) {
      deep_copied.insert(it.second.front());
    }
  }

  RWLock::RLocker parent_locker(m_image_ctx->parent_lock);
  std::copy_if(m_image_ctx->snaps.rbegin(), m_image_ctx->snaps.rend(),
               std::back_inserter(m_snap_ids),
               [this, cct=m_image_ctx->cct, &deep_copied](uint64_t snap_id) {
                 if (deep_copied.count(snap_id)) {
                   // deep copy already handled this snap; the first snap
                   // we do keep must then be flagged as clean
                   m_first_snap_is_clean = true;
                   return false;
                 }

                 uint64_t parent_overlap = 0;
                 int r = m_image_ctx->get_parent_overlap(snap_id, &parent_overlap);
                 if (r < 0) {
                   // best-effort: treat failed lookup as zero overlap
                   ldout(cct, 5) << "failed getting parent overlap for snap_id: "
                                 << snap_id << ": " << cpp_strerror(r) << dendl;
                 }
                 if (parent_overlap == 0) {
                   return false;
                 }
                 // keep the snap only if this object intersects the
                 // parent overlap range
                 std::vector<std::pair<uint64_t, uint64_t>> extents;
                 Striper::extent_to_file(cct, &m_image_ctx->layout,
                                         m_object_no, 0,
                                         m_image_ctx->layout.object_size,
                                         extents);
                 auto overlap = m_image_ctx->prune_parent_extents(
                   extents, parent_overlap);
                 return overlap > 0;
               });
}
620
621 } // namespace io
622 } // namespace librbd
623
624 template class librbd::io::CopyupRequest<librbd::ImageCtx>;