]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/io/CopyupRequest.cc
import ceph 14.2.5
[ceph.git] / ceph / src / librbd / io / CopyupRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include "librbd/io/CopyupRequest.h"
#include "common/ceph_context.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Mutex.h"
#include "common/WorkQueue.h"
#include "librbd/AsyncObjectThrottle.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ObjectMap.h"
#include "librbd/Utils.h"
#include "librbd/deep_copy/ObjectCopyRequest.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/io/ReadResult.h"

#include <boost/bind.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/lambda/construct.hpp>
#include <utility>
24
25 #define dout_subsys ceph_subsys_rbd
26 #undef dout_prefix
27 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
28 << " " << __func__ << ": "
29
30 namespace librbd {
31 namespace io {
32
namespace {

// One AsyncObjectThrottle work unit: updates the object map entry for
// m_object_no at a single snapshot id (m_snap_ids[m_snap_id_idx]);
// CEPH_NOSNAP selects the HEAD revision.
template <typename I>
class C_UpdateObjectMap : public C_AsyncObjectThrottle<I> {
public:
  C_UpdateObjectMap(AsyncObjectThrottle<I> &throttle, I *image_ctx,
                    uint64_t object_no, uint8_t head_object_map_state,
                    const std::vector<uint64_t> *snap_ids,
                    bool first_snap_is_clean, const ZTracer::Trace &trace,
                    size_t snap_id_idx)
    : C_AsyncObjectThrottle<I>(throttle, *image_ctx), m_object_no(object_no),
      m_head_object_map_state(head_object_map_state), m_snap_ids(*snap_ids),
      m_first_snap_is_clean(first_snap_is_clean), m_trace(trace),
      m_snap_id_idx(snap_id_idx)
  {
  }

  // Issue the update for this unit's snapshot id.  Returns 0 when an async
  // op was dispatched (this context completes later) or 1 when there is
  // nothing to do.  Caller must hold owner_lock.
  int send() override {
    auto& image_ctx = this->m_image_ctx;
    ceph_assert(image_ctx.owner_lock.is_locked());
    if (image_ctx.exclusive_lock == nullptr) {
      // exclusive lock was lost -- cannot touch the object map
      return 1;
    }
    ceph_assert(image_ctx.exclusive_lock->is_lock_owner());

    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    if (image_ctx.object_map == nullptr) {
      // object map feature unavailable -- nothing to update
      return 1;
    }

    uint64_t snap_id = m_snap_ids[m_snap_id_idx];
    if (snap_id == CEPH_NOSNAP) {
      return update_head();
    } else {
      return update_snapshot(snap_id);
    }
  }

  // Update the HEAD object map entry; returns 1 (done synchronously) when
  // aio_update reports that no update needed to be sent.
  int update_head() {
    auto& image_ctx = this->m_image_ctx;
    RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
    bool sent = image_ctx.object_map->template aio_update<Context>(
      CEPH_NOSNAP, m_object_no, m_head_object_map_state, {}, m_trace, false,
      this);
    return (sent ? 0 : 1);
  }

  // Update the object map entry for the given snapshot; with fast-diff
  // enabled, only the first snapshot is marked exists+dirty.
  int update_snapshot(uint64_t snap_id) {
    auto& image_ctx = this->m_image_ctx;
    uint8_t state = OBJECT_EXISTS;
    if (image_ctx.test_features(RBD_FEATURE_FAST_DIFF, image_ctx.snap_lock) &&
        (m_snap_id_idx > 0 || m_first_snap_is_clean)) {
      // first snapshot should be exists+dirty since it contains
      // the copyup data -- later snapshots inherit the data.
      state = OBJECT_EXISTS_CLEAN;
    }

    RWLock::RLocker object_map_locker(image_ctx.object_map_lock);
    bool sent = image_ctx.object_map->template aio_update<Context>(
      snap_id, m_object_no, state, {}, m_trace, true, this);
    ceph_assert(sent);
    return 0;
  }

private:
  uint64_t m_object_no;                     // object index within the image
  uint8_t m_head_object_map_state;          // desired state for HEAD
  const std::vector<uint64_t> &m_snap_ids;  // owned by the CopyupRequest
  bool m_first_snap_is_clean;               // entry 0 already deep-copied
  const ZTracer::Trace &m_trace;
  size_t m_snap_id_idx;                     // index into m_snap_ids
};

} // anonymous namespace
107
108 template <typename I>
109 CopyupRequest<I>::CopyupRequest(I *ictx, const std::string &oid,
110 uint64_t objectno, Extents &&image_extents,
111 const ZTracer::Trace &parent_trace)
112 : m_image_ctx(ictx), m_oid(oid), m_object_no(objectno),
113 m_image_extents(image_extents),
114 m_trace(util::create_trace(*m_image_ctx, "copy-up", parent_trace)),
115 m_lock("CopyupRequest", false, false)
116 {
117 ceph_assert(m_image_ctx->data_ctx.is_valid());
118 m_async_op.start_op(*util::get_image_ctx(m_image_ctx));
119 }
120
// All appended write requests must have been drained by complete_requests()
// before the request is destroyed (see finish()).
template <typename I>
CopyupRequest<I>::~CopyupRequest() {
  ceph_assert(m_pending_requests.empty());
  m_async_op.finish_op();
}
126
127 template <typename I>
128 void CopyupRequest<I>::append_request(AbstractObjectWriteRequest<I> *req) {
129 Mutex::Locker locker(m_lock);
130
131 auto cct = m_image_ctx->cct;
132 ldout(cct, 20) << "oid=" << m_oid << ", "
133 << "object_request=" << req << ", "
134 << "append=" << m_append_request_permitted << dendl;
135 if (m_append_request_permitted) {
136 m_pending_requests.push_back(req);
137 } else {
138 m_restart_requests.push_back(req);
139 }
140 }
141
// Entry point: start the state machine by reading the object's backing
// data from the parent image (or deep-copying it during migration).
template <typename I>
void CopyupRequest<I>::send() {
  read_from_parent();
}
146
// Read the covered extents from the parent image; dispatches to deep_copy()
// while the image is being migrated.  Completion (including the synthetic
// -ENOENT when no parent is attached) lands in handle_read_from_parent().
template <typename I>
void CopyupRequest<I>::read_from_parent() {
  auto cct = m_image_ctx->cct;
  RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
  RWLock::RLocker parent_locker(m_image_ctx->parent_lock);

  if (m_image_ctx->parent == nullptr) {
    ldout(cct, 5) << "parent detached" << dendl;

    // no parent: complete asynchronously with -ENOENT so the state machine
    // still advances through handle_read_from_parent()
    m_image_ctx->op_work_queue->queue(
      util::create_context_callback<
        CopyupRequest<I>, &CopyupRequest<I>::handle_read_from_parent>(this),
      -ENOENT);
    return;
  } else if (is_deep_copy()) {
    // migration in progress: deep-copy instead of a plain parent read
    // (deep_copy() relies on snap_lock/parent_lock held here)
    deep_copy();
    return;
  }

  auto comp = AioCompletion::create_and_start<
    CopyupRequest<I>,
    &CopyupRequest<I>::handle_read_from_parent>(
      this, util::get_image_ctx(m_image_ctx->parent), AIO_TYPE_READ);

  ldout(cct, 20) << "oid=" << m_oid << ", "
                 << "completion=" << comp << ", "
                 << "extents=" << m_image_extents
                 << dendl;
  // m_image_extents is consumed here; the read result is collected into
  // m_copyup_data
  ImageRequest<I>::aio_read(m_image_ctx->parent, comp,
                            std::move(m_image_extents),
                            ReadResult{&m_copyup_data}, 0, m_trace);
}
179
// Invoked when the parent read (or the queued -ENOENT) completes: decides
// whether a copyup is still required, stops further request appends, and
// collects the snapshot ids whose object maps must be updated before
// proceeding to update_object_maps().
template <typename I>
void CopyupRequest<I>::handle_read_from_parent(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  // lock order: snap_lock before m_lock; released in reverse on every path
  m_image_ctx->snap_lock.get_read();
  m_lock.Lock();
  m_copyup_is_zero = m_copyup_data.is_zero();
  m_copyup_required = is_copyup_required();
  // from here on, newly arriving writes go to m_restart_requests
  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    lderr(cct) << "error reading from parent: " << cpp_strerror(r) << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    ldout(cct, 20) << "no-op, skipping" << dendl;
    finish(0);
    return;
  }

  // copyup() will affect snapshots only if parent data is not all
  // zeros.
  if (!m_copyup_is_zero) {
    // reverse-iterated so that index 0 is the snapshot that receives the
    // copyup data (see C_UpdateObjectMap's first-snapshot handling)
    m_snap_ids.insert(m_snap_ids.end(), m_image_ctx->snaps.rbegin(),
                      m_image_ctx->snaps.rend());
  }

  m_lock.Unlock();
  m_image_ctx->snap_lock.put_read();

  update_object_maps();
}
221
// Deep-copy the object from the migration source, preserving snapshot
// history unless flattening is required.  Caller (read_from_parent()) must
// hold snap_lock and parent_lock.
template <typename I>
void CopyupRequest<I>::deep_copy() {
  auto cct = m_image_ctx->cct;
  ceph_assert(m_image_ctx->snap_lock.is_locked());
  ceph_assert(m_image_ctx->parent_lock.is_locked());
  ceph_assert(m_image_ctx->parent != nullptr);

  m_lock.Lock();
  // flatten outright when a copyup is required anyway; otherwise honour the
  // migration's configured flatten policy
  m_flatten = is_copyup_required() ? true : m_image_ctx->migration_info.flatten;
  m_lock.Unlock();

  ldout(cct, 20) << "oid=" << m_oid << ", flatten=" << m_flatten << dendl;

  auto ctx = util::create_context_callback<
    CopyupRequest<I>, &CopyupRequest<I>::handle_deep_copy>(this);
  auto req = deep_copy::ObjectCopyRequest<I>::create(
    m_image_ctx->parent, m_image_ctx, m_image_ctx->migration_info.snap_map,
    m_object_no, m_flatten, ctx);

  req->send();
}
243
// Invoked when the deep-copy completes.  May restart the whole state
// machine with flattening enabled, short-circuit when no work remains, or
// fall through to the object map update stage.
template <typename I>
void CopyupRequest<I>::handle_deep_copy(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  m_image_ctx->snap_lock.get_read();
  m_lock.Lock();
  m_copyup_required = is_copyup_required();
  if (r == -ENOENT && !m_flatten && m_copyup_required) {
    // the non-flatten attempt found no source object but a copyup is
    // needed: rerun from the top with flatten enabled (note: appends are
    // intentionally still permitted across the restart)
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    ldout(cct, 10) << "restart deep-copy with flatten" << dendl;
    send();
    return;
  }

  // no restart: stop folding in new write requests
  disable_append_requests();

  if (r < 0 && r != -ENOENT) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    lderr(cct) << "error encountered during deep-copy: " << cpp_strerror(r)
               << dendl;
    finish(r);
    return;
  }

  if (!m_copyup_required && !is_update_object_map_required(r)) {
    m_lock.Unlock();
    m_image_ctx->snap_lock.put_read();

    if (r == -ENOENT) {
      // source object absent is not an error when nothing else remains
      r = 0;
    }

    ldout(cct, 20) << "skipping" << dendl;
    finish(r);
    return;
  }

  // For deep-copy, copyup() will never affect snapshots. However,
  // this state machine is responsible for updating object maps for
  // snapshots that have been created on destination image after
  // migration started.
  if (r != -ENOENT) {
    compute_deep_copy_snap_ids();
  }

  m_lock.Unlock();
  m_image_ctx->snap_lock.put_read();

  update_object_maps();
}
299
// Update the object map for HEAD and every collected snapshot id via a
// throttled fan-out of C_UpdateObjectMap contexts; skips straight to
// copyup() when the object map feature is not enabled.
template <typename I>
void CopyupRequest<I>::update_object_maps() {
  RWLock::RLocker owner_locker(m_image_ctx->owner_lock);
  RWLock::RLocker snap_locker(m_image_ctx->snap_lock);
  if (m_image_ctx->object_map == nullptr) {
    // drop the image locks before issuing the copyup I/O
    snap_locker.unlock();
    owner_locker.unlock();

    copyup();
    return;
  }

  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << dendl;

  // no pending writes implies this copyup was triggered by copy-on-read
  bool copy_on_read = m_pending_requests.empty();
  uint8_t head_object_map_state = OBJECT_EXISTS;
  if (copy_on_read && !m_snap_ids.empty() &&
      m_image_ctx->test_features(RBD_FEATURE_FAST_DIFF,
                                 m_image_ctx->snap_lock)) {
    // HEAD is non-dirty since data is tied to first snapshot
    head_object_map_state = OBJECT_EXISTS_CLEAN;
  }

  auto r_it = m_pending_requests.rbegin();
  if (r_it != m_pending_requests.rend()) {
    // last write-op determines the final object map state
    head_object_map_state = (*r_it)->get_pre_write_object_map_state();
  }

  RWLock::WLocker object_map_locker(m_image_ctx->object_map_lock);
  if ((*m_image_ctx->object_map)[m_object_no] != head_object_map_state) {
    // (maybe) need to update the HEAD object map state
    m_snap_ids.push_back(CEPH_NOSNAP);
  }
  object_map_locker.unlock();
  snap_locker.unlock();

  // owner_lock is still held for the throttle's send() path
  ceph_assert(m_image_ctx->exclusive_lock->is_lock_owner());
  // factory builds one C_UpdateObjectMap per m_snap_ids entry
  typename AsyncObjectThrottle<I>::ContextFactory context_factory(
    boost::lambda::bind(boost::lambda::new_ptr<C_UpdateObjectMap<I>>(),
      boost::lambda::_1, m_image_ctx, m_object_no, head_object_map_state,
      &m_snap_ids, m_first_snap_is_clean, m_trace, boost::lambda::_2));
  auto ctx = util::create_context_callback<
    CopyupRequest<I>, &CopyupRequest<I>::handle_update_object_maps>(this);
  auto throttle = new AsyncObjectThrottle<I>(
    nullptr, *m_image_ctx, context_factory, ctx, nullptr, 0, m_snap_ids.size());
  throttle->start_ops(
    m_image_ctx->config.template get_val<uint64_t>("rbd_concurrent_management_ops"));
}
350
351 template <typename I>
352 void CopyupRequest<I>::handle_update_object_maps(int r) {
353 auto cct = m_image_ctx->cct;
354 ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;
355
356 if (r < 0) {
357 lderr(m_image_ctx->cct) << "failed to update object map: "
358 << cpp_strerror(r) << dendl;
359
360 finish(r);
361 return;
362 }
363
364 copyup();
365 }
366
// Issue the copyup I/O.  Up to two RADOS ops are sent: a "copyup" op with
// an empty snapshot context (so the OSD materialises parent snapshots for
// this object), and a write op carrying the pending client writes under the
// current snapshot context.  Each dispatched op bumps m_pending_copyups and
// completes via handle_copyup().
template <typename I>
void CopyupRequest<I>::copyup() {
  auto cct = m_image_ctx->cct;
  m_image_ctx->snap_lock.get_read();
  auto snapc = m_image_ctx->snapc;
  m_image_ctx->snap_lock.put_read();

  m_lock.Lock();
  if (!m_copyup_required) {
    m_lock.Unlock();

    ldout(cct, 20) << "skipping copyup" << dendl;
    finish(0);
    return;
  }

  ldout(cct, 20) << "oid=" << m_oid << dendl;

  // no pending writes implies copy-on-read
  bool copy_on_read = m_pending_requests.empty();
  // a separate deep-copyup op is needed when snapshots exist and the parent
  // data is non-zero
  bool deep_copyup = !snapc.snaps.empty() && !m_copyup_is_zero;
  if (m_copyup_is_zero) {
    // all-zero parent data: send an empty copyup payload
    m_copyup_data.clear();
  }

  int r;
  librados::ObjectWriteOperation copyup_op;
  if (copy_on_read || deep_copyup) {
    copyup_op.exec("rbd", "copyup", m_copyup_data);
    ObjectRequest<I>::add_write_hint(*m_image_ctx, &copyup_op);
    ++m_pending_copyups;
  }

  librados::ObjectWriteOperation write_op;
  if (!copy_on_read) {
    if (!deep_copyup) {
      // no separate deep-copyup op: fold the copyup into the write op
      write_op.exec("rbd", "copyup", m_copyup_data);
      ObjectRequest<I>::add_write_hint(*m_image_ctx, &write_op);
    }

    // merge all pending write ops into this single RADOS op
    for (auto req : m_pending_requests) {
      ldout(cct, 20) << "add_copyup_ops " << req << dendl;
      req->add_copyup_ops(&write_op);
    }

    if (write_op.size() > 0) {
      ++m_pending_copyups;
    }
  }
  m_lock.Unlock();

  // issue librados ops at the end to simplify test cases
  std::vector<librados::snap_t> snaps;
  if (copyup_op.size() > 0) {
    // send only the copyup request with a blank snapshot context so that
    // all snapshots are detected from the parent for this object.  If
    // this is a CoW request, a second request will be created for the
    // actual modification.
    ldout(cct, 20) << "copyup with empty snapshot context" << dendl;

    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      m_oid, comp, &copyup_op, 0, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }

  if (write_op.size() > 0) {
    // compare-and-write doesn't add any write ops (copyup+cmpext+write
    // can't be executed in the same RADOS op because, unless the object
    // was already present in the clone, cmpext wouldn't see it)
    ldout(cct, 20) << (!deep_copyup && write_op.size() > 2 ?
                        "copyup + ops" : !deep_copyup ? "copyup" : "ops")
                   << " with current snapshot context" << dendl;

    snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
    auto comp = util::create_rados_callback<
      CopyupRequest<I>, &CopyupRequest<I>::handle_copyup>(this);
    r = m_image_ctx->data_ctx.aio_operate(
      m_oid, comp, &write_op, snapc.seq, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    ceph_assert(r == 0);
    comp->release();
  }
}
454
455 template <typename I>
456 void CopyupRequest<I>::handle_copyup(int r) {
457 auto cct = m_image_ctx->cct;
458 unsigned pending_copyups;
459 {
460 Mutex::Locker locker(m_lock);
461 ceph_assert(m_pending_copyups > 0);
462 pending_copyups = --m_pending_copyups;
463 }
464
465 ldout(cct, 20) << "oid=" << m_oid << ", " << "r=" << r << ", "
466 << "pending=" << pending_copyups << dendl;
467
468 if (r < 0 && r != -ENOENT) {
469 lderr(cct) << "failed to copyup object: " << cpp_strerror(r) << dendl;
470 complete_requests(false, r);
471 }
472
473 if (pending_copyups == 0) {
474 finish(0);
475 }
476 }
477
// Terminal state: drain/restart all queued requests and self-destruct.
// Nothing may touch this object after the delete.
template <typename I>
void CopyupRequest<I>::finish(int r) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "oid=" << m_oid << ", r=" << r << dendl;

  complete_requests(true, r);
  delete this;
}
486
// Deliver the result to every queued write request and deregister this
// copyup from the image's copyup list.  Requests parked after appends were
// disabled receive -ERESTART when override_restart_retval is set so they
// re-drive themselves against the now-copied object.
template <typename I>
void CopyupRequest<I>::complete_requests(bool override_restart_retval, int r) {
  auto cct = m_image_ctx->cct;
  remove_from_list();

  // erase only after the callback runs; the list may be observed
  // concurrently-in-spirit via re-entrant paths, so drain one at a time
  while (!m_pending_requests.empty()) {
    auto it = m_pending_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "completing request " << req << dendl;
    req->handle_copyup(r);
    m_pending_requests.erase(it);
  }

  if (override_restart_retval) {
    r = -ERESTART;
  }

  while (!m_restart_requests.empty()) {
    auto it = m_restart_requests.begin();
    auto req = *it;
    ldout(cct, 20) << "restarting request " << req << dendl;
    req->handle_copyup(r);
    m_restart_requests.erase(it);
  }
}
512
// Stop folding new write requests into this copyup; later arrivals are
// parked in m_restart_requests instead (see append_request()).
// Caller must hold m_lock.
template <typename I>
void CopyupRequest<I>::disable_append_requests() {
  ceph_assert(m_lock.is_locked());
  m_append_request_permitted = false;
}
518
519 template <typename I>
520 void CopyupRequest<I>::remove_from_list() {
521 Mutex::Locker copyup_list_locker(m_image_ctx->copyup_list_lock);
522
523 auto it = m_image_ctx->copyup_list.find(m_object_no);
524 if (it != m_image_ctx->copyup_list.end()) {
525 m_image_ctx->copyup_list.erase(it);
526 }
527 }
528
529 template <typename I>
530 bool CopyupRequest<I>::is_copyup_required() {
531 ceph_assert(m_lock.is_locked());
532
533 bool copy_on_read = m_pending_requests.empty();
534 if (copy_on_read) {
535 // always force a copyup if CoR enabled
536 return true;
537 }
538
539 if (!m_copyup_is_zero) {
540 return true;
541 }
542
543 for (auto req : m_pending_requests) {
544 if (!req->is_empty_write_op()) {
545 return true;
546 }
547 }
548 return false;
549 }
550
// True while the image is being migrated: the copyup must deep-copy from
// the migration source rather than read the parent.  Caller must hold
// snap_lock.
template <typename I>
bool CopyupRequest<I>::is_deep_copy() const {
  ceph_assert(m_image_ctx->snap_lock.is_locked());
  return !m_image_ctx->migration_info.empty();
}
556
557 template <typename I>
558 bool CopyupRequest<I>::is_update_object_map_required(int r) {
559 ceph_assert(m_image_ctx->snap_lock.is_locked());
560
561 if (r < 0) {
562 return false;
563 }
564
565 if (m_image_ctx->object_map == nullptr) {
566 return false;
567 }
568
569 if (m_image_ctx->migration_info.empty()) {
570 // migration might have completed while IO was in-flight,
571 // assume worst-case and perform an object map update
572 return true;
573 }
574
575 auto it = m_image_ctx->migration_info.snap_map.find(CEPH_NOSNAP);
576 ceph_assert(it != m_image_ctx->migration_info.snap_map.end());
577 return it->second[0] != CEPH_NOSNAP;
578 }
579
// Collect the snapshot ids (into m_snap_ids) whose object maps this state
// machine must update after a deep-copy: snapshots already handled by the
// object deep-copy itself, and snapshots with no parent overlap for this
// object, are excluded.  Caller must hold snap_lock.
template <typename I>
void CopyupRequest<I>::compute_deep_copy_snap_ids() {
  ceph_assert(m_image_ctx->snap_lock.is_locked());

  // don't copy ids for the snaps updated by object deep copy or
  // that don't overlap
  std::set<uint64_t> deep_copied;
  for (auto &it : m_image_ctx->migration_info.snap_map) {
    if (it.first != CEPH_NOSNAP) {
      deep_copied.insert(it.second.front());
    }
  }

  RWLock::RLocker parent_locker(m_image_ctx->parent_lock);
  std::copy_if(m_image_ctx->snaps.rbegin(), m_image_ctx->snaps.rend(),
               std::back_inserter(m_snap_ids),
               [this, cct=m_image_ctx->cct, &deep_copied](uint64_t snap_id) {
      if (deep_copied.count(snap_id)) {
        // already written by the deep-copy; flag so C_UpdateObjectMap marks
        // the first remaining snapshot exists+clean
        m_first_snap_is_clean = true;
        return false;
      }

      uint64_t parent_overlap = 0;
      int r = m_image_ctx->get_parent_overlap(snap_id, &parent_overlap);
      if (r < 0) {
        // best-effort: fall through with overlap 0 (snapshot excluded)
        ldout(cct, 5) << "failed getting parent overlap for snap_id: "
                      << snap_id << ": " << cpp_strerror(r) << dendl;
      }
      if (parent_overlap == 0) {
        return false;
      }
      // clip this object's full extent against the parent overlap; only
      // snapshots with a non-empty intersection need an update
      std::vector<std::pair<uint64_t, uint64_t>> extents;
      Striper::extent_to_file(cct, &m_image_ctx->layout,
                              m_object_no, 0,
                              m_image_ctx->layout.object_size,
                              extents);
      auto overlap = m_image_ctx->prune_parent_extents(
        extents, parent_overlap);
      return overlap > 0;
    });
}
621
622 } // namespace io
623 } // namespace librbd
624
625 template class librbd::io::CopyupRequest<librbd::ImageCtx>;