]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/io/ObjectRequest.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / librbd / io / ObjectRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/io/ObjectRequest.h"
5 #include "common/ceph_context.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "common/ceph_mutex.h"
9 #include "include/Context.h"
10 #include "include/err.h"
11 #include "include/neorados/RADOS.hpp"
12 #include "osd/osd_types.h"
13 #include "librados/snap_set_diff.h"
14 #include "librbd/AsioEngine.h"
15 #include "librbd/ExclusiveLock.h"
16 #include "librbd/ImageCtx.h"
17 #include "librbd/ObjectMap.h"
18 #include "librbd/Utils.h"
19 #include "librbd/asio/Utils.h"
20 #include "librbd/io/AioCompletion.h"
21 #include "librbd/io/CopyupRequest.h"
22 #include "librbd/io/ImageRequest.h"
23 #include "librbd/io/Utils.h"
24
25 #include <boost/optional.hpp>
26
// Logging configuration for this translation unit: selects the rbd log
// subsystem and redefines the per-line prefix so that it streams the literal
// "librbd::io::ObjectRequest: ", the request pointer (this), the name of the
// enclosing function (__func__) and the data object name derived from
// this->m_ictx / this->m_object_no. Because of the `this` and __func__
// references, the prefix is only usable inside member functions and its output
// depends on which function the log statement appears in.
27 #define dout_subsys ceph_subsys_rbd
28 #undef dout_prefix
29 #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \
30 << " " << __func__ << ": " \
31 << data_object_name(this->m_ictx, \
32 this->m_object_no) << " "
33
34 namespace librbd {
35 namespace io {
36
37 using librbd::util::data_object_name;
38 using librbd::util::create_context_callback;
39 using librbd::util::create_trace;
40
41 namespace {
42
43 template <typename I>
44 inline bool is_copy_on_read(I *ictx, const IOContext& io_context) {
45 std::shared_lock image_locker{ictx->image_lock};
46 return (ictx->clone_copy_on_read && !ictx->read_only &&
47 io_context->read_snap().value_or(CEPH_NOSNAP) == CEPH_NOSNAP &&
48 (ictx->exclusive_lock == nullptr ||
49 ictx->exclusive_lock->is_lock_owner()));
50 }
51
// Copies a snap set between two structurally compatible representations.
//
// The sequence number is assigned directly and one destination clone is
// appended per source clone (in source order), copying the cloneid, snaps,
// overlap and size fields. Existing entries of dst_snap_set->clones are left
// in place; new entries are appended after them.
template <typename S, typename D>
void convert_snap_set(const S& src_snap_set,
                      D* dst_snap_set) {
  dst_snap_set->seq = src_snap_set.seq;

  auto& dst_clones = dst_snap_set->clones;
  dst_clones.reserve(src_snap_set.clones.size());
  for (const auto& src_clone : src_snap_set.clones) {
    // default-construct in place, then fill in field by field
    auto& dst_clone = dst_clones.emplace_back();
    dst_clone.cloneid = src_clone.cloneid;
    dst_clone.snaps = src_clone.snaps;
    dst_clone.overlap = src_clone.overlap;
    dst_clone.size = src_clone.size;
  }
}
66
67 } // anonymous namespace
68
69 template <typename I>
70 ObjectRequest<I>*
71 ObjectRequest<I>::create_write(
72 I *ictx, uint64_t object_no, uint64_t object_off, ceph::bufferlist&& data,
73 IOContext io_context, int op_flags, int write_flags,
74 std::optional<uint64_t> assert_version,
75 const ZTracer::Trace &parent_trace, Context *completion) {
76 return new ObjectWriteRequest<I>(ictx, object_no, object_off,
77 std::move(data), io_context, op_flags,
78 write_flags, assert_version,
79 parent_trace, completion);
80 }
81
82 template <typename I>
83 ObjectRequest<I>*
84 ObjectRequest<I>::create_discard(
85 I *ictx, uint64_t object_no, uint64_t object_off, uint64_t object_len,
86 IOContext io_context, int discard_flags,
87 const ZTracer::Trace &parent_trace, Context *completion) {
88 return new ObjectDiscardRequest<I>(ictx, object_no, object_off,
89 object_len, io_context, discard_flags,
90 parent_trace, completion);
91 }
92
93 template <typename I>
94 ObjectRequest<I>*
95 ObjectRequest<I>::create_write_same(
96 I *ictx, uint64_t object_no, uint64_t object_off, uint64_t object_len,
97 ceph::bufferlist&& data, IOContext io_context, int op_flags,
98 const ZTracer::Trace &parent_trace, Context *completion) {
99 return new ObjectWriteSameRequest<I>(ictx, object_no, object_off,
100 object_len, std::move(data), io_context,
101 op_flags, parent_trace, completion);
102 }
103
104 template <typename I>
105 ObjectRequest<I>*
106 ObjectRequest<I>::create_compare_and_write(
107 I *ictx, uint64_t object_no, uint64_t object_off,
108 ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
109 IOContext io_context, uint64_t *mismatch_offset, int op_flags,
110 const ZTracer::Trace &parent_trace, Context *completion) {
111 return new ObjectCompareAndWriteRequest<I>(ictx, object_no, object_off,
112 std::move(cmp_data),
113 std::move(write_data), io_context,
114 mismatch_offset, op_flags,
115 parent_trace, completion);
116 }
117
118 template <typename I>
119 ObjectRequest<I>::ObjectRequest(
120 I *ictx, uint64_t objectno, IOContext io_context,
121 const char *trace_name, const ZTracer::Trace &trace, Context *completion)
122 : m_ictx(ictx), m_object_no(objectno), m_io_context(io_context),
123 m_completion(completion),
124 m_trace(create_trace(*ictx, "", trace)) {
125 ceph_assert(m_ictx->data_ctx.is_valid());
126 if (m_trace.valid()) {
127 m_trace.copy_name(trace_name + std::string(" ") +
128 data_object_name(ictx, objectno));
129 m_trace.event("start");
130 }
131 }
132
133 template <typename I>
134 void ObjectRequest<I>::add_write_hint(I& image_ctx, neorados::WriteOp* wr) {
135 auto alloc_hint_flags = static_cast<neorados::alloc_hint::alloc_hint_t>(
136 image_ctx.alloc_hint_flags);
137 if (image_ctx.enable_alloc_hint) {
138 wr->set_alloc_hint(image_ctx.get_object_size(),
139 image_ctx.get_object_size(),
140 alloc_hint_flags);
141 } else if (image_ctx.alloc_hint_flags != 0U) {
142 wr->set_alloc_hint(0, 0, alloc_hint_flags);
143 }
144 }
145
146 template <typename I>
147 bool ObjectRequest<I>::compute_parent_extents(Extents *parent_extents,
148 bool read_request) {
149 ceph_assert(ceph_mutex_is_locked(m_ictx->image_lock));
150
151 m_has_parent = false;
152 parent_extents->clear();
153
154 uint64_t parent_overlap;
155 int r = m_ictx->get_parent_overlap(
156 m_io_context->read_snap().value_or(CEPH_NOSNAP), &parent_overlap);
157 if (r < 0) {
158 // NOTE: it's possible for a snapshot to be deleted while we are
159 // still reading from it
160 lderr(m_ictx->cct) << "failed to retrieve parent overlap: "
161 << cpp_strerror(r) << dendl;
162 return false;
163 }
164
165 if (!read_request && !m_ictx->migration_info.empty()) {
166 parent_overlap = m_ictx->migration_info.overlap;
167 }
168
169 if (parent_overlap == 0) {
170 return false;
171 }
172
173 io::util::extent_to_file(m_ictx, m_object_no, 0, m_ictx->layout.object_size,
174 *parent_extents);
175 uint64_t object_overlap = m_ictx->prune_parent_extents(*parent_extents,
176 parent_overlap);
177 if (object_overlap > 0) {
178 ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " "
179 << "extents " << *parent_extents << dendl;
180 m_has_parent = !parent_extents->empty();
181 return true;
182 }
183 return false;
184 }
185
186 template <typename I>
187 void ObjectRequest<I>::async_finish(int r) {
188 ldout(m_ictx->cct, 20) << "r=" << r << dendl;
189 m_ictx->asio_engine->post([this, r]() { finish(r); });
190 }
191
192 template <typename I>
193 void ObjectRequest<I>::finish(int r) {
194 ldout(m_ictx->cct, 20) << "r=" << r << dendl;
195 m_completion->complete(r);
196 delete this;
197 }
198
199 /** read **/
200
201 template <typename I>
202 ObjectReadRequest<I>::ObjectReadRequest(
203 I *ictx, uint64_t objectno, ReadExtents* extents,
204 IOContext io_context, int op_flags, int read_flags,
205 const ZTracer::Trace &parent_trace, uint64_t* version,
206 Context *completion)
207 : ObjectRequest<I>(ictx, objectno, io_context, "read", parent_trace,
208 completion),
209 m_extents(extents), m_op_flags(op_flags),m_read_flags(read_flags),
210 m_version(version) {
211 }
212
213 template <typename I>
214 void ObjectReadRequest<I>::send() {
215 I *image_ctx = this->m_ictx;
216 ldout(image_ctx->cct, 20) << dendl;
217
218 read_object();
219 }
220
221 template <typename I>
222 void ObjectReadRequest<I>::read_object() {
223 I *image_ctx = this->m_ictx;
224
225 std::shared_lock image_locker{image_ctx->image_lock};
226 auto read_snap_id = this->m_io_context->read_snap().value_or(CEPH_NOSNAP);
227 if (read_snap_id == image_ctx->snap_id &&
228 image_ctx->object_map != nullptr &&
229 !image_ctx->object_map->object_may_exist(this->m_object_no)) {
230 image_ctx->asio_engine->post([this]() { read_parent(); });
231 return;
232 }
233 image_locker.unlock();
234
235 ldout(image_ctx->cct, 20) << "snap_id=" << read_snap_id << dendl;
236
237 neorados::ReadOp read_op;
238 for (auto& extent: *this->m_extents) {
239 if (extent.length >= image_ctx->sparse_read_threshold_bytes) {
240 read_op.sparse_read(extent.offset, extent.length, &extent.bl,
241 &extent.extent_map);
242 } else {
243 read_op.read(extent.offset, extent.length, &extent.bl);
244 }
245 }
246 util::apply_op_flags(
247 m_op_flags, image_ctx->get_read_flags(read_snap_id), &read_op);
248
249 image_ctx->rados_api.execute(
250 {data_object_name(this->m_ictx, this->m_object_no)},
251 *this->m_io_context, std::move(read_op), nullptr,
252 librbd::asio::util::get_callback_adapter(
253 [this](int r) { handle_read_object(r); }), m_version,
254 (this->m_trace.valid() ? this->m_trace.get_info() : nullptr));
255 }
256
257 template <typename I>
258 void ObjectReadRequest<I>::handle_read_object(int r) {
259 I *image_ctx = this->m_ictx;
260 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
261 if (m_version != nullptr) {
262 ldout(image_ctx->cct, 20) << "version=" << *m_version << dendl;
263 }
264
265 if (r == -ENOENT) {
266 read_parent();
267 return;
268 } else if (r < 0) {
269 lderr(image_ctx->cct) << "failed to read from object: "
270 << cpp_strerror(r) << dendl;
271 this->finish(r);
272 return;
273 }
274
275 this->finish(0);
276 }
277
278 template <typename I>
279 void ObjectReadRequest<I>::read_parent() {
280 if ((m_read_flags & READ_FLAG_DISABLE_READ_FROM_PARENT) != 0) {
281 this->finish(-ENOENT);
282 return;
283 }
284
285 I *image_ctx = this->m_ictx;
286 ldout(image_ctx->cct, 20) << dendl;
287
288 auto ctx = create_context_callback<
289 ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_parent>(this);
290
291 io::util::read_parent<I>(
292 image_ctx, this->m_object_no, this->m_extents,
293 this->m_io_context->read_snap().value_or(CEPH_NOSNAP), this->m_trace,
294 ctx);
295 }
296
297 template <typename I>
298 void ObjectReadRequest<I>::handle_read_parent(int r) {
299 I *image_ctx = this->m_ictx;
300 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
301
302 if (r == -ENOENT) {
303 this->finish(r);
304 return;
305 } else if (r < 0) {
306 lderr(image_ctx->cct) << "failed to read parent extents: "
307 << cpp_strerror(r) << dendl;
308 this->finish(r);
309 return;
310 }
311
312 copyup();
313 }
314
315 template <typename I>
316 void ObjectReadRequest<I>::copyup() {
317 I *image_ctx = this->m_ictx;
318 if (!is_copy_on_read(image_ctx, this->m_io_context)) {
319 this->finish(0);
320 return;
321 }
322
323 image_ctx->owner_lock.lock_shared();
324 image_ctx->image_lock.lock_shared();
325 Extents parent_extents;
326 if (!this->compute_parent_extents(&parent_extents, true) ||
327 (image_ctx->exclusive_lock != nullptr &&
328 !image_ctx->exclusive_lock->is_lock_owner())) {
329 image_ctx->image_lock.unlock_shared();
330 image_ctx->owner_lock.unlock_shared();
331 this->finish(0);
332 return;
333 }
334
335 ldout(image_ctx->cct, 20) << dendl;
336
337 image_ctx->copyup_list_lock.lock();
338 auto it = image_ctx->copyup_list.find(this->m_object_no);
339 if (it == image_ctx->copyup_list.end()) {
340 // create and kick off a CopyupRequest
341 auto new_req = CopyupRequest<I>::create(
342 image_ctx, this->m_object_no, std::move(parent_extents), this->m_trace);
343
344 image_ctx->copyup_list[this->m_object_no] = new_req;
345 image_ctx->copyup_list_lock.unlock();
346 image_ctx->image_lock.unlock_shared();
347 new_req->send();
348 } else {
349 image_ctx->copyup_list_lock.unlock();
350 image_ctx->image_lock.unlock_shared();
351 }
352
353 image_ctx->owner_lock.unlock_shared();
354 this->finish(0);
355 }
356
357 /** write **/
358
359 template <typename I>
360 AbstractObjectWriteRequest<I>::AbstractObjectWriteRequest(
361 I *ictx, uint64_t object_no, uint64_t object_off, uint64_t len,
362 IOContext io_context, const char *trace_name,
363 const ZTracer::Trace &parent_trace, Context *completion)
364 : ObjectRequest<I>(ictx, object_no, io_context, trace_name, parent_trace,
365 completion),
366 m_object_off(object_off), m_object_len(len)
367 {
368 if (this->m_object_off == 0 &&
369 this->m_object_len == ictx->get_object_size()) {
370 m_full_object = true;
371 }
372
373 compute_parent_info();
374
375 ictx->image_lock.lock_shared();
376 if (!ictx->migration_info.empty()) {
377 m_guarding_migration_write = true;
378 }
379 ictx->image_lock.unlock_shared();
380 }
381
382 template <typename I>
383 void AbstractObjectWriteRequest<I>::compute_parent_info() {
384 I *image_ctx = this->m_ictx;
385 std::shared_lock image_locker{image_ctx->image_lock};
386
387 this->compute_parent_extents(&m_parent_extents, false);
388
389 if (!this->has_parent() ||
390 (m_full_object &&
391 !this->m_io_context->write_snap_context() &&
392 !is_post_copyup_write_required())) {
393 m_copyup_enabled = false;
394 }
395 }
396
397 template <typename I>
398 void AbstractObjectWriteRequest<I>::add_write_hint(
399 neorados::WriteOp *wr) {
400 I *image_ctx = this->m_ictx;
401 std::shared_lock image_locker{image_ctx->image_lock};
402 if (image_ctx->object_map == nullptr || !this->m_object_may_exist ||
403 image_ctx->alloc_hint_flags != 0U) {
404 ObjectRequest<I>::add_write_hint(*image_ctx, wr);
405 }
406 }
407
408 template <typename I>
409 void AbstractObjectWriteRequest<I>::send() {
410 I *image_ctx = this->m_ictx;
411 ldout(image_ctx->cct, 20) << this->get_op_type() << " "
412 << this->m_object_off << "~" << this->m_object_len
413 << dendl;
414 {
415 std::shared_lock image_lock{image_ctx->image_lock};
416 if (image_ctx->object_map == nullptr) {
417 m_object_may_exist = true;
418 } else {
419 // should have been flushed prior to releasing lock
420 ceph_assert(image_ctx->exclusive_lock->is_lock_owner());
421 m_object_may_exist = image_ctx->object_map->object_may_exist(
422 this->m_object_no);
423 }
424 }
425
426 if (!m_object_may_exist && is_no_op_for_nonexistent_object()) {
427 ldout(image_ctx->cct, 20) << "skipping no-op on nonexistent object"
428 << dendl;
429 this->async_finish(0);
430 return;
431 }
432
433 pre_write_object_map_update();
434 }
435
436 template <typename I>
437 void AbstractObjectWriteRequest<I>::pre_write_object_map_update() {
438 I *image_ctx = this->m_ictx;
439
440 image_ctx->image_lock.lock_shared();
441 if (image_ctx->object_map == nullptr || !is_object_map_update_enabled()) {
442 image_ctx->image_lock.unlock_shared();
443 write_object();
444 return;
445 }
446
447 if (!m_object_may_exist && m_copyup_enabled) {
448 // optimization: copyup required
449 image_ctx->image_lock.unlock_shared();
450 copyup();
451 return;
452 }
453
454 uint8_t new_state = this->get_pre_write_object_map_state();
455 ldout(image_ctx->cct, 20) << this->m_object_off << "~" << this->m_object_len
456 << dendl;
457
458 if (image_ctx->object_map->template aio_update<
459 AbstractObjectWriteRequest<I>,
460 &AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update>(
461 CEPH_NOSNAP, this->m_object_no, new_state, {}, this->m_trace, false,
462 this)) {
463 image_ctx->image_lock.unlock_shared();
464 return;
465 }
466
467 image_ctx->image_lock.unlock_shared();
468 write_object();
469 }
470
471 template <typename I>
472 void AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update(int r) {
473 I *image_ctx = this->m_ictx;
474 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
475 if (r < 0) {
476 lderr(image_ctx->cct) << "failed to update object map: "
477 << cpp_strerror(r) << dendl;
478 this->finish(r);
479 return;
480 }
481
482 write_object();
483 }
484
485 template <typename I>
486 void AbstractObjectWriteRequest<I>::write_object() {
487 I *image_ctx = this->m_ictx;
488 ldout(image_ctx->cct, 20) << dendl;
489
490 neorados::WriteOp write_op;
491 if (m_copyup_enabled) {
492 if (m_guarding_migration_write) {
493 auto snap_seq = (this->m_io_context->write_snap_context() ?
494 this->m_io_context->write_snap_context()->first : 0);
495 ldout(image_ctx->cct, 20) << "guarding write: snap_seq=" << snap_seq
496 << dendl;
497
498 cls_client::assert_snapc_seq(
499 &write_op, snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_LE_SNAPSET_SEQ);
500 } else {
501 ldout(image_ctx->cct, 20) << "guarding write" << dendl;
502 write_op.assert_exists();
503 }
504 }
505
506 add_write_hint(&write_op);
507 add_write_ops(&write_op);
508 ceph_assert(write_op.size() != 0);
509
510 image_ctx->rados_api.execute(
511 {data_object_name(this->m_ictx, this->m_object_no)},
512 *this->m_io_context, std::move(write_op),
513 librbd::asio::util::get_callback_adapter(
514 [this](int r) { handle_write_object(r); }), nullptr,
515 (this->m_trace.valid() ? this->m_trace.get_info() : nullptr));
516 }
517
518 template <typename I>
519 void AbstractObjectWriteRequest<I>::handle_write_object(int r) {
520 I *image_ctx = this->m_ictx;
521 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
522
523 r = filter_write_result(r);
524 if (r == -ENOENT) {
525 if (m_copyup_enabled) {
526 copyup();
527 return;
528 }
529 } else if (r == -ERANGE && m_guarding_migration_write) {
530 image_ctx->image_lock.lock_shared();
531 m_guarding_migration_write = !image_ctx->migration_info.empty();
532 image_ctx->image_lock.unlock_shared();
533
534 if (m_guarding_migration_write) {
535 copyup();
536 } else {
537 ldout(image_ctx->cct, 10) << "migration parent gone, restart io" << dendl;
538 compute_parent_info();
539 write_object();
540 }
541 return;
542 } else if (r == -EILSEQ) {
543 ldout(image_ctx->cct, 10) << "failed to write object" << dendl;
544 this->finish(r);
545 return;
546 } else if (r < 0) {
547 lderr(image_ctx->cct) << "failed to write object: " << cpp_strerror(r)
548 << dendl;
549 this->finish(r);
550 return;
551 }
552
553 post_write_object_map_update();
554 }
555
556 template <typename I>
557 void AbstractObjectWriteRequest<I>::copyup() {
558 I *image_ctx = this->m_ictx;
559 ldout(image_ctx->cct, 20) << dendl;
560
561 ceph_assert(!m_copyup_in_progress);
562 m_copyup_in_progress = true;
563
564 image_ctx->copyup_list_lock.lock();
565 auto it = image_ctx->copyup_list.find(this->m_object_no);
566 if (it == image_ctx->copyup_list.end()) {
567 auto new_req = CopyupRequest<I>::create(
568 image_ctx, this->m_object_no, std::move(this->m_parent_extents),
569 this->m_trace);
570 this->m_parent_extents.clear();
571
572 // make sure to wait on this CopyupRequest
573 new_req->append_request(this, std::move(get_copyup_overwrite_extents()));
574 image_ctx->copyup_list[this->m_object_no] = new_req;
575
576 image_ctx->copyup_list_lock.unlock();
577 new_req->send();
578 } else {
579 it->second->append_request(this, std::move(get_copyup_overwrite_extents()));
580 image_ctx->copyup_list_lock.unlock();
581 }
582 }
583
584 template <typename I>
585 void AbstractObjectWriteRequest<I>::handle_copyup(int r) {
586 I *image_ctx = this->m_ictx;
587 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
588
589 ceph_assert(m_copyup_in_progress);
590 m_copyup_in_progress = false;
591
592 if (r < 0 && r != -ERESTART) {
593 lderr(image_ctx->cct) << "failed to copyup object: " << cpp_strerror(r)
594 << dendl;
595 this->finish(r);
596 return;
597 }
598
599 if (r == -ERESTART || is_post_copyup_write_required()) {
600 write_object();
601 return;
602 }
603
604 post_write_object_map_update();
605 }
606
607 template <typename I>
608 void AbstractObjectWriteRequest<I>::post_write_object_map_update() {
609 I *image_ctx = this->m_ictx;
610
611 image_ctx->image_lock.lock_shared();
612 if (image_ctx->object_map == nullptr || !is_object_map_update_enabled() ||
613 !is_non_existent_post_write_object_map_state()) {
614 image_ctx->image_lock.unlock_shared();
615 this->finish(0);
616 return;
617 }
618
619 ldout(image_ctx->cct, 20) << dendl;
620
621 // should have been flushed prior to releasing lock
622 ceph_assert(image_ctx->exclusive_lock->is_lock_owner());
623 if (image_ctx->object_map->template aio_update<
624 AbstractObjectWriteRequest<I>,
625 &AbstractObjectWriteRequest<I>::handle_post_write_object_map_update>(
626 CEPH_NOSNAP, this->m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING,
627 this->m_trace, false, this)) {
628 image_ctx->image_lock.unlock_shared();
629 return;
630 }
631
632 image_ctx->image_lock.unlock_shared();
633 this->finish(0);
634 }
635
636 template <typename I>
637 void AbstractObjectWriteRequest<I>::handle_post_write_object_map_update(int r) {
638 I *image_ctx = this->m_ictx;
639 ldout(image_ctx->cct, 20) << "r=" << r << dendl;
640 if (r < 0) {
641 lderr(image_ctx->cct) << "failed to update object map: "
642 << cpp_strerror(r) << dendl;
643 this->finish(r);
644 return;
645 }
646
647 this->finish(0);
648 }
649
650 template <typename I>
651 void ObjectWriteRequest<I>::add_write_hint(neorados::WriteOp* wr) {
652 if ((m_write_flags & OBJECT_WRITE_FLAG_CREATE_EXCLUSIVE) != 0) {
653 wr->create(true);
654 } else if (m_assert_version.has_value()) {
655 wr->assert_version(m_assert_version.value());
656 }
657 AbstractObjectWriteRequest<I>::add_write_hint(wr);
658 }
659
660 template <typename I>
661 void ObjectWriteRequest<I>::add_write_ops(neorados::WriteOp* wr) {
662 if (this->m_full_object) {
663 wr->write_full(bufferlist{m_write_data});
664 } else {
665 wr->write(this->m_object_off, bufferlist{m_write_data});
666 }
667 util::apply_op_flags(m_op_flags, 0U, wr);
668 }
669
670 template <typename I>
671 void ObjectDiscardRequest<I>::add_write_ops(neorados::WriteOp* wr) {
672 switch (m_discard_action) {
673 case DISCARD_ACTION_REMOVE:
674 wr->remove();
675 break;
676 case DISCARD_ACTION_REMOVE_TRUNCATE:
677 wr->create(false);
678 // fall through
679 case DISCARD_ACTION_TRUNCATE:
680 wr->truncate(this->m_object_off);
681 break;
682 case DISCARD_ACTION_ZERO:
683 wr->zero(this->m_object_off, this->m_object_len);
684 break;
685 default:
686 ceph_abort();
687 break;
688 }
689 }
690
691 template <typename I>
692 void ObjectWriteSameRequest<I>::add_write_ops(neorados::WriteOp* wr) {
693 wr->writesame(this->m_object_off, this->m_object_len,
694 bufferlist{m_write_data});
695 util::apply_op_flags(m_op_flags, 0U, wr);
696 }
697
698 template <typename I>
699 void ObjectCompareAndWriteRequest<I>::add_write_ops(neorados::WriteOp* wr) {
700 wr->cmpext(this->m_object_off, bufferlist{m_cmp_bl}, nullptr);
701
702 if (this->m_full_object) {
703 wr->write_full(bufferlist{m_write_bl});
704 } else {
705 wr->write(this->m_object_off, bufferlist{m_write_bl});
706 }
707 util::apply_op_flags(m_op_flags, 0U, wr);
708 }
709
710 template <typename I>
711 int ObjectCompareAndWriteRequest<I>::filter_write_result(int r) const {
712 if (r <= -MAX_ERRNO) {
713 I *image_ctx = this->m_ictx;
714 Extents image_extents;
715
716 // object extent compare mismatch
717 uint64_t offset = -MAX_ERRNO - r;
718 io::util::extent_to_file(image_ctx, this->m_object_no, offset,
719 this->m_object_len, image_extents);
720 ceph_assert(image_extents.size() == 1);
721
722 if (m_mismatch_offset) {
723 *m_mismatch_offset = image_extents[0].first;
724 }
725 r = -EILSEQ;
726 }
727 return r;
728 }
729
730 template <typename I>
731 ObjectListSnapsRequest<I>::ObjectListSnapsRequest(
732 I *ictx, uint64_t objectno, Extents&& object_extents, SnapIds&& snap_ids,
733 int list_snaps_flags, const ZTracer::Trace &parent_trace,
734 SnapshotDelta* snapshot_delta, Context *completion)
735 : ObjectRequest<I>(
736 ictx, objectno, ictx->duplicate_data_io_context(), "snap_list",
737 parent_trace, completion),
738 m_object_extents(std::move(object_extents)),
739 m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags),
740 m_snapshot_delta(snapshot_delta) {
741 this->m_io_context->read_snap(CEPH_SNAPDIR);
742 }
743
744 template <typename I>
745 void ObjectListSnapsRequest<I>::send() {
746 I *image_ctx = this->m_ictx;
747 ldout(image_ctx->cct, 20) << dendl;
748
749 if (m_snap_ids.size() < 2) {
750 lderr(image_ctx->cct) << "invalid snap ids: " << m_snap_ids << dendl;
751 this->async_finish(-EINVAL);
752 return;
753 }
754
755 list_snaps();
756 }
757
758 template <typename I>
759 void ObjectListSnapsRequest<I>::list_snaps() {
760 I *image_ctx = this->m_ictx;
761 ldout(image_ctx->cct, 20) << dendl;
762
763 neorados::ReadOp read_op;
764 read_op.list_snaps(&m_snap_set, &m_ec);
765
766 image_ctx->rados_api.execute(
767 {data_object_name(this->m_ictx, this->m_object_no)},
768 *this->m_io_context, std::move(read_op), nullptr,
769 librbd::asio::util::get_callback_adapter(
770 [this](int r) { handle_list_snaps(r); }), nullptr,
771 (this->m_trace.valid() ? this->m_trace.get_info() : nullptr));
772 }
773
// Completion of the list-snaps read op issued by list_snaps().
//
// Clears *m_snapshot_delta and then, depending on the result:
//   - -ENOENT: marks the requested extents via zero_extent(first_snap_id, true)
//     and continues with list_from_parent()
//   - any other error: logs it and finishes the request with that error
//   - success: converts m_snap_set to the legacy librados structure and walks
//     m_snap_ids in order, calling calc_snap_set_diff() for each consecutive
//     (start, end) snapshot pair. The resulting diff is clipped to
//     m_object_extents and recorded in *m_snapshot_delta as DATA extents
//     (keyed by {end_snap_id, clone_end_snap_id}) and, unless
//     LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS is set, ZEROED extents (keyed by
//     {end_snap_id, end_snap_id}). Snapshots at or before the first snap id
//     are iterated only to track existence and size.
// If no delta was recorded the request continues with list_from_parent();
// otherwise it finishes with 0.
774 template <typename I>
775 void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
776 I *image_ctx = this->m_ictx;
777 auto cct = image_ctx->cct;
778
// the list_snaps op step reports its own status through m_ec (see
// list_snaps()); fold it into r when the overall op succeeded
779 if (r >= 0) {
780 r = -m_ec.value();
781 }
782
783 ldout(cct, 20) << "r=" << r << dendl;
784
785 m_snapshot_delta->clear();
786 auto& snapshot_delta = *m_snapshot_delta;
787
788 ceph_assert(!m_snap_ids.empty());
789 librados::snap_t start_snap_id = 0;
790 librados::snap_t first_snap_id = *m_snap_ids.begin();
791 librados::snap_t last_snap_id = *m_snap_ids.rbegin();
792
793 if (r == -ENOENT) {
794 // the object does not exist -- mark the missing extents
795 zero_extent(first_snap_id, true);
796 list_from_parent();
797 return;
798 } else if (r < 0) {
799 lderr(cct) << "failed to retrieve object snapshot list: " << cpp_strerror(r)
800 << dendl;
801 this->finish(r);
802 return;
803 }
804
805 // helper function requires the librados legacy data structure
806 librados::snap_set_t snap_set;
807 convert_snap_set(m_snap_set, &snap_set);
808
809 bool initial_extents_written = false;
810
// union of the requested object extents; every diff below is clipped to it
811 interval_set<uint64_t> object_interval;
812 for (auto& object_extent : m_object_extents) {
813 object_interval.insert(object_extent.first, object_extent.second);
814 }
815 ldout(cct, 20) << "object_interval=" << object_interval << dendl;
816
817 // loop through all expected snapshots and build interval sets for
818 // data and zeroed ranges for each snapshot
819 uint64_t prev_end_size = 0;
820 interval_set<uint64_t> initial_written_extents;
821 for (auto end_snap_id : m_snap_ids) {
822 if (start_snap_id == end_snap_id) {
823 continue;
824 } else if (end_snap_id > last_snap_id) {
825 break;
826 }
827
// out-parameters of calc_snap_set_diff() below
828 interval_set<uint64_t> diff;
829 uint64_t end_size;
830 bool exists;
831 librados::snap_t clone_end_snap_id;
832 bool read_whole_object;
833 calc_snap_set_diff(cct, snap_set, start_snap_id,
834 end_snap_id, &diff, &end_size, &exists,
835 &clone_end_snap_id, &read_whole_object);
836
// when the diff cannot be trusted at extent granularity (or the caller
// asked for whole-object granularity and something changed), report the
// entire object as changed
837 if (read_whole_object ||
838 (!diff.empty() &&
839 ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) {
840 ldout(cct, 1) << "need to read full object" << dendl;
841 diff.clear();
842 diff.insert(0, image_ctx->layout.object_size);
843 end_size = image_ctx->layout.object_size;
844 clone_end_snap_id = end_snap_id;
845 } else if (!exists) {
846 end_size = 0;
847 }
848
849 if (exists) {
850 // reads should be issued against the newest (existing) snapshot within
851 // the associated snapshot object clone. writes should be issued
852 // against the oldest snapshot in the snap_map.
853 ceph_assert(clone_end_snap_id >= end_snap_id);
854 if (clone_end_snap_id > last_snap_id) {
855 // do not read past the copy point snapshot
856 clone_end_snap_id = last_snap_id;
857 }
858 }
859
860 // clip diff to current object extent
861 interval_set<uint64_t> diff_interval;
862 diff_interval.intersection_of(object_interval, diff);
863
864 // clip diff to size of object (in case it was truncated)
865 interval_set<uint64_t> zero_interval;
866 if (end_size < prev_end_size) {
867 zero_interval.insert(end_size, prev_end_size - end_size);
868 zero_interval.intersection_of(object_interval);
869
870 interval_set<uint64_t> trunc_interval;
871 trunc_interval.intersection_of(zero_interval, diff_interval);
872 if (!trunc_interval.empty()) {
873 diff_interval.subtract(trunc_interval);
874 ldout(cct, 20) << "clearing truncate diff: " << trunc_interval << dendl;
875 }
876 }
877
878 ldout(cct, 20) << "start_snap_id=" << start_snap_id << ", "
879 << "end_snap_id=" << end_snap_id << ", "
880 << "clone_end_snap_id=" << clone_end_snap_id << ", "
881 << "diff=" << diff << ", "
882 << "diff_interval=" << diff_interval<< ", "
883 << "zero_interval=" << zero_interval<< ", "
884 << "end_size=" << end_size << ", "
885 << "prev_end_size=" << prev_end_size << ", "
886 << "exists=" << exists << ", "
887 << "whole_object=" << read_whole_object << dendl;
888
889 // check if object exists prior to start of incremental snap delta so that
890 // we don't DNE the object if no additional deltas exist
891 if (exists && start_snap_id == 0 &&
892 (!diff_interval.empty() || !zero_interval.empty())) {
893 ldout(cct, 20) << "object exists at snap id " << end_snap_id << dendl;
894 initial_extents_written = true;
895 }
896
// advance the window before the skip check below so that skipped snapshots
// still contribute their size and existence to the next iteration
897 prev_end_size = end_size;
898 start_snap_id = end_snap_id;
899
900 if (end_snap_id <= first_snap_id) {
901 // don't include deltas from the starting snapshots, but we iterate over
902 // it to track its existence and size
903 ldout(cct, 20) << "skipping prior snapshot " << dendl;
904 continue;
905 }
906
907 if (exists) {
908 for (auto& interval : diff_interval) {
909 snapshot_delta[{end_snap_id, clone_end_snap_id}].insert(
910 interval.first, interval.second,
911 SparseExtent(SPARSE_EXTENT_STATE_DATA, interval.second));
912 }
913 } else {
// the object is gone at this snapshot: the changed ranges count as zeroed
914 zero_interval.union_of(diff_interval);
915 }
916
917 if ((m_list_snaps_flags & LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS) == 0) {
918 for (auto& interval : zero_interval) {
919 snapshot_delta[{end_snap_id, end_snap_id}].insert(
920 interval.first, interval.second,
921 SparseExtent(SPARSE_EXTENT_STATE_ZEROED, interval.second));
922 }
923 }
924 }
925
// sample emptiness before zero_extent() can add entries, so that the parent
// fallback decision depends only on the deltas computed from the snap set
926 bool snapshot_delta_empty = snapshot_delta.empty();
927 if (!initial_extents_written) {
928 zero_extent(first_snap_id, first_snap_id > 0);
929 }
930 ldout(cct, 20) << "snapshot_delta=" << snapshot_delta << dendl;
931
932 if (snapshot_delta_empty) {
933 list_from_parent();
934 return;
935 }
936
937 this->finish(0);
938 }
939
940 template <typename I>
941 void ObjectListSnapsRequest<I>::list_from_parent() {
942 I *image_ctx = this->m_ictx;
943 auto cct = image_ctx->cct;
944
945 ceph_assert(!m_snap_ids.empty());
946 librados::snap_t snap_id_start = *m_snap_ids.begin();
947 librados::snap_t snap_id_end = *m_snap_ids.rbegin();
948
949 std::unique_lock image_locker{image_ctx->image_lock};
950 if ((snap_id_start > 0) || (image_ctx->parent == nullptr) ||
951 ((m_list_snaps_flags & LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT) != 0)) {
952 image_locker.unlock();
953
954 this->finish(0);
955 return;
956 }
957
958 // calculate reverse mapping onto the parent image
959 Extents parent_image_extents;
960 for (auto [object_off, object_len]: m_object_extents) {
961 io::util::extent_to_file(image_ctx, this->m_object_no, object_off,
962 object_len, parent_image_extents);
963 }
964
965 uint64_t parent_overlap = 0;
966 uint64_t object_overlap = 0;
967 int r = image_ctx->get_parent_overlap(snap_id_end, &parent_overlap);
968 if (r == 0) {
969 object_overlap = image_ctx->prune_parent_extents(parent_image_extents,
970 parent_overlap);
971 }
972
973 if (object_overlap == 0) {
974 image_locker.unlock();
975
976 this->finish(0);
977 return;
978 }
979
980 auto ctx = create_context_callback<
981 ObjectListSnapsRequest<I>,
982 &ObjectListSnapsRequest<I>::handle_list_from_parent>(this);
983 auto aio_comp = AioCompletion::create_and_start(
984 ctx, librbd::util::get_image_ctx(image_ctx->parent), AIO_TYPE_GENERIC);
985 ldout(cct, 20) << "aio_comp=" << aio_comp<< ", "
986 << "parent_image_extents " << parent_image_extents << dendl;
987
988 auto list_snaps_flags = (
989 m_list_snaps_flags | LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS);
990
991 ImageListSnapsRequest<I> req(
992 *image_ctx->parent, aio_comp, std::move(parent_image_extents),
993 {0, image_ctx->parent->snap_id}, list_snaps_flags, &m_parent_snapshot_delta,
994 this->m_trace);
995 req.send();
996 }
997
998 template <typename I>
999 void ObjectListSnapsRequest<I>::handle_list_from_parent(int r) {
1000 I *image_ctx = this->m_ictx;
1001 auto cct = image_ctx->cct;
1002
1003 ldout(cct, 20) << "r=" << r << ", "
1004 << "parent_snapshot_delta=" << m_parent_snapshot_delta
1005 << dendl;
1006
1007 // ignore special-case of fully empty dataset (we ignore zeroes)
1008 if (m_parent_snapshot_delta.empty()) {
1009 this->finish(0);
1010 return;
1011 }
1012
1013 // the write/read snapshot id key is not useful for parent images so
1014 // map the the special-case INITIAL_WRITE_READ_SNAP_IDS key
1015 *m_snapshot_delta = {};
1016 auto& intervals = (*m_snapshot_delta)[INITIAL_WRITE_READ_SNAP_IDS];
1017 for (auto& [key, image_extents] : m_parent_snapshot_delta) {
1018 for (auto image_extent : image_extents) {
1019 auto state = image_extent.get_val().state;
1020
1021 // map image-extents back to this object
1022 striper::LightweightObjectExtents object_extents;
1023 io::util::file_to_extents(image_ctx, image_extent.get_off(),
1024 image_extent.get_len(), 0, &object_extents);
1025 for (auto& object_extent : object_extents) {
1026 ceph_assert(object_extent.object_no == this->m_object_no);
1027 intervals.insert(
1028 object_extent.offset, object_extent.length,
1029 {state, object_extent.length});
1030 }
1031 }
1032 }
1033
1034 ldout(cct, 20) << "snapshot_delta=" << *m_snapshot_delta << dendl;
1035 this->finish(0);
1036 }
1037
1038 template <typename I>
1039 void ObjectListSnapsRequest<I>::zero_extent(uint64_t snap_id, bool dne) {
1040 I *image_ctx = this->m_ictx;
1041 auto cct = image_ctx->cct;
1042
1043 // the object does not exist or is (partially) under whiteout -- mark the
1044 // missing extents which would be any portion of the object that does not
1045 // have data in the initial snapshot set
1046 if ((m_list_snaps_flags & LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS) == 0) {
1047 interval_set<uint64_t> interval;
1048 for (auto [object_offset, object_length] : m_object_extents) {
1049 interval.insert(object_offset, object_length);
1050 }
1051
1052 for (auto [offset, length] : interval) {
1053 ldout(cct, 20) << "snapshot " << snap_id << ": "
1054 << (dne ? "DNE" : "zeroed") << " extent "
1055 << offset << "~" << length << dendl;
1056 (*m_snapshot_delta)[{snap_id, snap_id}].insert(
1057 offset, length,
1058 SparseExtent(
1059 (dne ? SPARSE_EXTENT_STATE_DNE : SPARSE_EXTENT_STATE_ZEROED),
1060 length));
1061 }
1062 }
1063 }
1064
1065 } // namespace io
1066 } // namespace librbd
1067
1068 template class librbd::io::ObjectRequest<librbd::ImageCtx>;
1069 template class librbd::io::ObjectReadRequest<librbd::ImageCtx>;
1070 template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>;
1071 template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>;
1072 template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>;
1073 template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>;
1074 template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>;
1075 template class librbd::io::ObjectListSnapsRequest<librbd::ImageCtx>;