]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/io/ImageRequest.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / librbd / io / ImageRequest.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/io/ImageRequest.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/internal.h"
7#include "librbd/Journal.h"
b32b8144 8#include "librbd/Types.h"
7c673cae 9#include "librbd/Utils.h"
f67539c2 10#include "librbd/asio/ContextWQ.h"
7c673cae 11#include "librbd/io/AioCompletion.h"
11fdf7f2
TL
12#include "librbd/io/AsyncOperation.h"
13#include "librbd/io/ObjectDispatchInterface.h"
14#include "librbd/io/ObjectDispatchSpec.h"
f67539c2 15#include "librbd/io/ObjectDispatcherInterface.h"
11fdf7f2 16#include "librbd/io/Utils.h"
7c673cae
FG
17#include "librbd/journal/Types.h"
18#include "include/rados/librados.hpp"
f67539c2 19#include "common/errno.h"
11fdf7f2 20#include "common/perf_counters.h"
7c673cae 21#include "osdc/Striper.h"
11fdf7f2
TL
22#include <algorithm>
23#include <functional>
f67539c2 24#include <map>
7c673cae
FG
25
26#define dout_subsys ceph_subsys_rbd
27#undef dout_prefix
9f95a23c 28#define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
7c673cae
FG
29
30namespace librbd {
31namespace io {
32
9f95a23c 33using librbd::util::data_object_name;
11fdf7f2 34using librbd::util::get_image_ctx;
7c673cae
FG
35
36namespace {
37
f67539c2
TL
38template <typename I>
39struct C_AssembleSnapshotDeltas : public C_AioRequest {
40 I* image_ctx;
41 SnapshotDelta* snapshot_delta;
42
43 ceph::mutex lock = ceph::make_mutex(
44 "librbd::io::C_AssembleSnapshotDeltas::lock", false);
45 std::map<uint64_t, SnapshotDelta> object_snapshot_delta;
46
47 C_AssembleSnapshotDeltas(I* image_ctx, AioCompletion* aio_comp,
48 SnapshotDelta* snapshot_delta)
49 : C_AioRequest(aio_comp),
50 image_ctx(image_ctx), snapshot_delta(snapshot_delta) {
51 }
52
53 SnapshotDelta* get_snapshot_delta(uint64_t object_no) {
54 std::unique_lock locker{lock};
55 return &object_snapshot_delta[object_no];
56 }
57
58 void finish(int r) override {
59 auto cct = image_ctx->cct;
60
61 if (r < 0) {
62 lderr(cct) << "C_AssembleSnapshotDeltas: list snaps failed: "
63 << cpp_strerror(r) << dendl;
64 C_AioRequest::finish(r);
65 return;
66 }
67
68 std::unique_lock locker{lock};
69 *snapshot_delta = {};
70 for (auto& [object_no, object_snapshot_delta] : object_snapshot_delta) {
71 SnapshotDelta image_snapshot_delta;
72 object_to_image_intervals(object_no, object_snapshot_delta,
73 &image_snapshot_delta, snapshot_delta);
74
75 ldout(cct, 20) << "object_no=" << object_no << ", "
76 << "object_snapshot_delta="
77 << object_snapshot_delta << ", "
78 << "image_snapshot_delta=" << image_snapshot_delta
79 << dendl;
80 }
81
82 ldout(cct, 20) << "snapshot_delta=" << *snapshot_delta << dendl;
83 C_AioRequest::finish(0);
84 }
85
86 void object_to_image_intervals(
87 uint64_t object_no, const SnapshotDelta& object_snapshot_delta,
88 SnapshotDelta* image_snapshot_delta,
89 SnapshotDelta* assembled_image_snapshot_delta) {
90 for (auto& [key, object_extents] : object_snapshot_delta) {
91 for (auto& object_extent : object_extents) {
1e59de90
TL
92 auto [image_extents, _] = io::util::object_to_area_extents(
93 image_ctx, object_no,
94 {{object_extent.get_off(), object_extent.get_len()}});
f67539c2
TL
95
96 auto& intervals = (*image_snapshot_delta)[key];
97 auto& assembled_intervals = (*assembled_image_snapshot_delta)[key];
98 for (auto [image_offset, image_length] : image_extents) {
99 SparseExtent sparse_extent{object_extent.get_val().state,
100 image_length};
101 intervals.insert(image_offset, image_length, sparse_extent);
102 assembled_intervals.insert(image_offset, image_length,
103 sparse_extent);
104 }
105 }
106 }
107 }
108};
109
9f95a23c
TL
110template <typename I>
111struct C_RBD_Readahead : public Context {
112 I *ictx;
113 uint64_t object_no;
f67539c2 114 io::ReadExtents extents;
9f95a23c
TL
115
116 C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length)
f67539c2 117 : ictx(ictx), object_no(object_no), extents({{offset, length}}) {
9f95a23c
TL
118 ictx->readahead.inc_pending();
119 }
120
121 void finish(int r) override {
f67539c2
TL
122 ceph_assert(extents.size() == 1);
123 auto& extent = extents.front();
9f95a23c
TL
124 ldout(ictx->cct, 20) << "C_RBD_Readahead on "
125 << data_object_name(ictx, object_no) << ": "
f67539c2 126 << extent.offset << "~" << extent.length << dendl;
9f95a23c
TL
127 ictx->readahead.dec_pending();
128 }
129};
130
131template <typename I>
f67539c2 132void readahead(I *ictx, const Extents& image_extents, IOContext io_context) {
9f95a23c
TL
133 uint64_t total_bytes = 0;
134 for (auto& image_extent : image_extents) {
135 total_bytes += image_extent.second;
136 }
137
138 ictx->image_lock.lock_shared();
139 auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes);
140 bool abort = (
141 ictx->readahead_disable_after_bytes != 0 &&
142 total_bytes_read > ictx->readahead_disable_after_bytes);
143 if (abort) {
144 ictx->image_lock.unlock_shared();
145 return;
146 }
147
1e59de90 148 uint64_t data_size = ictx->get_area_size(ImageArea::DATA);
9f95a23c
TL
149 ictx->image_lock.unlock_shared();
150
1e59de90 151 auto readahead_extent = ictx->readahead.update(image_extents, data_size);
9f95a23c
TL
152 uint64_t readahead_offset = readahead_extent.first;
153 uint64_t readahead_length = readahead_extent.second;
154
155 if (readahead_length > 0) {
156 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~"
157 << readahead_length << dendl;
158 LightweightObjectExtents readahead_object_extents;
1e59de90
TL
159 io::util::area_to_object_extents(ictx, readahead_offset, readahead_length,
160 ImageArea::DATA, 0,
161 &readahead_object_extents);
9f95a23c
TL
162 for (auto& object_extent : readahead_object_extents) {
163 ldout(ictx->cct, 20) << "(readahead) "
164 << data_object_name(ictx,
165 object_extent.object_no) << " "
166 << object_extent.offset << "~"
167 << object_extent.length << dendl;
168
169 auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no,
170 object_extent.offset,
171 object_extent.length);
172 auto req = io::ObjectDispatchSpec::create_read(
173 ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2 174 &req_comp->extents, io_context, 0, 0, {}, nullptr, req_comp);
9f95a23c
TL
175 req->send();
176 }
177
178 ictx->perfcounter->inc(l_librbd_readahead);
179 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
180 }
181}
182
11fdf7f2
TL
183template <typename I>
184struct C_UpdateTimestamp : public Context {
185public:
186 I& m_image_ctx;
187 bool m_modify; // if modify set to 'true', modify timestamp is updated,
188 // access timestamp otherwise
189 AsyncOperation m_async_op;
91327a77 190
11fdf7f2
TL
191 C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
192 m_async_op.start_op(*get_image_ctx(&m_image_ctx));
91327a77 193 }
11fdf7f2
TL
194 ~C_UpdateTimestamp() override {
195 m_async_op.finish_op();
91327a77
AA
196 }
197
11fdf7f2
TL
198 void send() {
199 librados::ObjectWriteOperation op;
200 if (m_modify) {
201 cls_client::set_modify_timestamp(&op);
202 } else {
203 cls_client::set_access_timestamp(&op);
91327a77 204 }
7c673cae 205
11fdf7f2
TL
206 auto comp = librbd::util::create_rados_callback(this);
207 int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
208 ceph_assert(r == 0);
209 comp->release();
7c673cae
FG
210 }
211
212 void finish(int r) override {
11fdf7f2 213 // ignore errors updating timestamp
7c673cae
FG
214 }
215};
216
11fdf7f2
TL
217bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
218 uint64_t interval) {
219 return (interval &&
220 (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
224ce89b
WB
221}
222
11fdf7f2 223} // anonymous namespace
c07f9fc5 224
9f95a23c
TL
// Redefine the log prefix for the code below: in addition to the component
// name and function name it also prints the `this` pointer.
225#undef dout_prefix
226#define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
227 << " " << __func__ << ": "
228
7c673cae
FG
229template <typename I>
230void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
1e59de90 231 Extents &&image_extents, ImageArea area,
f67539c2
TL
232 ReadResult &&read_result, IOContext io_context,
233 int op_flags, int read_flags,
31f18b77 234 const ZTracer::Trace &parent_trace) {
1e59de90 235 ImageReadRequest<I> req(*ictx, c, std::move(image_extents), area,
f67539c2
TL
236 std::move(read_result), io_context, op_flags,
237 read_flags, parent_trace);
7c673cae
FG
238 req.send();
239}
240
241template <typename I>
242void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
1e59de90
TL
243 Extents &&image_extents, ImageArea area,
244 bufferlist &&bl, int op_flags,
31f18b77 245 const ZTracer::Trace &parent_trace) {
1e59de90
TL
246 ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), area,
247 std::move(bl), op_flags, parent_trace);
7c673cae
FG
248 req.send();
249}
250
251template <typename I>
252void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
1e59de90 253 Extents &&image_extents, ImageArea area,
11fdf7f2 254 uint32_t discard_granularity_bytes,
f67539c2 255 const ZTracer::Trace &parent_trace) {
1e59de90
TL
256 ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents), area,
257 discard_granularity_bytes, parent_trace);
7c673cae
FG
258 req.send();
259}
260
261template <typename I>
31f18b77 262void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
11fdf7f2
TL
263 FlushSource flush_source,
264 const ZTracer::Trace &parent_trace) {
265 ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace);
7c673cae
FG
266 req.send();
267}
268
269template <typename I>
270void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
1e59de90
TL
271 Extents &&image_extents, ImageArea area,
272 bufferlist &&bl, int op_flags,
31f18b77 273 const ZTracer::Trace &parent_trace) {
1e59de90
TL
274 ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents), area,
275 std::move(bl), op_flags, parent_trace);
7c673cae
FG
276 req.send();
277}
278
c07f9fc5
FG
279template <typename I>
280void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
281 Extents &&image_extents,
1e59de90 282 ImageArea area,
c07f9fc5
FG
283 bufferlist &&cmp_bl,
284 bufferlist &&bl,
285 uint64_t *mismatch_offset,
1e59de90 286 int op_flags,
c07f9fc5 287 const ZTracer::Trace &parent_trace) {
1e59de90 288 ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents), area,
c07f9fc5 289 std::move(cmp_bl), std::move(bl),
1e59de90 290 mismatch_offset, op_flags, parent_trace);
c07f9fc5
FG
291 req.send();
292}
293
7c673cae
FG
294template <typename I>
295void ImageRequest<I>::send() {
296 I &image_ctx = this->m_image_ctx;
11fdf7f2 297 ceph_assert(m_aio_comp->is_initialized(get_aio_type()));
494da23a 298 ceph_assert(m_aio_comp->is_started());
7c673cae
FG
299
300 CephContext *cct = image_ctx.cct;
301 AioCompletion *aio_comp = this->m_aio_comp;
302 ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
c07f9fc5 303 << "completion=" << aio_comp << dendl;
7c673cae 304
f67539c2
TL
305 update_timestamp();
306 send_request();
7c673cae
FG
307}
308
309template <typename I>
11fdf7f2
TL
310void ImageRequest<I>::update_timestamp() {
311 bool modify = (get_aio_type() != AIO_TYPE_READ);
312 uint64_t update_interval;
313 if (modify) {
314 update_interval = m_image_ctx.mtime_update_interval;
315 } else {
316 update_interval = m_image_ctx.atime_update_interval;
317 }
7c673cae 318
11fdf7f2
TL
319 if (update_interval == 0) {
320 return;
321 }
322
323 utime_t (I::*get_timestamp_fn)() const;
324 void (I::*set_timestamp_fn)(utime_t);
325 if (modify) {
326 get_timestamp_fn = &I::get_modify_timestamp;
327 set_timestamp_fn = &I::set_modify_timestamp;
328 } else {
329 get_timestamp_fn = &I::get_access_timestamp;
330 set_timestamp_fn = &I::set_access_timestamp;
331 }
332
333 utime_t ts = ceph_clock_now();
334 {
9f95a23c 335 std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
336 if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
337 update_interval)) {
338 return;
339 }
340 }
341
342 {
9f95a23c 343 std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
344 bool update = should_update_timestamp(
345 ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
346 if (!update) {
347 return;
348 }
349
350 std::invoke(set_timestamp_fn, m_image_ctx, ts);
351 }
352
353 // TODO we fire and forget this outside the IO path to prevent
354 // potential race conditions with librbd client IO callbacks
355 // between different threads (e.g. librados and object cacher)
356 ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
357 auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
358 req->send();
7c673cae
FG
359}
360
361template <typename I>
362ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
1e59de90 363 Extents &&image_extents, ImageArea area,
f67539c2
TL
364 ReadResult &&read_result,
365 IOContext io_context, int op_flags,
366 int read_flags,
367 const ZTracer::Trace &parent_trace)
1e59de90
TL
368 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), area,
369 "read", parent_trace),
370 m_io_context(io_context), m_op_flags(op_flags), m_read_flags(read_flags) {
7c673cae
FG
371 aio_comp->read_result = std::move(read_result);
372}
373
7c673cae
FG
374template <typename I>
375void ImageReadRequest<I>::send_request() {
376 I &image_ctx = this->m_image_ctx;
377 CephContext *cct = image_ctx.cct;
378
379 auto &image_extents = this->m_image_extents;
1e59de90
TL
380 if (this->m_image_area == ImageArea::DATA &&
381 image_ctx.cache && image_ctx.readahead_max_bytes > 0 &&
7c673cae 382 !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
1e59de90 383 readahead(get_image_ctx(&image_ctx), image_extents, m_io_context);
9f95a23c 384 }
7c673cae 385
9f95a23c
TL
386 // map image extents to object extents
387 LightweightObjectExtents object_extents;
f67539c2 388 uint64_t buffer_ofs = 0;
9f95a23c
TL
389 for (auto &extent : image_extents) {
390 if (extent.second == 0) {
391 continue;
7c673cae 392 }
7c673cae 393
1e59de90
TL
394 util::area_to_object_extents(&image_ctx, extent.first, extent.second,
395 this->m_image_area, buffer_ofs,
396 &object_extents);
9f95a23c 397 buffer_ofs += extent.second;
7c673cae 398 }
7c673cae 399
9f95a23c 400 AioCompletion *aio_comp = this->m_aio_comp;
f67539c2
TL
401 aio_comp->read_result.set_image_extents(image_extents);
402
403 // issue the requests
9f95a23c
TL
404 aio_comp->set_request_count(object_extents.size());
405 for (auto &oe : object_extents) {
406 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
407 << oe.offset << "~" << oe.length << " from "
408 << oe.buffer_extents << dendl;
7c673cae 409
9f95a23c 410 auto req_comp = new io::ReadResult::C_ObjectReadRequest(
f67539c2 411 aio_comp, {{oe.offset, oe.length, std::move(oe.buffer_extents)}});
9f95a23c 412 auto req = ObjectDispatchSpec::create_read(
f67539c2 413 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no,
1e59de90 414 &req_comp->extents, m_io_context, m_op_flags, m_read_flags,
f67539c2 415 this->m_trace, nullptr, req_comp);
9f95a23c
TL
416 req->send();
417 }
7c673cae
FG
418
419 image_ctx.perfcounter->inc(l_librbd_rd);
420 image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
421}
422
7c673cae
FG
423template <typename I>
424void AbstractImageWriteRequest<I>::send_request() {
425 I &image_ctx = this->m_image_ctx;
7c673cae 426
7c673cae
FG
427 bool journaling = false;
428
429 AioCompletion *aio_comp = this->m_aio_comp;
7c673cae
FG
430 {
431 // prevent image size from changing between computing clip and recording
432 // pending async operation
9f95a23c 433 std::shared_lock image_locker{image_ctx.image_lock};
7c673cae
FG
434 journaling = (image_ctx.journal != nullptr &&
435 image_ctx.journal->is_journal_appending());
436 }
437
9f95a23c
TL
438 uint64_t clip_len = 0;
439 LightweightObjectExtents object_extents;
440 for (auto &extent : this->m_image_extents) {
441 if (extent.second == 0) {
442 continue;
443 }
444
445 // map to object extents
1e59de90
TL
446 io::util::area_to_object_extents(&image_ctx, extent.first, extent.second,
447 this->m_image_area, clip_len,
448 &object_extents);
9f95a23c
TL
449 clip_len += extent.second;
450 }
451
11fdf7f2 452 int ret = prune_object_extents(&object_extents);
c07f9fc5
FG
453 if (ret < 0) {
454 aio_comp->fail(ret);
455 return;
456 }
7c673cae 457
39ae355f
TL
458 // reflect changes in object_extents back to m_image_extents
459 if (ret == 1) {
460 this->m_image_extents.clear();
461 for (auto& object_extent : object_extents) {
1e59de90
TL
462 auto [image_extents, _] = io::util::object_to_area_extents(
463 &image_ctx, object_extent.object_no,
464 {{object_extent.offset, object_extent.length}});
39ae355f
TL
465 this->m_image_extents.insert(this->m_image_extents.end(),
466 image_extents.begin(), image_extents.end());
467 }
468 }
469
9f95a23c 470 aio_comp->set_request_count(object_extents.size());
7c673cae
FG
471 if (!object_extents.empty()) {
472 uint64_t journal_tid = 0;
7c673cae
FG
473 if (journaling) {
474 // in-flight ops are flushed prior to closing the journal
11fdf7f2
TL
475 ceph_assert(image_ctx.journal != NULL);
476 journal_tid = append_journal_event(m_synchronous);
7c673cae
FG
477 }
478
1e59de90
TL
479 // it's very important that IOContext is captured here instead of
480 // e.g. at the API layer so that an up-to-date snap context is used
481 // when owning the exclusive lock
482 send_object_requests(object_extents, image_ctx.get_data_io_context(),
483 journal_tid);
7c673cae
FG
484 }
485
486 update_stats(clip_len);
7c673cae
FG
487}
488
489template <typename I>
490void AbstractImageWriteRequest<I>::send_object_requests(
f67539c2 491 const LightweightObjectExtents &object_extents, IOContext io_context,
11fdf7f2 492 uint64_t journal_tid) {
7c673cae
FG
493 I &image_ctx = this->m_image_ctx;
494 CephContext *cct = image_ctx.cct;
495
496 AioCompletion *aio_comp = this->m_aio_comp;
9f95a23c
TL
497 bool single_extent = (object_extents.size() == 1);
498 for (auto& oe : object_extents) {
499 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
500 << oe.offset << "~" << oe.length << " from "
501 << oe.buffer_extents << dendl;
7c673cae 502 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
f67539c2
TL
503 auto request = create_object_request(oe, io_context, journal_tid,
504 single_extent, req_comp);
9f95a23c 505 request->send();
7c673cae
FG
506 }
507}
508
509template <typename I>
9f95a23c
TL
510void ImageWriteRequest<I>::assemble_extent(
511 const LightweightObjectExtent &object_extent, bufferlist *bl) {
7c673cae
FG
512 for (auto q = object_extent.buffer_extents.begin();
513 q != object_extent.buffer_extents.end(); ++q) {
514 bufferlist sub_bl;
515 sub_bl.substr_of(m_bl, q->first, q->second);
516 bl->claim_append(sub_bl);
517 }
518}
519
520template <typename I>
11fdf7f2 521uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
522 I &image_ctx = this->m_image_ctx;
523
524 uint64_t tid = 0;
525 uint64_t buffer_offset = 0;
11fdf7f2 526 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
527 for (auto &extent : this->m_image_extents) {
528 bufferlist sub_bl;
529 sub_bl.substr_of(m_bl, buffer_offset, extent.second);
530 buffer_offset += extent.second;
531
532 tid = image_ctx.journal->append_write_event(extent.first, extent.second,
11fdf7f2 533 sub_bl, synchronous);
7c673cae
FG
534 }
535
7c673cae
FG
536 return tid;
537}
538
7c673cae 539template <typename I>
11fdf7f2 540ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request(
f67539c2 541 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 542 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 543 I &image_ctx = this->m_image_ctx;
7c673cae
FG
544
545 bufferlist bl;
f67539c2
TL
546 if (single_extent && object_extent.buffer_extents.size() == 1 &&
547 m_bl.length() == object_extent.length) {
9f95a23c
TL
548 // optimization for single object/buffer extent writes
549 bl = std::move(m_bl);
550 } else {
551 assemble_extent(object_extent, &bl);
552 }
553
11fdf7f2 554 auto req = ObjectDispatchSpec::create_write(
9f95a23c 555 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2
TL
556 object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
557 std::nullopt, journal_tid, this->m_trace, on_finish);
7c673cae
FG
558 return req;
559}
560
561template <typename I>
562void ImageWriteRequest<I>::update_stats(size_t length) {
563 I &image_ctx = this->m_image_ctx;
564 image_ctx.perfcounter->inc(l_librbd_wr);
565 image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
566}
567
568template <typename I>
11fdf7f2 569uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
570 I &image_ctx = this->m_image_ctx;
571
572 uint64_t tid = 0;
11fdf7f2 573 ceph_assert(!this->m_image_extents.empty());
7c673cae 574 for (auto &extent : this->m_image_extents) {
11fdf7f2
TL
575 journal::EventEntry event_entry(
576 journal::AioDiscardEvent(extent.first,
577 extent.second,
578 this->m_discard_granularity_bytes));
7c673cae 579 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
580 extent.first, extent.second,
581 synchronous, 0);
7c673cae
FG
582 }
583
7c673cae
FG
584 return tid;
585}
586
7c673cae 587template <typename I>
11fdf7f2 588ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request(
f67539c2 589 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 590 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 591 I &image_ctx = this->m_image_ctx;
11fdf7f2 592 auto req = ObjectDispatchSpec::create_discard(
9f95a23c 593 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2 594 object_extent.offset, object_extent.length, io_context,
11fdf7f2
TL
595 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
596 on_finish);
7c673cae
FG
597 return req;
598}
599
600template <typename I>
601void ImageDiscardRequest<I>::update_stats(size_t length) {
602 I &image_ctx = this->m_image_ctx;
603 image_ctx.perfcounter->inc(l_librbd_discard);
604 image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
605}
606
11fdf7f2
TL
607template <typename I>
608int ImageDiscardRequest<I>::prune_object_extents(
9f95a23c 609 LightweightObjectExtents* object_extents) const {
11fdf7f2
TL
610 if (m_discard_granularity_bytes == 0) {
611 return 0;
612 }
613
614 // Align the range to discard_granularity_bytes boundary and skip
615 // and discards that are too small to free up any space.
616 //
617 // discard_granularity_bytes >= object_size && tail truncation
618 // is a special case for filestore
619 bool prune_required = false;
39ae355f 620 bool length_modified = false;
11fdf7f2
TL
621 auto object_size = this->m_image_ctx.layout.object_size;
622 auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
623 object_size);
624 auto xform_lambda =
39ae355f 625 [discard_granularity_bytes, object_size, &prune_required, &length_modified]
9f95a23c 626 (LightweightObjectExtent& object_extent) {
11fdf7f2
TL
627 auto& offset = object_extent.offset;
628 auto& length = object_extent.length;
629 auto next_offset = offset + length;
630
631 if ((discard_granularity_bytes < object_size) ||
632 (next_offset < object_size)) {
633 offset = p2roundup<uint64_t>(offset, discard_granularity_bytes);
634 next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes);
635 if (offset >= next_offset) {
636 prune_required = true;
637 length = 0;
638 } else {
39ae355f
TL
639 auto new_length = next_offset - offset;
640 if (length != new_length) {
641 length_modified = true;
642 length = new_length;
643 }
11fdf7f2
TL
644 }
645 }
646 };
647 std::for_each(object_extents->begin(), object_extents->end(),
648 xform_lambda);
649
650 if (prune_required) {
651 // one or more object extents were skipped
652 auto remove_lambda =
9f95a23c 653 [](const LightweightObjectExtent& object_extent) {
11fdf7f2
TL
654 return (object_extent.length == 0);
655 };
656 object_extents->erase(
657 std::remove_if(object_extents->begin(), object_extents->end(),
658 remove_lambda),
659 object_extents->end());
660 }
39ae355f
TL
661
662 // object extents were modified, image extents needs updating
663 if (length_modified || prune_required) {
664 return 1;
665 }
666
11fdf7f2
TL
667 return 0;
668}
669
7c673cae
FG
670template <typename I>
671void ImageFlushRequest<I>::send_request() {
672 I &image_ctx = this->m_image_ctx;
7c673cae
FG
673
674 bool journaling = false;
675 {
9f95a23c 676 std::shared_lock image_locker{image_ctx.image_lock};
11fdf7f2
TL
677 journaling = (m_flush_source == FLUSH_SOURCE_USER &&
678 image_ctx.journal != nullptr &&
7c673cae
FG
679 image_ctx.journal->is_journal_appending());
680 }
681
682 AioCompletion *aio_comp = this->m_aio_comp;
11fdf7f2
TL
683 aio_comp->set_request_count(1);
684
685 Context *ctx = new C_AioRequest(aio_comp);
686
687 // ensure no locks are held when flush is complete
688 ctx = librbd::util::create_async_context_callback(image_ctx, ctx);
689
9f95a23c 690 uint64_t journal_tid = 0;
7c673cae
FG
691 if (journaling) {
692 // in-flight ops are flushed prior to closing the journal
9f95a23c
TL
693 ceph_assert(image_ctx.journal != NULL);
694 journal_tid = image_ctx.journal->append_io_event(
11fdf7f2 695 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
494da23a 696 image_ctx.journal->user_flushed();
7c673cae
FG
697 }
698
9f95a23c
TL
699 auto object_dispatch_spec = ObjectDispatchSpec::create_flush(
700 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid,
701 this->m_trace, ctx);
702 ctx = new LambdaContext([object_dispatch_spec](int r) {
703 object_dispatch_spec->send();
704 });
705
11fdf7f2 706 // ensure all in-flight IOs are settled if non-user flush request
f67539c2
TL
707 if (m_flush_source == FLUSH_SOURCE_WRITEBACK) {
708 ctx->complete(0);
709 } else {
710 aio_comp->async_op.flush(ctx);
711 }
11fdf7f2
TL
712
713 // might be flushing during image shutdown
714 if (image_ctx.perfcounter != nullptr) {
715 image_ctx.perfcounter->inc(l_librbd_flush);
716 }
7c673cae
FG
717}
718
7c673cae 719template <typename I>
11fdf7f2 720uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
721 I &image_ctx = this->m_image_ctx;
722
723 uint64_t tid = 0;
11fdf7f2 724 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
725 for (auto &extent : this->m_image_extents) {
726 journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
727 extent.second,
728 m_data_bl));
729 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
730 extent.first, extent.second,
731 synchronous, 0);
7c673cae
FG
732 }
733
7c673cae
FG
734 return tid;
735}
736
7c673cae 737template <typename I>
11fdf7f2 738ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request(
f67539c2 739 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 740 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 741 I &image_ctx = this->m_image_ctx;
7c673cae
FG
742
743 bufferlist bl;
11fdf7f2
TL
744 ObjectDispatchSpec *req;
745
746 if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) {
9f95a23c 747 auto buffer_extents{object_extent.buffer_extents};
7c673cae 748
11fdf7f2 749 req = ObjectDispatchSpec::create_write_same(
9f95a23c
TL
750 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
751 object_extent.offset, object_extent.length, std::move(buffer_extents),
f67539c2 752 std::move(bl), io_context, m_op_flags, journal_tid,
11fdf7f2 753 this->m_trace, on_finish);
7c673cae
FG
754 return req;
755 }
11fdf7f2 756 req = ObjectDispatchSpec::create_write(
9f95a23c 757 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2
TL
758 object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
759 std::nullopt, journal_tid, this->m_trace, on_finish);
7c673cae
FG
760 return req;
761}
762
763template <typename I>
764void ImageWriteSameRequest<I>::update_stats(size_t length) {
765 I &image_ctx = this->m_image_ctx;
766 image_ctx.perfcounter->inc(l_librbd_ws);
767 image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
768}
769
c07f9fc5
FG
770template <typename I>
771uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
11fdf7f2 772 bool synchronous) {
c07f9fc5
FG
773 I &image_ctx = this->m_image_ctx;
774
775 uint64_t tid = 0;
11fdf7f2 776 ceph_assert(this->m_image_extents.size() == 1);
c07f9fc5 777 auto &extent = this->m_image_extents.front();
39ae355f
TL
778 tid = image_ctx.journal->append_compare_and_write_event(extent.first,
779 extent.second,
780 m_cmp_bl,
781 m_bl,
782 synchronous);
c07f9fc5
FG
783
784 return tid;
785}
786
c07f9fc5
FG
787template <typename I>
788void ImageCompareAndWriteRequest<I>::assemble_extent(
39ae355f
TL
789 const LightweightObjectExtent &object_extent, bufferlist *bl,
790 bufferlist *cmp_bl) {
c07f9fc5
FG
791 for (auto q = object_extent.buffer_extents.begin();
792 q != object_extent.buffer_extents.end(); ++q) {
793 bufferlist sub_bl;
794 sub_bl.substr_of(m_bl, q->first, q->second);
795 bl->claim_append(sub_bl);
39ae355f
TL
796
797 bufferlist sub_cmp_bl;
798 sub_cmp_bl.substr_of(m_cmp_bl, q->first, q->second);
799 cmp_bl->claim_append(sub_cmp_bl);
c07f9fc5
FG
800 }
801}
802
c07f9fc5 803template <typename I>
11fdf7f2 804ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request(
f67539c2 805 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 806 uint64_t journal_tid, bool single_extent, Context *on_finish) {
c07f9fc5
FG
807 I &image_ctx = this->m_image_ctx;
808
809 bufferlist bl;
39ae355f
TL
810 bufferlist cmp_bl;
811 assemble_extent(object_extent, &bl, &cmp_bl);
11fdf7f2 812 auto req = ObjectDispatchSpec::create_compare_and_write(
9f95a23c 813 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
39ae355f 814 object_extent.offset, std::move(cmp_bl), std::move(bl), io_context,
9f95a23c 815 m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish);
c07f9fc5
FG
816 return req;
817}
818
819template <typename I>
820void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
821 I &image_ctx = this->m_image_ctx;
822 image_ctx.perfcounter->inc(l_librbd_cmp);
823 image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
824}
825
826template <typename I>
11fdf7f2 827int ImageCompareAndWriteRequest<I>::prune_object_extents(
9f95a23c 828 LightweightObjectExtents* object_extents) const {
11fdf7f2 829 if (object_extents->size() > 1)
c07f9fc5
FG
830 return -EINVAL;
831
832 I &image_ctx = this->m_image_ctx;
c07f9fc5 833 uint64_t su = image_ctx.layout.stripe_unit;
9f95a23c 834 auto& object_extent = object_extents->front();
39ae355f 835 if (su == 0 || (object_extent.offset % su + object_extent.length > su))
c07f9fc5
FG
836 return -EINVAL;
837
838 return 0;
839}
840
f67539c2
TL
841template <typename I>
842ImageListSnapsRequest<I>::ImageListSnapsRequest(
843 I& image_ctx, AioCompletion* aio_comp, Extents&& image_extents,
1e59de90
TL
844 ImageArea area, SnapIds&& snap_ids, int list_snaps_flags,
845 SnapshotDelta* snapshot_delta, const ZTracer::Trace& parent_trace)
846 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), area,
847 "list-snaps", parent_trace),
f67539c2
TL
848 m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags),
849 m_snapshot_delta(snapshot_delta) {
850}
851
852template <typename I>
853void ImageListSnapsRequest<I>::send_request() {
854 I &image_ctx = this->m_image_ctx;
855 CephContext *cct = image_ctx.cct;
856
857 // map image extents to object extents
858 auto &image_extents = this->m_image_extents;
859 std::map<uint64_t, Extents> object_number_extents;
860 for (auto& image_extent : image_extents) {
861 if (image_extent.second == 0) {
862 continue;
863 }
864
865 striper::LightweightObjectExtents object_extents;
1e59de90
TL
866 io::util::area_to_object_extents(&image_ctx, image_extent.first,
867 image_extent.second, this->m_image_area, 0,
868 &object_extents);
f67539c2
TL
869 for (auto& object_extent : object_extents) {
870 object_number_extents[object_extent.object_no].emplace_back(
871 object_extent.offset, object_extent.length);
872 }
873 }
874
875 // reassemble the deltas back into image-extents when complete
876 auto aio_comp = this->m_aio_comp;
877 aio_comp->set_request_count(1);
878 auto assemble_ctx = new C_AssembleSnapshotDeltas<I>(
879 &image_ctx, aio_comp, m_snapshot_delta);
880 auto sub_aio_comp = AioCompletion::create_and_start<
881 Context, &Context::complete>(assemble_ctx, get_image_ctx(&image_ctx),
882 AIO_TYPE_GENERIC);
883
884 // issue the requests
885 sub_aio_comp->set_request_count(object_number_extents.size());
886 for (auto& oe : object_number_extents) {
887 ldout(cct, 20) << data_object_name(&image_ctx, oe.first) << " "
888 << oe.second << dendl;
889 auto ctx = new C_AioRequest(sub_aio_comp);
890 auto req = ObjectDispatchSpec::create_list_snaps(
891 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.first, std::move(oe.second),
892 SnapIds{m_snap_ids}, m_list_snaps_flags, this->m_trace,
893 assemble_ctx->get_snapshot_delta(oe.first), ctx);
894 req->send();
895 }
896}
897
7c673cae
FG
898} // namespace io
899} // namespace librbd
900
// Explicit instantiations of the request class templates for
// librbd::ImageCtx.
901template class librbd::io::ImageRequest<librbd::ImageCtx>;
902template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
903template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
904template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
905template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
906template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
907template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
c07f9fc5 908template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;
f67539c2 909template class librbd::io::ImageListSnapsRequest<librbd::ImageCtx>;