]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/io/ImageRequest.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / librbd / io / ImageRequest.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/io/ImageRequest.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/internal.h"
7#include "librbd/Journal.h"
b32b8144 8#include "librbd/Types.h"
7c673cae 9#include "librbd/Utils.h"
f67539c2 10#include "librbd/asio/ContextWQ.h"
7c673cae 11#include "librbd/io/AioCompletion.h"
11fdf7f2
TL
12#include "librbd/io/AsyncOperation.h"
13#include "librbd/io/ObjectDispatchInterface.h"
14#include "librbd/io/ObjectDispatchSpec.h"
f67539c2 15#include "librbd/io/ObjectDispatcherInterface.h"
11fdf7f2 16#include "librbd/io/Utils.h"
7c673cae
FG
17#include "librbd/journal/Types.h"
18#include "include/rados/librados.hpp"
f67539c2 19#include "common/errno.h"
11fdf7f2 20#include "common/perf_counters.h"
7c673cae 21#include "osdc/Striper.h"
11fdf7f2
TL
22#include <algorithm>
23#include <functional>
f67539c2 24#include <map>
7c673cae
FG
25
26#define dout_subsys ceph_subsys_rbd
27#undef dout_prefix
9f95a23c 28#define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
7c673cae
FG
29
30namespace librbd {
31namespace io {
32
9f95a23c 33using librbd::util::data_object_name;
11fdf7f2 34using librbd::util::get_image_ctx;
7c673cae
FG
35
36namespace {
37
f67539c2
TL
38template <typename I>
39struct C_AssembleSnapshotDeltas : public C_AioRequest {
40 I* image_ctx;
41 SnapshotDelta* snapshot_delta;
42
43 ceph::mutex lock = ceph::make_mutex(
44 "librbd::io::C_AssembleSnapshotDeltas::lock", false);
45 std::map<uint64_t, SnapshotDelta> object_snapshot_delta;
46
47 C_AssembleSnapshotDeltas(I* image_ctx, AioCompletion* aio_comp,
48 SnapshotDelta* snapshot_delta)
49 : C_AioRequest(aio_comp),
50 image_ctx(image_ctx), snapshot_delta(snapshot_delta) {
51 }
52
53 SnapshotDelta* get_snapshot_delta(uint64_t object_no) {
54 std::unique_lock locker{lock};
55 return &object_snapshot_delta[object_no];
56 }
57
58 void finish(int r) override {
59 auto cct = image_ctx->cct;
60
61 if (r < 0) {
62 lderr(cct) << "C_AssembleSnapshotDeltas: list snaps failed: "
63 << cpp_strerror(r) << dendl;
64 C_AioRequest::finish(r);
65 return;
66 }
67
68 std::unique_lock locker{lock};
69 *snapshot_delta = {};
70 for (auto& [object_no, object_snapshot_delta] : object_snapshot_delta) {
71 SnapshotDelta image_snapshot_delta;
72 object_to_image_intervals(object_no, object_snapshot_delta,
73 &image_snapshot_delta, snapshot_delta);
74
75 ldout(cct, 20) << "object_no=" << object_no << ", "
76 << "object_snapshot_delta="
77 << object_snapshot_delta << ", "
78 << "image_snapshot_delta=" << image_snapshot_delta
79 << dendl;
80 }
81
82 ldout(cct, 20) << "snapshot_delta=" << *snapshot_delta << dendl;
83 C_AioRequest::finish(0);
84 }
85
86 void object_to_image_intervals(
87 uint64_t object_no, const SnapshotDelta& object_snapshot_delta,
88 SnapshotDelta* image_snapshot_delta,
89 SnapshotDelta* assembled_image_snapshot_delta) {
90 for (auto& [key, object_extents] : object_snapshot_delta) {
91 for (auto& object_extent : object_extents) {
92 Extents image_extents;
93 io::util::extent_to_file(image_ctx, object_no, object_extent.get_off(),
94 object_extent.get_len(), image_extents);
95
96 auto& intervals = (*image_snapshot_delta)[key];
97 auto& assembled_intervals = (*assembled_image_snapshot_delta)[key];
98 for (auto [image_offset, image_length] : image_extents) {
99 SparseExtent sparse_extent{object_extent.get_val().state,
100 image_length};
101 intervals.insert(image_offset, image_length, sparse_extent);
102 assembled_intervals.insert(image_offset, image_length,
103 sparse_extent);
104 }
105 }
106 }
107 }
108};
109
9f95a23c
TL
110template <typename I>
111struct C_RBD_Readahead : public Context {
112 I *ictx;
113 uint64_t object_no;
f67539c2 114 io::ReadExtents extents;
9f95a23c
TL
115
116 C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length)
f67539c2 117 : ictx(ictx), object_no(object_no), extents({{offset, length}}) {
9f95a23c
TL
118 ictx->readahead.inc_pending();
119 }
120
121 void finish(int r) override {
f67539c2
TL
122 ceph_assert(extents.size() == 1);
123 auto& extent = extents.front();
9f95a23c
TL
124 ldout(ictx->cct, 20) << "C_RBD_Readahead on "
125 << data_object_name(ictx, object_no) << ": "
f67539c2 126 << extent.offset << "~" << extent.length << dendl;
9f95a23c
TL
127 ictx->readahead.dec_pending();
128 }
129};
130
131template <typename I>
f67539c2 132void readahead(I *ictx, const Extents& image_extents, IOContext io_context) {
9f95a23c
TL
133 uint64_t total_bytes = 0;
134 for (auto& image_extent : image_extents) {
135 total_bytes += image_extent.second;
136 }
137
138 ictx->image_lock.lock_shared();
139 auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes);
140 bool abort = (
141 ictx->readahead_disable_after_bytes != 0 &&
142 total_bytes_read > ictx->readahead_disable_after_bytes);
143 if (abort) {
144 ictx->image_lock.unlock_shared();
145 return;
146 }
147
f67539c2 148 uint64_t image_size = ictx->get_effective_image_size(ictx->snap_id);
9f95a23c
TL
149 ictx->image_lock.unlock_shared();
150
151 auto readahead_extent = ictx->readahead.update(image_extents, image_size);
152 uint64_t readahead_offset = readahead_extent.first;
153 uint64_t readahead_length = readahead_extent.second;
154
155 if (readahead_length > 0) {
156 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~"
157 << readahead_length << dendl;
158 LightweightObjectExtents readahead_object_extents;
f67539c2
TL
159 io::util::file_to_extents(ictx, readahead_offset, readahead_length, 0,
160 &readahead_object_extents);
9f95a23c
TL
161 for (auto& object_extent : readahead_object_extents) {
162 ldout(ictx->cct, 20) << "(readahead) "
163 << data_object_name(ictx,
164 object_extent.object_no) << " "
165 << object_extent.offset << "~"
166 << object_extent.length << dendl;
167
168 auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no,
169 object_extent.offset,
170 object_extent.length);
171 auto req = io::ObjectDispatchSpec::create_read(
172 ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2 173 &req_comp->extents, io_context, 0, 0, {}, nullptr, req_comp);
9f95a23c
TL
174 req->send();
175 }
176
177 ictx->perfcounter->inc(l_librbd_readahead);
178 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
179 }
180}
181
11fdf7f2
TL
182template <typename I>
183struct C_UpdateTimestamp : public Context {
184public:
185 I& m_image_ctx;
186 bool m_modify; // if modify set to 'true', modify timestamp is updated,
187 // access timestamp otherwise
188 AsyncOperation m_async_op;
91327a77 189
11fdf7f2
TL
190 C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
191 m_async_op.start_op(*get_image_ctx(&m_image_ctx));
91327a77 192 }
11fdf7f2
TL
193 ~C_UpdateTimestamp() override {
194 m_async_op.finish_op();
91327a77
AA
195 }
196
11fdf7f2
TL
197 void send() {
198 librados::ObjectWriteOperation op;
199 if (m_modify) {
200 cls_client::set_modify_timestamp(&op);
201 } else {
202 cls_client::set_access_timestamp(&op);
91327a77 203 }
7c673cae 204
11fdf7f2
TL
205 auto comp = librbd::util::create_rados_callback(this);
206 int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
207 ceph_assert(r == 0);
208 comp->release();
7c673cae
FG
209 }
210
211 void finish(int r) override {
11fdf7f2 212 // ignore errors updating timestamp
7c673cae
FG
213 }
214};
215
11fdf7f2
TL
216bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
217 uint64_t interval) {
218 return (interval &&
219 (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
224ce89b
WB
220}
221
11fdf7f2 222} // anonymous namespace
c07f9fc5 223
9f95a23c
TL
224#undef dout_prefix
225#define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
226 << " " << __func__ << ": "
227
7c673cae
FG
228template <typename I>
229void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
230 Extents &&image_extents,
f67539c2
TL
231 ReadResult &&read_result, IOContext io_context,
232 int op_flags, int read_flags,
31f18b77 233 const ZTracer::Trace &parent_trace) {
7c673cae 234 ImageReadRequest<I> req(*ictx, c, std::move(image_extents),
f67539c2
TL
235 std::move(read_result), io_context, op_flags,
236 read_flags, parent_trace);
7c673cae
FG
237 req.send();
238}
239
240template <typename I>
241void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
242 Extents &&image_extents, bufferlist &&bl,
f67539c2 243 IOContext io_context, int op_flags,
31f18b77 244 const ZTracer::Trace &parent_trace) {
7c673cae 245 ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl),
f67539c2 246 io_context, op_flags, parent_trace);
7c673cae
FG
247 req.send();
248}
249
250template <typename I>
251void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
11fdf7f2
TL
252 Extents &&image_extents,
253 uint32_t discard_granularity_bytes,
f67539c2
TL
254 IOContext io_context,
255 const ZTracer::Trace &parent_trace) {
11fdf7f2 256 ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents),
f67539c2
TL
257 discard_granularity_bytes, io_context,
258 parent_trace);
7c673cae
FG
259 req.send();
260}
261
262template <typename I>
31f18b77 263void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
11fdf7f2
TL
264 FlushSource flush_source,
265 const ZTracer::Trace &parent_trace) {
266 ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace);
7c673cae
FG
267 req.send();
268}
269
270template <typename I>
271void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
11fdf7f2 272 Extents &&image_extents,
f67539c2
TL
273 bufferlist &&bl, IOContext io_context,
274 int op_flags,
31f18b77 275 const ZTracer::Trace &parent_trace) {
11fdf7f2 276 ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents),
f67539c2
TL
277 std::move(bl), io_context, op_flags,
278 parent_trace);
7c673cae
FG
279 req.send();
280}
281
c07f9fc5
FG
282template <typename I>
283void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
284 Extents &&image_extents,
285 bufferlist &&cmp_bl,
286 bufferlist &&bl,
287 uint64_t *mismatch_offset,
f67539c2 288 IOContext io_context, int op_flags,
c07f9fc5
FG
289 const ZTracer::Trace &parent_trace) {
290 ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents),
291 std::move(cmp_bl), std::move(bl),
f67539c2
TL
292 mismatch_offset, io_context, op_flags,
293 parent_trace);
c07f9fc5
FG
294 req.send();
295}
296
7c673cae
FG
297template <typename I>
298void ImageRequest<I>::send() {
299 I &image_ctx = this->m_image_ctx;
11fdf7f2 300 ceph_assert(m_aio_comp->is_initialized(get_aio_type()));
494da23a 301 ceph_assert(m_aio_comp->is_started());
7c673cae
FG
302
303 CephContext *cct = image_ctx.cct;
304 AioCompletion *aio_comp = this->m_aio_comp;
305 ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
c07f9fc5 306 << "completion=" << aio_comp << dendl;
7c673cae 307
f67539c2
TL
308 update_timestamp();
309 send_request();
7c673cae
FG
310}
311
312template <typename I>
11fdf7f2
TL
313void ImageRequest<I>::update_timestamp() {
314 bool modify = (get_aio_type() != AIO_TYPE_READ);
315 uint64_t update_interval;
316 if (modify) {
317 update_interval = m_image_ctx.mtime_update_interval;
318 } else {
319 update_interval = m_image_ctx.atime_update_interval;
320 }
7c673cae 321
11fdf7f2
TL
322 if (update_interval == 0) {
323 return;
324 }
325
326 utime_t (I::*get_timestamp_fn)() const;
327 void (I::*set_timestamp_fn)(utime_t);
328 if (modify) {
329 get_timestamp_fn = &I::get_modify_timestamp;
330 set_timestamp_fn = &I::set_modify_timestamp;
331 } else {
332 get_timestamp_fn = &I::get_access_timestamp;
333 set_timestamp_fn = &I::set_access_timestamp;
334 }
335
336 utime_t ts = ceph_clock_now();
337 {
9f95a23c 338 std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
339 if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
340 update_interval)) {
341 return;
342 }
343 }
344
345 {
9f95a23c 346 std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
347 bool update = should_update_timestamp(
348 ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
349 if (!update) {
350 return;
351 }
352
353 std::invoke(set_timestamp_fn, m_image_ctx, ts);
354 }
355
356 // TODO we fire and forget this outside the IO path to prevent
357 // potential race conditions with librbd client IO callbacks
358 // between different threads (e.g. librados and object cacher)
359 ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
360 auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
361 req->send();
7c673cae
FG
362}
363
364template <typename I>
365ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
366 Extents &&image_extents,
f67539c2
TL
367 ReadResult &&read_result,
368 IOContext io_context, int op_flags,
369 int read_flags,
370 const ZTracer::Trace &parent_trace)
371 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents),
372 io_context, "read", parent_trace),
373 m_op_flags(op_flags), m_read_flags(read_flags) {
7c673cae
FG
374 aio_comp->read_result = std::move(read_result);
375}
376
7c673cae
FG
377template <typename I>
378void ImageReadRequest<I>::send_request() {
379 I &image_ctx = this->m_image_ctx;
380 CephContext *cct = image_ctx.cct;
381
382 auto &image_extents = this->m_image_extents;
11fdf7f2 383 if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 &&
7c673cae 384 !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
f67539c2 385 readahead(get_image_ctx(&image_ctx), image_extents, this->m_io_context);
9f95a23c 386 }
7c673cae 387
9f95a23c
TL
388 // map image extents to object extents
389 LightweightObjectExtents object_extents;
f67539c2 390 uint64_t buffer_ofs = 0;
9f95a23c
TL
391 for (auto &extent : image_extents) {
392 if (extent.second == 0) {
393 continue;
7c673cae 394 }
7c673cae 395
f67539c2
TL
396 util::file_to_extents(&image_ctx, extent.first, extent.second, buffer_ofs,
397 &object_extents);
9f95a23c 398 buffer_ofs += extent.second;
7c673cae 399 }
7c673cae 400
9f95a23c 401 AioCompletion *aio_comp = this->m_aio_comp;
f67539c2
TL
402 aio_comp->read_result.set_image_extents(image_extents);
403
404 // issue the requests
9f95a23c
TL
405 aio_comp->set_request_count(object_extents.size());
406 for (auto &oe : object_extents) {
407 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
408 << oe.offset << "~" << oe.length << " from "
409 << oe.buffer_extents << dendl;
7c673cae 410
9f95a23c 411 auto req_comp = new io::ReadResult::C_ObjectReadRequest(
f67539c2 412 aio_comp, {{oe.offset, oe.length, std::move(oe.buffer_extents)}});
9f95a23c 413 auto req = ObjectDispatchSpec::create_read(
f67539c2
TL
414 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no,
415 &req_comp->extents, this->m_io_context, m_op_flags, m_read_flags,
416 this->m_trace, nullptr, req_comp);
9f95a23c
TL
417 req->send();
418 }
7c673cae
FG
419
420 image_ctx.perfcounter->inc(l_librbd_rd);
421 image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
422}
423
7c673cae
FG
424template <typename I>
425void AbstractImageWriteRequest<I>::send_request() {
426 I &image_ctx = this->m_image_ctx;
7c673cae 427
7c673cae
FG
428 bool journaling = false;
429
430 AioCompletion *aio_comp = this->m_aio_comp;
7c673cae
FG
431 {
432 // prevent image size from changing between computing clip and recording
433 // pending async operation
9f95a23c 434 std::shared_lock image_locker{image_ctx.image_lock};
7c673cae
FG
435 journaling = (image_ctx.journal != nullptr &&
436 image_ctx.journal->is_journal_appending());
437 }
438
9f95a23c
TL
439 uint64_t clip_len = 0;
440 LightweightObjectExtents object_extents;
441 for (auto &extent : this->m_image_extents) {
442 if (extent.second == 0) {
443 continue;
444 }
445
446 // map to object extents
f67539c2
TL
447 io::util::file_to_extents(&image_ctx, extent.first, extent.second, clip_len,
448 &object_extents);
9f95a23c
TL
449 clip_len += extent.second;
450 }
451
11fdf7f2 452 int ret = prune_object_extents(&object_extents);
c07f9fc5
FG
453 if (ret < 0) {
454 aio_comp->fail(ret);
455 return;
456 }
7c673cae 457
39ae355f
TL
458 // reflect changes in object_extents back to m_image_extents
459 if (ret == 1) {
460 this->m_image_extents.clear();
461 for (auto& object_extent : object_extents) {
462 io::Extents image_extents;
463 io::util::extent_to_file(&image_ctx, object_extent.object_no,
464 object_extent.offset, object_extent.length,
465 image_extents);
466 this->m_image_extents.insert(this->m_image_extents.end(),
467 image_extents.begin(), image_extents.end());
468 }
469 }
470
9f95a23c 471 aio_comp->set_request_count(object_extents.size());
7c673cae
FG
472 if (!object_extents.empty()) {
473 uint64_t journal_tid = 0;
7c673cae
FG
474 if (journaling) {
475 // in-flight ops are flushed prior to closing the journal
11fdf7f2
TL
476 ceph_assert(image_ctx.journal != NULL);
477 journal_tid = append_journal_event(m_synchronous);
7c673cae
FG
478 }
479
f67539c2 480 send_object_requests(object_extents, this->m_io_context, journal_tid);
7c673cae
FG
481 }
482
483 update_stats(clip_len);
7c673cae
FG
484}
485
486template <typename I>
487void AbstractImageWriteRequest<I>::send_object_requests(
f67539c2 488 const LightweightObjectExtents &object_extents, IOContext io_context,
11fdf7f2 489 uint64_t journal_tid) {
7c673cae
FG
490 I &image_ctx = this->m_image_ctx;
491 CephContext *cct = image_ctx.cct;
492
493 AioCompletion *aio_comp = this->m_aio_comp;
9f95a23c
TL
494 bool single_extent = (object_extents.size() == 1);
495 for (auto& oe : object_extents) {
496 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
497 << oe.offset << "~" << oe.length << " from "
498 << oe.buffer_extents << dendl;
7c673cae 499 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
f67539c2
TL
500 auto request = create_object_request(oe, io_context, journal_tid,
501 single_extent, req_comp);
9f95a23c 502 request->send();
7c673cae
FG
503 }
504}
505
506template <typename I>
9f95a23c
TL
507void ImageWriteRequest<I>::assemble_extent(
508 const LightweightObjectExtent &object_extent, bufferlist *bl) {
7c673cae
FG
509 for (auto q = object_extent.buffer_extents.begin();
510 q != object_extent.buffer_extents.end(); ++q) {
511 bufferlist sub_bl;
512 sub_bl.substr_of(m_bl, q->first, q->second);
513 bl->claim_append(sub_bl);
514 }
515}
516
517template <typename I>
11fdf7f2 518uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
519 I &image_ctx = this->m_image_ctx;
520
521 uint64_t tid = 0;
522 uint64_t buffer_offset = 0;
11fdf7f2 523 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
524 for (auto &extent : this->m_image_extents) {
525 bufferlist sub_bl;
526 sub_bl.substr_of(m_bl, buffer_offset, extent.second);
527 buffer_offset += extent.second;
528
529 tid = image_ctx.journal->append_write_event(extent.first, extent.second,
11fdf7f2 530 sub_bl, synchronous);
7c673cae
FG
531 }
532
7c673cae
FG
533 return tid;
534}
535
7c673cae 536template <typename I>
11fdf7f2 537ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request(
f67539c2 538 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 539 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 540 I &image_ctx = this->m_image_ctx;
7c673cae
FG
541
542 bufferlist bl;
f67539c2
TL
543 if (single_extent && object_extent.buffer_extents.size() == 1 &&
544 m_bl.length() == object_extent.length) {
9f95a23c
TL
545 // optimization for single object/buffer extent writes
546 bl = std::move(m_bl);
547 } else {
548 assemble_extent(object_extent, &bl);
549 }
550
11fdf7f2 551 auto req = ObjectDispatchSpec::create_write(
9f95a23c 552 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2
TL
553 object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
554 std::nullopt, journal_tid, this->m_trace, on_finish);
7c673cae
FG
555 return req;
556}
557
558template <typename I>
559void ImageWriteRequest<I>::update_stats(size_t length) {
560 I &image_ctx = this->m_image_ctx;
561 image_ctx.perfcounter->inc(l_librbd_wr);
562 image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
563}
564
565template <typename I>
11fdf7f2 566uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
567 I &image_ctx = this->m_image_ctx;
568
569 uint64_t tid = 0;
11fdf7f2 570 ceph_assert(!this->m_image_extents.empty());
7c673cae 571 for (auto &extent : this->m_image_extents) {
11fdf7f2
TL
572 journal::EventEntry event_entry(
573 journal::AioDiscardEvent(extent.first,
574 extent.second,
575 this->m_discard_granularity_bytes));
7c673cae 576 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
577 extent.first, extent.second,
578 synchronous, 0);
7c673cae
FG
579 }
580
7c673cae
FG
581 return tid;
582}
583
7c673cae 584template <typename I>
11fdf7f2 585ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request(
f67539c2 586 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 587 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 588 I &image_ctx = this->m_image_ctx;
11fdf7f2 589 auto req = ObjectDispatchSpec::create_discard(
9f95a23c 590 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2 591 object_extent.offset, object_extent.length, io_context,
11fdf7f2
TL
592 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
593 on_finish);
7c673cae
FG
594 return req;
595}
596
597template <typename I>
598void ImageDiscardRequest<I>::update_stats(size_t length) {
599 I &image_ctx = this->m_image_ctx;
600 image_ctx.perfcounter->inc(l_librbd_discard);
601 image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
602}
603
11fdf7f2
TL
604template <typename I>
605int ImageDiscardRequest<I>::prune_object_extents(
9f95a23c 606 LightweightObjectExtents* object_extents) const {
11fdf7f2
TL
607 if (m_discard_granularity_bytes == 0) {
608 return 0;
609 }
610
611 // Align the range to discard_granularity_bytes boundary and skip
612 // and discards that are too small to free up any space.
613 //
614 // discard_granularity_bytes >= object_size && tail truncation
615 // is a special case for filestore
616 bool prune_required = false;
39ae355f 617 bool length_modified = false;
11fdf7f2
TL
618 auto object_size = this->m_image_ctx.layout.object_size;
619 auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
620 object_size);
621 auto xform_lambda =
39ae355f 622 [discard_granularity_bytes, object_size, &prune_required, &length_modified]
9f95a23c 623 (LightweightObjectExtent& object_extent) {
11fdf7f2
TL
624 auto& offset = object_extent.offset;
625 auto& length = object_extent.length;
626 auto next_offset = offset + length;
627
628 if ((discard_granularity_bytes < object_size) ||
629 (next_offset < object_size)) {
630 offset = p2roundup<uint64_t>(offset, discard_granularity_bytes);
631 next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes);
632 if (offset >= next_offset) {
633 prune_required = true;
634 length = 0;
635 } else {
39ae355f
TL
636 auto new_length = next_offset - offset;
637 if (length != new_length) {
638 length_modified = true;
639 length = new_length;
640 }
11fdf7f2
TL
641 }
642 }
643 };
644 std::for_each(object_extents->begin(), object_extents->end(),
645 xform_lambda);
646
647 if (prune_required) {
648 // one or more object extents were skipped
649 auto remove_lambda =
9f95a23c 650 [](const LightweightObjectExtent& object_extent) {
11fdf7f2
TL
651 return (object_extent.length == 0);
652 };
653 object_extents->erase(
654 std::remove_if(object_extents->begin(), object_extents->end(),
655 remove_lambda),
656 object_extents->end());
657 }
39ae355f
TL
658
659 // object extents were modified, image extents needs updating
660 if (length_modified || prune_required) {
661 return 1;
662 }
663
11fdf7f2
TL
664 return 0;
665}
666
7c673cae
FG
667template <typename I>
668void ImageFlushRequest<I>::send_request() {
669 I &image_ctx = this->m_image_ctx;
7c673cae
FG
670
671 bool journaling = false;
672 {
9f95a23c 673 std::shared_lock image_locker{image_ctx.image_lock};
11fdf7f2
TL
674 journaling = (m_flush_source == FLUSH_SOURCE_USER &&
675 image_ctx.journal != nullptr &&
7c673cae
FG
676 image_ctx.journal->is_journal_appending());
677 }
678
679 AioCompletion *aio_comp = this->m_aio_comp;
11fdf7f2
TL
680 aio_comp->set_request_count(1);
681
682 Context *ctx = new C_AioRequest(aio_comp);
683
684 // ensure no locks are held when flush is complete
685 ctx = librbd::util::create_async_context_callback(image_ctx, ctx);
686
9f95a23c 687 uint64_t journal_tid = 0;
7c673cae
FG
688 if (journaling) {
689 // in-flight ops are flushed prior to closing the journal
9f95a23c
TL
690 ceph_assert(image_ctx.journal != NULL);
691 journal_tid = image_ctx.journal->append_io_event(
11fdf7f2 692 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
494da23a 693 image_ctx.journal->user_flushed();
7c673cae
FG
694 }
695
9f95a23c
TL
696 auto object_dispatch_spec = ObjectDispatchSpec::create_flush(
697 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid,
698 this->m_trace, ctx);
699 ctx = new LambdaContext([object_dispatch_spec](int r) {
700 object_dispatch_spec->send();
701 });
702
11fdf7f2 703 // ensure all in-flight IOs are settled if non-user flush request
f67539c2
TL
704 if (m_flush_source == FLUSH_SOURCE_WRITEBACK) {
705 ctx->complete(0);
706 } else {
707 aio_comp->async_op.flush(ctx);
708 }
11fdf7f2
TL
709
710 // might be flushing during image shutdown
711 if (image_ctx.perfcounter != nullptr) {
712 image_ctx.perfcounter->inc(l_librbd_flush);
713 }
7c673cae
FG
714}
715
7c673cae 716template <typename I>
11fdf7f2 717uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
718 I &image_ctx = this->m_image_ctx;
719
720 uint64_t tid = 0;
11fdf7f2 721 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
722 for (auto &extent : this->m_image_extents) {
723 journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
724 extent.second,
725 m_data_bl));
726 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
727 extent.first, extent.second,
728 synchronous, 0);
7c673cae
FG
729 }
730
7c673cae
FG
731 return tid;
732}
733
7c673cae 734template <typename I>
11fdf7f2 735ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request(
f67539c2 736 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 737 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 738 I &image_ctx = this->m_image_ctx;
7c673cae
FG
739
740 bufferlist bl;
11fdf7f2
TL
741 ObjectDispatchSpec *req;
742
743 if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) {
9f95a23c 744 auto buffer_extents{object_extent.buffer_extents};
7c673cae 745
11fdf7f2 746 req = ObjectDispatchSpec::create_write_same(
9f95a23c
TL
747 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
748 object_extent.offset, object_extent.length, std::move(buffer_extents),
f67539c2 749 std::move(bl), io_context, m_op_flags, journal_tid,
11fdf7f2 750 this->m_trace, on_finish);
7c673cae
FG
751 return req;
752 }
11fdf7f2 753 req = ObjectDispatchSpec::create_write(
9f95a23c 754 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
f67539c2
TL
755 object_extent.offset, std::move(bl), io_context, m_op_flags, 0,
756 std::nullopt, journal_tid, this->m_trace, on_finish);
7c673cae
FG
757 return req;
758}
759
760template <typename I>
761void ImageWriteSameRequest<I>::update_stats(size_t length) {
762 I &image_ctx = this->m_image_ctx;
763 image_ctx.perfcounter->inc(l_librbd_ws);
764 image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
765}
766
c07f9fc5
FG
767template <typename I>
768uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
11fdf7f2 769 bool synchronous) {
c07f9fc5
FG
770 I &image_ctx = this->m_image_ctx;
771
772 uint64_t tid = 0;
11fdf7f2 773 ceph_assert(this->m_image_extents.size() == 1);
c07f9fc5 774 auto &extent = this->m_image_extents.front();
39ae355f
TL
775 tid = image_ctx.journal->append_compare_and_write_event(extent.first,
776 extent.second,
777 m_cmp_bl,
778 m_bl,
779 synchronous);
c07f9fc5
FG
780
781 return tid;
782}
783
c07f9fc5
FG
784template <typename I>
785void ImageCompareAndWriteRequest<I>::assemble_extent(
39ae355f
TL
786 const LightweightObjectExtent &object_extent, bufferlist *bl,
787 bufferlist *cmp_bl) {
c07f9fc5
FG
788 for (auto q = object_extent.buffer_extents.begin();
789 q != object_extent.buffer_extents.end(); ++q) {
790 bufferlist sub_bl;
791 sub_bl.substr_of(m_bl, q->first, q->second);
792 bl->claim_append(sub_bl);
39ae355f
TL
793
794 bufferlist sub_cmp_bl;
795 sub_cmp_bl.substr_of(m_cmp_bl, q->first, q->second);
796 cmp_bl->claim_append(sub_cmp_bl);
c07f9fc5
FG
797 }
798}
799
c07f9fc5 800template <typename I>
11fdf7f2 801ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request(
f67539c2 802 const LightweightObjectExtent &object_extent, IOContext io_context,
9f95a23c 803 uint64_t journal_tid, bool single_extent, Context *on_finish) {
c07f9fc5
FG
804 I &image_ctx = this->m_image_ctx;
805
806 bufferlist bl;
39ae355f
TL
807 bufferlist cmp_bl;
808 assemble_extent(object_extent, &bl, &cmp_bl);
11fdf7f2 809 auto req = ObjectDispatchSpec::create_compare_and_write(
9f95a23c 810 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
39ae355f 811 object_extent.offset, std::move(cmp_bl), std::move(bl), io_context,
9f95a23c 812 m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish);
c07f9fc5
FG
813 return req;
814}
815
816template <typename I>
817void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
818 I &image_ctx = this->m_image_ctx;
819 image_ctx.perfcounter->inc(l_librbd_cmp);
820 image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
821}
822
823template <typename I>
11fdf7f2 824int ImageCompareAndWriteRequest<I>::prune_object_extents(
9f95a23c 825 LightweightObjectExtents* object_extents) const {
11fdf7f2 826 if (object_extents->size() > 1)
c07f9fc5
FG
827 return -EINVAL;
828
829 I &image_ctx = this->m_image_ctx;
c07f9fc5 830 uint64_t su = image_ctx.layout.stripe_unit;
9f95a23c 831 auto& object_extent = object_extents->front();
39ae355f 832 if (su == 0 || (object_extent.offset % su + object_extent.length > su))
c07f9fc5
FG
833 return -EINVAL;
834
835 return 0;
836}
837
f67539c2
TL
838template <typename I>
839ImageListSnapsRequest<I>::ImageListSnapsRequest(
840 I& image_ctx, AioCompletion* aio_comp, Extents&& image_extents,
841 SnapIds&& snap_ids, int list_snaps_flags, SnapshotDelta* snapshot_delta,
842 const ZTracer::Trace& parent_trace)
843 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents),
844 image_ctx.get_data_io_context(), "list-snaps",
845 parent_trace),
846 m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags),
847 m_snapshot_delta(snapshot_delta) {
848}
849
850template <typename I>
851void ImageListSnapsRequest<I>::send_request() {
852 I &image_ctx = this->m_image_ctx;
853 CephContext *cct = image_ctx.cct;
854
855 // map image extents to object extents
856 auto &image_extents = this->m_image_extents;
857 std::map<uint64_t, Extents> object_number_extents;
858 for (auto& image_extent : image_extents) {
859 if (image_extent.second == 0) {
860 continue;
861 }
862
863 striper::LightweightObjectExtents object_extents;
864 io::util::file_to_extents(&image_ctx, image_extent.first,
865 image_extent.second, 0, &object_extents);
866 for (auto& object_extent : object_extents) {
867 object_number_extents[object_extent.object_no].emplace_back(
868 object_extent.offset, object_extent.length);
869 }
870 }
871
872 // reassemble the deltas back into image-extents when complete
873 auto aio_comp = this->m_aio_comp;
874 aio_comp->set_request_count(1);
875 auto assemble_ctx = new C_AssembleSnapshotDeltas<I>(
876 &image_ctx, aio_comp, m_snapshot_delta);
877 auto sub_aio_comp = AioCompletion::create_and_start<
878 Context, &Context::complete>(assemble_ctx, get_image_ctx(&image_ctx),
879 AIO_TYPE_GENERIC);
880
881 // issue the requests
882 sub_aio_comp->set_request_count(object_number_extents.size());
883 for (auto& oe : object_number_extents) {
884 ldout(cct, 20) << data_object_name(&image_ctx, oe.first) << " "
885 << oe.second << dendl;
886 auto ctx = new C_AioRequest(sub_aio_comp);
887 auto req = ObjectDispatchSpec::create_list_snaps(
888 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.first, std::move(oe.second),
889 SnapIds{m_snap_ids}, m_list_snaps_flags, this->m_trace,
890 assemble_ctx->get_snapshot_delta(oe.first), ctx);
891 req->send();
892 }
893}
894
7c673cae
FG
895} // namespace io
896} // namespace librbd
897
898template class librbd::io::ImageRequest<librbd::ImageCtx>;
899template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
900template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
901template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
902template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
903template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
904template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
c07f9fc5 905template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;
f67539c2 906template class librbd::io::ImageListSnapsRequest<librbd::ImageCtx>;