]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/io/ImageRequest.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / librbd / io / ImageRequest.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "librbd/io/ImageRequest.h"
5#include "librbd/ImageCtx.h"
6#include "librbd/internal.h"
7#include "librbd/Journal.h"
b32b8144 8#include "librbd/Types.h"
7c673cae
FG
9#include "librbd/Utils.h"
10#include "librbd/cache/ImageCache.h"
11#include "librbd/io/AioCompletion.h"
11fdf7f2
TL
12#include "librbd/io/AsyncOperation.h"
13#include "librbd/io/ObjectDispatchInterface.h"
14#include "librbd/io/ObjectDispatchSpec.h"
15#include "librbd/io/ObjectDispatcher.h"
16#include "librbd/io/Utils.h"
7c673cae
FG
17#include "librbd/journal/Types.h"
18#include "include/rados/librados.hpp"
11fdf7f2 19#include "common/perf_counters.h"
7c673cae
FG
20#include "common/WorkQueue.h"
21#include "osdc/Striper.h"
11fdf7f2
TL
22#include <algorithm>
23#include <functional>
7c673cae
FG
24
25#define dout_subsys ceph_subsys_rbd
26#undef dout_prefix
9f95a23c 27#define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": "
7c673cae
FG
28
29namespace librbd {
30namespace io {
31
9f95a23c 32using librbd::util::data_object_name;
11fdf7f2 33using librbd::util::get_image_ctx;
7c673cae
FG
34
35namespace {
36
9f95a23c
TL
37template <typename I>
38struct C_RBD_Readahead : public Context {
39 I *ictx;
40 uint64_t object_no;
41 uint64_t offset;
42 uint64_t length;
43
44 bufferlist read_data;
45 io::ExtentMap extent_map;
46
47 C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length)
48 : ictx(ictx), object_no(object_no), offset(offset), length(length) {
49 ictx->readahead.inc_pending();
50 }
51
52 void finish(int r) override {
53 ldout(ictx->cct, 20) << "C_RBD_Readahead on "
54 << data_object_name(ictx, object_no) << ": "
55 << offset << "~" << length << dendl;
56 ictx->readahead.dec_pending();
57 }
58};
59
60template <typename I>
61void readahead(I *ictx, const Extents& image_extents) {
62 uint64_t total_bytes = 0;
63 for (auto& image_extent : image_extents) {
64 total_bytes += image_extent.second;
65 }
66
67 ictx->image_lock.lock_shared();
68 auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes);
69 bool abort = (
70 ictx->readahead_disable_after_bytes != 0 &&
71 total_bytes_read > ictx->readahead_disable_after_bytes);
72 if (abort) {
73 ictx->image_lock.unlock_shared();
74 return;
75 }
76
77 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
78 auto snap_id = ictx->snap_id;
79 ictx->image_lock.unlock_shared();
80
81 auto readahead_extent = ictx->readahead.update(image_extents, image_size);
82 uint64_t readahead_offset = readahead_extent.first;
83 uint64_t readahead_length = readahead_extent.second;
84
85 if (readahead_length > 0) {
86 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~"
87 << readahead_length << dendl;
88 LightweightObjectExtents readahead_object_extents;
89 Striper::file_to_extents(ictx->cct, &ictx->layout,
90 readahead_offset, readahead_length, 0, 0,
91 &readahead_object_extents);
92 for (auto& object_extent : readahead_object_extents) {
93 ldout(ictx->cct, 20) << "(readahead) "
94 << data_object_name(ictx,
95 object_extent.object_no) << " "
96 << object_extent.offset << "~"
97 << object_extent.length << dendl;
98
99 auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no,
100 object_extent.offset,
101 object_extent.length);
102 auto req = io::ObjectDispatchSpec::create_read(
103 ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
104 object_extent.offset, object_extent.length, snap_id, 0, {},
105 &req_comp->read_data, &req_comp->extent_map, req_comp);
106 req->send();
107 }
108
109 ictx->perfcounter->inc(l_librbd_readahead);
110 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
111 }
112}
113
11fdf7f2
TL
114template <typename I>
115struct C_UpdateTimestamp : public Context {
116public:
117 I& m_image_ctx;
118 bool m_modify; // if modify set to 'true', modify timestamp is updated,
119 // access timestamp otherwise
120 AsyncOperation m_async_op;
91327a77 121
11fdf7f2
TL
122 C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) {
123 m_async_op.start_op(*get_image_ctx(&m_image_ctx));
91327a77 124 }
11fdf7f2
TL
125 ~C_UpdateTimestamp() override {
126 m_async_op.finish_op();
91327a77
AA
127 }
128
11fdf7f2
TL
129 void send() {
130 librados::ObjectWriteOperation op;
131 if (m_modify) {
132 cls_client::set_modify_timestamp(&op);
133 } else {
134 cls_client::set_access_timestamp(&op);
91327a77 135 }
7c673cae 136
11fdf7f2
TL
137 auto comp = librbd::util::create_rados_callback(this);
138 int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
139 ceph_assert(r == 0);
140 comp->release();
7c673cae
FG
141 }
142
143 void finish(int r) override {
11fdf7f2 144 // ignore errors updating timestamp
7c673cae
FG
145 }
146};
147
11fdf7f2
TL
148bool should_update_timestamp(const utime_t& now, const utime_t& timestamp,
149 uint64_t interval) {
150 return (interval &&
151 (static_cast<uint64_t>(now.sec()) >= interval + timestamp));
224ce89b
WB
152}
153
11fdf7f2 154} // anonymous namespace
c07f9fc5 155
9f95a23c
TL
156#undef dout_prefix
157#define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
158 << " " << __func__ << ": "
159
7c673cae
FG
160template <typename I>
161void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
162 Extents &&image_extents,
31f18b77
FG
163 ReadResult &&read_result, int op_flags,
164 const ZTracer::Trace &parent_trace) {
7c673cae 165 ImageReadRequest<I> req(*ictx, c, std::move(image_extents),
31f18b77 166 std::move(read_result), op_flags, parent_trace);
7c673cae
FG
167 req.send();
168}
169
170template <typename I>
171void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
172 Extents &&image_extents, bufferlist &&bl,
31f18b77
FG
173 int op_flags,
174 const ZTracer::Trace &parent_trace) {
7c673cae 175 ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl),
31f18b77 176 op_flags, parent_trace);
7c673cae
FG
177 req.send();
178}
179
180template <typename I>
181void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
11fdf7f2
TL
182 Extents &&image_extents,
183 uint32_t discard_granularity_bytes,
31f18b77 184 const ZTracer::Trace &parent_trace) {
11fdf7f2
TL
185 ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents),
186 discard_granularity_bytes, parent_trace);
7c673cae
FG
187 req.send();
188}
189
190template <typename I>
31f18b77 191void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
11fdf7f2
TL
192 FlushSource flush_source,
193 const ZTracer::Trace &parent_trace) {
194 ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace);
7c673cae
FG
195 req.send();
196}
197
198template <typename I>
199void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
11fdf7f2 200 Extents &&image_extents,
31f18b77
FG
201 bufferlist &&bl, int op_flags,
202 const ZTracer::Trace &parent_trace) {
11fdf7f2
TL
203 ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents),
204 std::move(bl), op_flags, parent_trace);
7c673cae
FG
205 req.send();
206}
207
c07f9fc5
FG
208template <typename I>
209void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
210 Extents &&image_extents,
211 bufferlist &&cmp_bl,
212 bufferlist &&bl,
213 uint64_t *mismatch_offset,
214 int op_flags,
215 const ZTracer::Trace &parent_trace) {
216 ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents),
217 std::move(cmp_bl), std::move(bl),
218 mismatch_offset, op_flags, parent_trace);
219 req.send();
220}
221
7c673cae
FG
222template <typename I>
223void ImageRequest<I>::send() {
224 I &image_ctx = this->m_image_ctx;
11fdf7f2 225 ceph_assert(m_aio_comp->is_initialized(get_aio_type()));
494da23a 226 ceph_assert(m_aio_comp->is_started());
7c673cae
FG
227
228 CephContext *cct = image_ctx.cct;
229 AioCompletion *aio_comp = this->m_aio_comp;
230 ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
c07f9fc5 231 << "completion=" << aio_comp << dendl;
7c673cae 232
7c673cae
FG
233 int r = clip_request();
234 if (r < 0) {
235 m_aio_comp->fail(r);
236 return;
237 }
238
9f95a23c
TL
239 if (finish_request_early()) {
240 return;
241 }
242
7c673cae 243 if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
11fdf7f2 244 update_timestamp();
7c673cae
FG
245 send_request();
246 } else {
247 send_image_cache_request();
248 }
249}
250
251template <typename I>
252int ImageRequest<I>::clip_request() {
9f95a23c 253 std::shared_lock image_locker{m_image_ctx.image_lock};
7c673cae
FG
254 for (auto &image_extent : m_image_extents) {
255 auto clip_len = image_extent.second;
256 int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len);
257 if (r < 0) {
258 return r;
259 }
260
261 image_extent.second = clip_len;
262 }
263 return 0;
264}
265
266template <typename I>
11fdf7f2
TL
267void ImageRequest<I>::update_timestamp() {
268 bool modify = (get_aio_type() != AIO_TYPE_READ);
269 uint64_t update_interval;
270 if (modify) {
271 update_interval = m_image_ctx.mtime_update_interval;
272 } else {
273 update_interval = m_image_ctx.atime_update_interval;
274 }
7c673cae 275
11fdf7f2
TL
276 if (update_interval == 0) {
277 return;
278 }
279
280 utime_t (I::*get_timestamp_fn)() const;
281 void (I::*set_timestamp_fn)(utime_t);
282 if (modify) {
283 get_timestamp_fn = &I::get_modify_timestamp;
284 set_timestamp_fn = &I::set_modify_timestamp;
285 } else {
286 get_timestamp_fn = &I::get_access_timestamp;
287 set_timestamp_fn = &I::set_access_timestamp;
288 }
289
290 utime_t ts = ceph_clock_now();
291 {
9f95a23c 292 std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
293 if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx),
294 update_interval)) {
295 return;
296 }
297 }
298
299 {
9f95a23c 300 std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock};
11fdf7f2
TL
301 bool update = should_update_timestamp(
302 ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval);
303 if (!update) {
304 return;
305 }
306
307 std::invoke(set_timestamp_fn, m_image_ctx, ts);
308 }
309
310 // TODO we fire and forget this outside the IO path to prevent
311 // potential race conditions with librbd client IO callbacks
312 // between different threads (e.g. librados and object cacher)
313 ldout(m_image_ctx.cct, 10) << get_request_type() << dendl;
314 auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify);
315 req->send();
7c673cae
FG
316}
317
318template <typename I>
319ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
320 Extents &&image_extents,
31f18b77
FG
321 ReadResult &&read_result, int op_flags,
322 const ZTracer::Trace &parent_trace)
323 : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read",
324 parent_trace),
7c673cae
FG
325 m_op_flags(op_flags) {
326 aio_comp->read_result = std::move(read_result);
327}
328
329template <typename I>
330int ImageReadRequest<I>::clip_request() {
331 int r = ImageRequest<I>::clip_request();
332 if (r < 0) {
333 return r;
334 }
335
336 uint64_t buffer_length = 0;
337 auto &image_extents = this->m_image_extents;
338 for (auto &image_extent : image_extents) {
339 buffer_length += image_extent.second;
340 }
341 this->m_aio_comp->read_result.set_clip_length(buffer_length);
342 return 0;
343}
344
345template <typename I>
346void ImageReadRequest<I>::send_request() {
347 I &image_ctx = this->m_image_ctx;
348 CephContext *cct = image_ctx.cct;
349
350 auto &image_extents = this->m_image_extents;
11fdf7f2 351 if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 &&
7c673cae
FG
352 !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
353 readahead(get_image_ctx(&image_ctx), image_extents);
354 }
355
7c673cae 356 librados::snap_t snap_id;
7c673cae
FG
357 uint64_t buffer_ofs = 0;
358 {
359 // prevent image size from changing between computing clip and recording
360 // pending async operation
9f95a23c 361 std::shared_lock image_locker{image_ctx.image_lock};
7c673cae 362 snap_id = image_ctx.snap_id;
9f95a23c 363 }
7c673cae 364
9f95a23c
TL
365 // map image extents to object extents
366 LightweightObjectExtents object_extents;
367 for (auto &extent : image_extents) {
368 if (extent.second == 0) {
369 continue;
7c673cae 370 }
7c673cae 371
9f95a23c
TL
372 Striper::file_to_extents(cct, &image_ctx.layout, extent.first,
373 extent.second, 0, buffer_ofs, &object_extents);
374 buffer_ofs += extent.second;
7c673cae 375 }
7c673cae
FG
376
377 // issue the requests
9f95a23c
TL
378 AioCompletion *aio_comp = this->m_aio_comp;
379 aio_comp->set_request_count(object_extents.size());
380 for (auto &oe : object_extents) {
381 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
382 << oe.offset << "~" << oe.length << " from "
383 << oe.buffer_extents << dendl;
7c673cae 384
9f95a23c
TL
385 auto req_comp = new io::ReadResult::C_ObjectReadRequest(
386 aio_comp, oe.offset, oe.length, std::move(oe.buffer_extents));
387 auto req = ObjectDispatchSpec::create_read(
388 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no, oe.offset,
389 oe.length, snap_id, m_op_flags, this->m_trace, &req_comp->bl,
390 &req_comp->extent_map, req_comp);
391 req->send();
392 }
7c673cae
FG
393
394 image_ctx.perfcounter->inc(l_librbd_rd);
395 image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
396}
397
398template <typename I>
399void ImageReadRequest<I>::send_image_cache_request() {
400 I &image_ctx = this->m_image_ctx;
11fdf7f2 401 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
402
403 AioCompletion *aio_comp = this->m_aio_comp;
404 aio_comp->set_request_count(1);
405
406 auto *req_comp = new io::ReadResult::C_ImageReadRequest(
407 aio_comp, this->m_image_extents);
408 image_ctx.image_cache->aio_read(std::move(this->m_image_extents),
409 &req_comp->bl, m_op_flags,
410 req_comp);
411}
412
9f95a23c
TL
413template <typename I>
414bool AbstractImageWriteRequest<I>::finish_request_early() {
415 AioCompletion *aio_comp = this->m_aio_comp;
416 {
417 std::shared_lock image_locker{this->m_image_ctx.image_lock};
418 if (this->m_image_ctx.snap_id != CEPH_NOSNAP || this->m_image_ctx.read_only) {
419 aio_comp->fail(-EROFS);
420 return true;
421 }
422 }
423 uint64_t total_bytes = 0;
424 for (auto& image_extent : this->m_image_extents) {
425 total_bytes += image_extent.second;
426 }
427 if (total_bytes == 0) {
428 aio_comp->set_request_count(0);
429 return true;
430 }
431 return false;
432}
433
7c673cae
FG
434template <typename I>
435void AbstractImageWriteRequest<I>::send_request() {
436 I &image_ctx = this->m_image_ctx;
437 CephContext *cct = image_ctx.cct;
438
7c673cae
FG
439 bool journaling = false;
440
441 AioCompletion *aio_comp = this->m_aio_comp;
7c673cae
FG
442 ::SnapContext snapc;
443 {
444 // prevent image size from changing between computing clip and recording
445 // pending async operation
9f95a23c 446 std::shared_lock image_locker{image_ctx.image_lock};
7c673cae
FG
447
448 snapc = image_ctx.snapc;
449 journaling = (image_ctx.journal != nullptr &&
450 image_ctx.journal->is_journal_appending());
451 }
452
9f95a23c
TL
453 uint64_t clip_len = 0;
454 LightweightObjectExtents object_extents;
455 for (auto &extent : this->m_image_extents) {
456 if (extent.second == 0) {
457 continue;
458 }
459
460 // map to object extents
461 Striper::file_to_extents(cct, &image_ctx.layout, extent.first,
462 extent.second, 0, clip_len, &object_extents);
463 clip_len += extent.second;
464 }
465
11fdf7f2 466 int ret = prune_object_extents(&object_extents);
c07f9fc5
FG
467 if (ret < 0) {
468 aio_comp->fail(ret);
469 return;
470 }
7c673cae 471
9f95a23c 472 aio_comp->set_request_count(object_extents.size());
7c673cae
FG
473 if (!object_extents.empty()) {
474 uint64_t journal_tid = 0;
7c673cae
FG
475 if (journaling) {
476 // in-flight ops are flushed prior to closing the journal
11fdf7f2
TL
477 ceph_assert(image_ctx.journal != NULL);
478 journal_tid = append_journal_event(m_synchronous);
7c673cae
FG
479 }
480
11fdf7f2 481 send_object_requests(object_extents, snapc, journal_tid);
7c673cae
FG
482 }
483
484 update_stats(clip_len);
7c673cae
FG
485}
486
487template <typename I>
488void AbstractImageWriteRequest<I>::send_object_requests(
9f95a23c 489 const LightweightObjectExtents &object_extents, const ::SnapContext &snapc,
11fdf7f2 490 uint64_t journal_tid) {
7c673cae
FG
491 I &image_ctx = this->m_image_ctx;
492 CephContext *cct = image_ctx.cct;
493
494 AioCompletion *aio_comp = this->m_aio_comp;
9f95a23c
TL
495 bool single_extent = (object_extents.size() == 1);
496 for (auto& oe : object_extents) {
497 ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " "
498 << oe.offset << "~" << oe.length << " from "
499 << oe.buffer_extents << dendl;
7c673cae 500 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
9f95a23c
TL
501 auto request = create_object_request(oe, snapc, journal_tid, single_extent,
502 req_comp);
503 request->send();
7c673cae
FG
504 }
505}
506
507template <typename I>
9f95a23c
TL
508void ImageWriteRequest<I>::assemble_extent(
509 const LightweightObjectExtent &object_extent, bufferlist *bl) {
7c673cae
FG
510 for (auto q = object_extent.buffer_extents.begin();
511 q != object_extent.buffer_extents.end(); ++q) {
512 bufferlist sub_bl;
513 sub_bl.substr_of(m_bl, q->first, q->second);
514 bl->claim_append(sub_bl);
515 }
516}
517
518template <typename I>
11fdf7f2 519uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
520 I &image_ctx = this->m_image_ctx;
521
522 uint64_t tid = 0;
523 uint64_t buffer_offset = 0;
11fdf7f2 524 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
525 for (auto &extent : this->m_image_extents) {
526 bufferlist sub_bl;
527 sub_bl.substr_of(m_bl, buffer_offset, extent.second);
528 buffer_offset += extent.second;
529
530 tid = image_ctx.journal->append_write_event(extent.first, extent.second,
11fdf7f2 531 sub_bl, synchronous);
7c673cae
FG
532 }
533
7c673cae
FG
534 return tid;
535}
536
537template <typename I>
538void ImageWriteRequest<I>::send_image_cache_request() {
539 I &image_ctx = this->m_image_ctx;
11fdf7f2 540 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
541
542 AioCompletion *aio_comp = this->m_aio_comp;
543 aio_comp->set_request_count(1);
544 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
545 image_ctx.image_cache->aio_write(std::move(this->m_image_extents),
546 std::move(m_bl), m_op_flags, req_comp);
547}
548
549template <typename I>
11fdf7f2 550ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request(
9f95a23c
TL
551 const LightweightObjectExtent &object_extent, const ::SnapContext &snapc,
552 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 553 I &image_ctx = this->m_image_ctx;
7c673cae
FG
554
555 bufferlist bl;
9f95a23c
TL
556 if (single_extent && object_extent.buffer_extents.size() == 1) {
557 // optimization for single object/buffer extent writes
558 bl = std::move(m_bl);
559 } else {
560 assemble_extent(object_extent, &bl);
561 }
562
11fdf7f2 563 auto req = ObjectDispatchSpec::create_write(
9f95a23c
TL
564 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
565 object_extent.offset, std::move(bl), snapc, m_op_flags, journal_tid,
566 this->m_trace, on_finish);
7c673cae
FG
567 return req;
568}
569
570template <typename I>
571void ImageWriteRequest<I>::update_stats(size_t length) {
572 I &image_ctx = this->m_image_ctx;
573 image_ctx.perfcounter->inc(l_librbd_wr);
574 image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
575}
576
577template <typename I>
11fdf7f2 578uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
579 I &image_ctx = this->m_image_ctx;
580
581 uint64_t tid = 0;
11fdf7f2 582 ceph_assert(!this->m_image_extents.empty());
7c673cae 583 for (auto &extent : this->m_image_extents) {
11fdf7f2
TL
584 journal::EventEntry event_entry(
585 journal::AioDiscardEvent(extent.first,
586 extent.second,
587 this->m_discard_granularity_bytes));
7c673cae 588 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
589 extent.first, extent.second,
590 synchronous, 0);
7c673cae
FG
591 }
592
7c673cae
FG
593 return tid;
594}
595
7c673cae
FG
596template <typename I>
597void ImageDiscardRequest<I>::send_image_cache_request() {
598 I &image_ctx = this->m_image_ctx;
11fdf7f2 599 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
600
601 AioCompletion *aio_comp = this->m_aio_comp;
602 aio_comp->set_request_count(this->m_image_extents.size());
603 for (auto &extent : this->m_image_extents) {
604 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
605 image_ctx.image_cache->aio_discard(extent.first, extent.second,
11fdf7f2
TL
606 this->m_discard_granularity_bytes,
607 req_comp);
7c673cae
FG
608 }
609}
610
611template <typename I>
11fdf7f2 612ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request(
9f95a23c
TL
613 const LightweightObjectExtent &object_extent, const ::SnapContext &snapc,
614 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 615 I &image_ctx = this->m_image_ctx;
11fdf7f2 616 auto req = ObjectDispatchSpec::create_discard(
9f95a23c
TL
617 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
618 object_extent.offset, object_extent.length, snapc,
11fdf7f2
TL
619 OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace,
620 on_finish);
7c673cae
FG
621 return req;
622}
623
624template <typename I>
625void ImageDiscardRequest<I>::update_stats(size_t length) {
626 I &image_ctx = this->m_image_ctx;
627 image_ctx.perfcounter->inc(l_librbd_discard);
628 image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
629}
630
11fdf7f2
TL
631template <typename I>
632int ImageDiscardRequest<I>::prune_object_extents(
9f95a23c 633 LightweightObjectExtents* object_extents) const {
11fdf7f2
TL
634 if (m_discard_granularity_bytes == 0) {
635 return 0;
636 }
637
638 // Align the range to discard_granularity_bytes boundary and skip
639 // and discards that are too small to free up any space.
640 //
641 // discard_granularity_bytes >= object_size && tail truncation
642 // is a special case for filestore
643 bool prune_required = false;
644 auto object_size = this->m_image_ctx.layout.object_size;
645 auto discard_granularity_bytes = std::min(m_discard_granularity_bytes,
646 object_size);
647 auto xform_lambda =
648 [discard_granularity_bytes, object_size, &prune_required]
9f95a23c 649 (LightweightObjectExtent& object_extent) {
11fdf7f2
TL
650 auto& offset = object_extent.offset;
651 auto& length = object_extent.length;
652 auto next_offset = offset + length;
653
654 if ((discard_granularity_bytes < object_size) ||
655 (next_offset < object_size)) {
656 offset = p2roundup<uint64_t>(offset, discard_granularity_bytes);
657 next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes);
658 if (offset >= next_offset) {
659 prune_required = true;
660 length = 0;
661 } else {
662 length = next_offset - offset;
663 }
664 }
665 };
666 std::for_each(object_extents->begin(), object_extents->end(),
667 xform_lambda);
668
669 if (prune_required) {
670 // one or more object extents were skipped
671 auto remove_lambda =
9f95a23c 672 [](const LightweightObjectExtent& object_extent) {
11fdf7f2
TL
673 return (object_extent.length == 0);
674 };
675 object_extents->erase(
676 std::remove_if(object_extents->begin(), object_extents->end(),
677 remove_lambda),
678 object_extents->end());
679 }
680 return 0;
681}
682
7c673cae
FG
683template <typename I>
684void ImageFlushRequest<I>::send_request() {
685 I &image_ctx = this->m_image_ctx;
7c673cae
FG
686
687 bool journaling = false;
688 {
9f95a23c 689 std::shared_lock image_locker{image_ctx.image_lock};
11fdf7f2
TL
690 journaling = (m_flush_source == FLUSH_SOURCE_USER &&
691 image_ctx.journal != nullptr &&
7c673cae
FG
692 image_ctx.journal->is_journal_appending());
693 }
694
695 AioCompletion *aio_comp = this->m_aio_comp;
11fdf7f2
TL
696 aio_comp->set_request_count(1);
697
698 Context *ctx = new C_AioRequest(aio_comp);
699
700 // ensure no locks are held when flush is complete
701 ctx = librbd::util::create_async_context_callback(image_ctx, ctx);
702
9f95a23c 703 uint64_t journal_tid = 0;
7c673cae
FG
704 if (journaling) {
705 // in-flight ops are flushed prior to closing the journal
9f95a23c
TL
706 ceph_assert(image_ctx.journal != NULL);
707 journal_tid = image_ctx.journal->append_io_event(
11fdf7f2 708 journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0);
494da23a 709 image_ctx.journal->user_flushed();
7c673cae
FG
710 }
711
9f95a23c
TL
712 auto object_dispatch_spec = ObjectDispatchSpec::create_flush(
713 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid,
714 this->m_trace, ctx);
715 ctx = new LambdaContext([object_dispatch_spec](int r) {
716 object_dispatch_spec->send();
717 });
718
11fdf7f2 719 // ensure all in-flight IOs are settled if non-user flush request
494da23a 720 aio_comp->async_op.flush(ctx);
11fdf7f2
TL
721
722 // might be flushing during image shutdown
723 if (image_ctx.perfcounter != nullptr) {
724 image_ctx.perfcounter->inc(l_librbd_flush);
725 }
7c673cae
FG
726}
727
728template <typename I>
729void ImageFlushRequest<I>::send_image_cache_request() {
730 I &image_ctx = this->m_image_ctx;
11fdf7f2 731 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
732
733 AioCompletion *aio_comp = this->m_aio_comp;
734 aio_comp->set_request_count(1);
735 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
736 image_ctx.image_cache->aio_flush(req_comp);
737}
738
739template <typename I>
11fdf7f2 740uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
7c673cae
FG
741 I &image_ctx = this->m_image_ctx;
742
743 uint64_t tid = 0;
11fdf7f2 744 ceph_assert(!this->m_image_extents.empty());
7c673cae
FG
745 for (auto &extent : this->m_image_extents) {
746 journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
747 extent.second,
748 m_data_bl));
749 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
750 extent.first, extent.second,
751 synchronous, 0);
7c673cae
FG
752 }
753
7c673cae
FG
754 return tid;
755}
756
757template <typename I>
758void ImageWriteSameRequest<I>::send_image_cache_request() {
759 I &image_ctx = this->m_image_ctx;
11fdf7f2 760 ceph_assert(image_ctx.image_cache != nullptr);
7c673cae
FG
761
762 AioCompletion *aio_comp = this->m_aio_comp;
763 aio_comp->set_request_count(this->m_image_extents.size());
764 for (auto &extent : this->m_image_extents) {
765 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
766 image_ctx.image_cache->aio_writesame(extent.first, extent.second,
767 std::move(m_data_bl), m_op_flags,
768 req_comp);
769 }
770}
771
772template <typename I>
11fdf7f2 773ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request(
9f95a23c
TL
774 const LightweightObjectExtent &object_extent, const ::SnapContext &snapc,
775 uint64_t journal_tid, bool single_extent, Context *on_finish) {
7c673cae 776 I &image_ctx = this->m_image_ctx;
7c673cae
FG
777
778 bufferlist bl;
11fdf7f2
TL
779 ObjectDispatchSpec *req;
780
781 if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) {
9f95a23c 782 auto buffer_extents{object_extent.buffer_extents};
7c673cae 783
11fdf7f2 784 req = ObjectDispatchSpec::create_write_same(
9f95a23c
TL
785 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
786 object_extent.offset, object_extent.length, std::move(buffer_extents),
787 std::move(bl), snapc, m_op_flags, journal_tid,
11fdf7f2 788 this->m_trace, on_finish);
7c673cae
FG
789 return req;
790 }
11fdf7f2 791 req = ObjectDispatchSpec::create_write(
9f95a23c
TL
792 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
793 object_extent.offset, std::move(bl), snapc, m_op_flags, journal_tid,
794 this->m_trace, on_finish);
7c673cae
FG
795 return req;
796}
797
798template <typename I>
799void ImageWriteSameRequest<I>::update_stats(size_t length) {
800 I &image_ctx = this->m_image_ctx;
801 image_ctx.perfcounter->inc(l_librbd_ws);
802 image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
803}
804
c07f9fc5
FG
805template <typename I>
806uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
11fdf7f2 807 bool synchronous) {
c07f9fc5
FG
808 I &image_ctx = this->m_image_ctx;
809
810 uint64_t tid = 0;
11fdf7f2 811 ceph_assert(this->m_image_extents.size() == 1);
c07f9fc5 812 auto &extent = this->m_image_extents.front();
11fdf7f2
TL
813 journal::EventEntry event_entry(
814 journal::AioCompareAndWriteEvent(extent.first, extent.second, m_cmp_bl,
815 m_bl));
c07f9fc5 816 tid = image_ctx.journal->append_io_event(std::move(event_entry),
11fdf7f2
TL
817 extent.first, extent.second,
818 synchronous, -EILSEQ);
c07f9fc5
FG
819
820 return tid;
821}
822
c07f9fc5
FG
823template <typename I>
824void ImageCompareAndWriteRequest<I>::assemble_extent(
9f95a23c 825 const LightweightObjectExtent &object_extent, bufferlist *bl) {
c07f9fc5
FG
826 for (auto q = object_extent.buffer_extents.begin();
827 q != object_extent.buffer_extents.end(); ++q) {
828 bufferlist sub_bl;
829 sub_bl.substr_of(m_bl, q->first, q->second);
830 bl->claim_append(sub_bl);
831 }
832}
833
834template <typename I>
835void ImageCompareAndWriteRequest<I>::send_image_cache_request() {
836 I &image_ctx = this->m_image_ctx;
11fdf7f2 837 ceph_assert(image_ctx.image_cache != nullptr);
c07f9fc5
FG
838
839 AioCompletion *aio_comp = this->m_aio_comp;
840 aio_comp->set_request_count(1);
841 C_AioRequest *req_comp = new C_AioRequest(aio_comp);
842 image_ctx.image_cache->aio_compare_and_write(
843 std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl),
844 m_mismatch_offset, m_op_flags, req_comp);
845}
846
847template <typename I>
11fdf7f2 848ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request(
9f95a23c
TL
849 const LightweightObjectExtent &object_extent, const ::SnapContext &snapc,
850 uint64_t journal_tid, bool single_extent, Context *on_finish) {
c07f9fc5
FG
851 I &image_ctx = this->m_image_ctx;
852
11fdf7f2
TL
853 // NOTE: safe to move m_cmp_bl since we only support this op against
854 // a single object
c07f9fc5
FG
855 bufferlist bl;
856 assemble_extent(object_extent, &bl);
11fdf7f2 857 auto req = ObjectDispatchSpec::create_compare_and_write(
9f95a23c
TL
858 &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no,
859 object_extent.offset, std::move(m_cmp_bl), std::move(bl), snapc,
860 m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish);
c07f9fc5
FG
861 return req;
862}
863
864template <typename I>
865void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
866 I &image_ctx = this->m_image_ctx;
867 image_ctx.perfcounter->inc(l_librbd_cmp);
868 image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
869}
870
871template <typename I>
11fdf7f2 872int ImageCompareAndWriteRequest<I>::prune_object_extents(
9f95a23c 873 LightweightObjectExtents* object_extents) const {
11fdf7f2 874 if (object_extents->size() > 1)
c07f9fc5
FG
875 return -EINVAL;
876
877 I &image_ctx = this->m_image_ctx;
878 uint64_t sector_size = 512ULL;
879 uint64_t su = image_ctx.layout.stripe_unit;
9f95a23c 880 auto& object_extent = object_extents->front();
c07f9fc5
FG
881 if (object_extent.offset % sector_size + object_extent.length > sector_size ||
882 (su != 0 && (object_extent.offset % su + object_extent.length > su)))
883 return -EINVAL;
884
885 return 0;
886}
887
7c673cae
FG
888} // namespace io
889} // namespace librbd
890
891template class librbd::io::ImageRequest<librbd::ImageCtx>;
892template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
893template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
894template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
895template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
896template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
897template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
c07f9fc5 898template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;