]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ImageRequest.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/internal.h" | |
7 | #include "librbd/Journal.h" | |
b32b8144 | 8 | #include "librbd/Types.h" |
7c673cae | 9 | #include "librbd/Utils.h" |
f67539c2 | 10 | #include "librbd/asio/ContextWQ.h" |
7c673cae | 11 | #include "librbd/io/AioCompletion.h" |
11fdf7f2 TL |
12 | #include "librbd/io/AsyncOperation.h" |
13 | #include "librbd/io/ObjectDispatchInterface.h" | |
14 | #include "librbd/io/ObjectDispatchSpec.h" | |
f67539c2 | 15 | #include "librbd/io/ObjectDispatcherInterface.h" |
11fdf7f2 | 16 | #include "librbd/io/Utils.h" |
7c673cae FG |
17 | #include "librbd/journal/Types.h" |
18 | #include "include/rados/librados.hpp" | |
f67539c2 | 19 | #include "common/errno.h" |
11fdf7f2 | 20 | #include "common/perf_counters.h" |
7c673cae | 21 | #include "osdc/Striper.h" |
11fdf7f2 TL |
22 | #include <algorithm> |
23 | #include <functional> | |
f67539c2 | 24 | #include <map> |
7c673cae FG |
25 | |
26 | #define dout_subsys ceph_subsys_rbd | |
27 | #undef dout_prefix | |
9f95a23c | 28 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": " |
7c673cae FG |
29 | |
30 | namespace librbd { | |
31 | namespace io { | |
32 | ||
9f95a23c | 33 | using librbd::util::data_object_name; |
11fdf7f2 | 34 | using librbd::util::get_image_ctx; |
7c673cae FG |
35 | |
36 | namespace { | |
37 | ||
f67539c2 TL |
38 | template <typename I> |
39 | struct C_AssembleSnapshotDeltas : public C_AioRequest { | |
40 | I* image_ctx; | |
41 | SnapshotDelta* snapshot_delta; | |
42 | ||
43 | ceph::mutex lock = ceph::make_mutex( | |
44 | "librbd::io::C_AssembleSnapshotDeltas::lock", false); | |
45 | std::map<uint64_t, SnapshotDelta> object_snapshot_delta; | |
46 | ||
47 | C_AssembleSnapshotDeltas(I* image_ctx, AioCompletion* aio_comp, | |
48 | SnapshotDelta* snapshot_delta) | |
49 | : C_AioRequest(aio_comp), | |
50 | image_ctx(image_ctx), snapshot_delta(snapshot_delta) { | |
51 | } | |
52 | ||
53 | SnapshotDelta* get_snapshot_delta(uint64_t object_no) { | |
54 | std::unique_lock locker{lock}; | |
55 | return &object_snapshot_delta[object_no]; | |
56 | } | |
57 | ||
58 | void finish(int r) override { | |
59 | auto cct = image_ctx->cct; | |
60 | ||
61 | if (r < 0) { | |
62 | lderr(cct) << "C_AssembleSnapshotDeltas: list snaps failed: " | |
63 | << cpp_strerror(r) << dendl; | |
64 | C_AioRequest::finish(r); | |
65 | return; | |
66 | } | |
67 | ||
68 | std::unique_lock locker{lock}; | |
69 | *snapshot_delta = {}; | |
70 | for (auto& [object_no, object_snapshot_delta] : object_snapshot_delta) { | |
71 | SnapshotDelta image_snapshot_delta; | |
72 | object_to_image_intervals(object_no, object_snapshot_delta, | |
73 | &image_snapshot_delta, snapshot_delta); | |
74 | ||
75 | ldout(cct, 20) << "object_no=" << object_no << ", " | |
76 | << "object_snapshot_delta=" | |
77 | << object_snapshot_delta << ", " | |
78 | << "image_snapshot_delta=" << image_snapshot_delta | |
79 | << dendl; | |
80 | } | |
81 | ||
82 | ldout(cct, 20) << "snapshot_delta=" << *snapshot_delta << dendl; | |
83 | C_AioRequest::finish(0); | |
84 | } | |
85 | ||
86 | void object_to_image_intervals( | |
87 | uint64_t object_no, const SnapshotDelta& object_snapshot_delta, | |
88 | SnapshotDelta* image_snapshot_delta, | |
89 | SnapshotDelta* assembled_image_snapshot_delta) { | |
90 | for (auto& [key, object_extents] : object_snapshot_delta) { | |
91 | for (auto& object_extent : object_extents) { | |
1e59de90 TL |
92 | auto [image_extents, _] = io::util::object_to_area_extents( |
93 | image_ctx, object_no, | |
94 | {{object_extent.get_off(), object_extent.get_len()}}); | |
f67539c2 TL |
95 | |
96 | auto& intervals = (*image_snapshot_delta)[key]; | |
97 | auto& assembled_intervals = (*assembled_image_snapshot_delta)[key]; | |
98 | for (auto [image_offset, image_length] : image_extents) { | |
99 | SparseExtent sparse_extent{object_extent.get_val().state, | |
100 | image_length}; | |
101 | intervals.insert(image_offset, image_length, sparse_extent); | |
102 | assembled_intervals.insert(image_offset, image_length, | |
103 | sparse_extent); | |
104 | } | |
105 | } | |
106 | } | |
107 | } | |
108 | }; | |
109 | ||
9f95a23c TL |
110 | template <typename I> |
111 | struct C_RBD_Readahead : public Context { | |
112 | I *ictx; | |
113 | uint64_t object_no; | |
f67539c2 | 114 | io::ReadExtents extents; |
9f95a23c TL |
115 | |
116 | C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length) | |
f67539c2 | 117 | : ictx(ictx), object_no(object_no), extents({{offset, length}}) { |
9f95a23c TL |
118 | ictx->readahead.inc_pending(); |
119 | } | |
120 | ||
121 | void finish(int r) override { | |
f67539c2 TL |
122 | ceph_assert(extents.size() == 1); |
123 | auto& extent = extents.front(); | |
9f95a23c TL |
124 | ldout(ictx->cct, 20) << "C_RBD_Readahead on " |
125 | << data_object_name(ictx, object_no) << ": " | |
f67539c2 | 126 | << extent.offset << "~" << extent.length << dendl; |
9f95a23c TL |
127 | ictx->readahead.dec_pending(); |
128 | } | |
129 | }; | |
130 | ||
131 | template <typename I> | |
f67539c2 | 132 | void readahead(I *ictx, const Extents& image_extents, IOContext io_context) { |
9f95a23c TL |
133 | uint64_t total_bytes = 0; |
134 | for (auto& image_extent : image_extents) { | |
135 | total_bytes += image_extent.second; | |
136 | } | |
137 | ||
138 | ictx->image_lock.lock_shared(); | |
139 | auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes); | |
140 | bool abort = ( | |
141 | ictx->readahead_disable_after_bytes != 0 && | |
142 | total_bytes_read > ictx->readahead_disable_after_bytes); | |
143 | if (abort) { | |
144 | ictx->image_lock.unlock_shared(); | |
145 | return; | |
146 | } | |
147 | ||
1e59de90 | 148 | uint64_t data_size = ictx->get_area_size(ImageArea::DATA); |
9f95a23c TL |
149 | ictx->image_lock.unlock_shared(); |
150 | ||
1e59de90 | 151 | auto readahead_extent = ictx->readahead.update(image_extents, data_size); |
9f95a23c TL |
152 | uint64_t readahead_offset = readahead_extent.first; |
153 | uint64_t readahead_length = readahead_extent.second; | |
154 | ||
155 | if (readahead_length > 0) { | |
156 | ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" | |
157 | << readahead_length << dendl; | |
158 | LightweightObjectExtents readahead_object_extents; | |
1e59de90 TL |
159 | io::util::area_to_object_extents(ictx, readahead_offset, readahead_length, |
160 | ImageArea::DATA, 0, | |
161 | &readahead_object_extents); | |
9f95a23c TL |
162 | for (auto& object_extent : readahead_object_extents) { |
163 | ldout(ictx->cct, 20) << "(readahead) " | |
164 | << data_object_name(ictx, | |
165 | object_extent.object_no) << " " | |
166 | << object_extent.offset << "~" | |
167 | << object_extent.length << dendl; | |
168 | ||
169 | auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no, | |
170 | object_extent.offset, | |
171 | object_extent.length); | |
172 | auto req = io::ObjectDispatchSpec::create_read( | |
173 | ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, | |
f67539c2 | 174 | &req_comp->extents, io_context, 0, 0, {}, nullptr, req_comp); |
9f95a23c TL |
175 | req->send(); |
176 | } | |
177 | ||
178 | ictx->perfcounter->inc(l_librbd_readahead); | |
179 | ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length); | |
180 | } | |
181 | } | |
182 | ||
11fdf7f2 TL |
183 | template <typename I> |
184 | struct C_UpdateTimestamp : public Context { | |
185 | public: | |
186 | I& m_image_ctx; | |
187 | bool m_modify; // if modify set to 'true', modify timestamp is updated, | |
188 | // access timestamp otherwise | |
189 | AsyncOperation m_async_op; | |
91327a77 | 190 | |
11fdf7f2 TL |
191 | C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) { |
192 | m_async_op.start_op(*get_image_ctx(&m_image_ctx)); | |
91327a77 | 193 | } |
11fdf7f2 TL |
194 | ~C_UpdateTimestamp() override { |
195 | m_async_op.finish_op(); | |
91327a77 AA |
196 | } |
197 | ||
11fdf7f2 TL |
198 | void send() { |
199 | librados::ObjectWriteOperation op; | |
200 | if (m_modify) { | |
201 | cls_client::set_modify_timestamp(&op); | |
202 | } else { | |
203 | cls_client::set_access_timestamp(&op); | |
91327a77 | 204 | } |
7c673cae | 205 | |
11fdf7f2 TL |
206 | auto comp = librbd::util::create_rados_callback(this); |
207 | int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); | |
208 | ceph_assert(r == 0); | |
209 | comp->release(); | |
7c673cae FG |
210 | } |
211 | ||
212 | void finish(int r) override { | |
11fdf7f2 | 213 | // ignore errors updating timestamp |
7c673cae FG |
214 | } |
215 | }; | |
216 | ||
11fdf7f2 TL |
217 | bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, |
218 | uint64_t interval) { | |
219 | return (interval && | |
220 | (static_cast<uint64_t>(now.sec()) >= interval + timestamp)); | |
224ce89b WB |
221 | } |
222 | ||
11fdf7f2 | 223 | } // anonymous namespace |
c07f9fc5 | 224 | |
9f95a23c TL |
225 | #undef dout_prefix |
226 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ | |
227 | << " " << __func__ << ": " | |
228 | ||
7c673cae FG |
229 | template <typename I> |
230 | void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c, | |
1e59de90 | 231 | Extents &&image_extents, ImageArea area, |
f67539c2 TL |
232 | ReadResult &&read_result, IOContext io_context, |
233 | int op_flags, int read_flags, | |
31f18b77 | 234 | const ZTracer::Trace &parent_trace) { |
1e59de90 | 235 | ImageReadRequest<I> req(*ictx, c, std::move(image_extents), area, |
f67539c2 TL |
236 | std::move(read_result), io_context, op_flags, |
237 | read_flags, parent_trace); | |
7c673cae FG |
238 | req.send(); |
239 | } | |
240 | ||
241 | template <typename I> | |
242 | void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c, | |
1e59de90 TL |
243 | Extents &&image_extents, ImageArea area, |
244 | bufferlist &&bl, int op_flags, | |
31f18b77 | 245 | const ZTracer::Trace &parent_trace) { |
1e59de90 TL |
246 | ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), area, |
247 | std::move(bl), op_flags, parent_trace); | |
7c673cae FG |
248 | req.send(); |
249 | } | |
250 | ||
251 | template <typename I> | |
252 | void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c, | |
1e59de90 | 253 | Extents &&image_extents, ImageArea area, |
11fdf7f2 | 254 | uint32_t discard_granularity_bytes, |
f67539c2 | 255 | const ZTracer::Trace &parent_trace) { |
1e59de90 TL |
256 | ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents), area, |
257 | discard_granularity_bytes, parent_trace); | |
7c673cae FG |
258 | req.send(); |
259 | } | |
260 | ||
261 | template <typename I> | |
31f18b77 | 262 | void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c, |
11fdf7f2 TL |
263 | FlushSource flush_source, |
264 | const ZTracer::Trace &parent_trace) { | |
265 | ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace); | |
7c673cae FG |
266 | req.send(); |
267 | } | |
268 | ||
269 | template <typename I> | |
270 | void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c, | |
1e59de90 TL |
271 | Extents &&image_extents, ImageArea area, |
272 | bufferlist &&bl, int op_flags, | |
31f18b77 | 273 | const ZTracer::Trace &parent_trace) { |
1e59de90 TL |
274 | ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents), area, |
275 | std::move(bl), op_flags, parent_trace); | |
7c673cae FG |
276 | req.send(); |
277 | } | |
278 | ||
c07f9fc5 FG |
279 | template <typename I> |
280 | void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c, | |
281 | Extents &&image_extents, | |
1e59de90 | 282 | ImageArea area, |
c07f9fc5 FG |
283 | bufferlist &&cmp_bl, |
284 | bufferlist &&bl, | |
285 | uint64_t *mismatch_offset, | |
1e59de90 | 286 | int op_flags, |
c07f9fc5 | 287 | const ZTracer::Trace &parent_trace) { |
1e59de90 | 288 | ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents), area, |
c07f9fc5 | 289 | std::move(cmp_bl), std::move(bl), |
1e59de90 | 290 | mismatch_offset, op_flags, parent_trace); |
c07f9fc5 FG |
291 | req.send(); |
292 | } | |
293 | ||
7c673cae FG |
294 | template <typename I> |
295 | void ImageRequest<I>::send() { | |
296 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 297 | ceph_assert(m_aio_comp->is_initialized(get_aio_type())); |
494da23a | 298 | ceph_assert(m_aio_comp->is_started()); |
7c673cae FG |
299 | |
300 | CephContext *cct = image_ctx.cct; | |
301 | AioCompletion *aio_comp = this->m_aio_comp; | |
302 | ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " | |
c07f9fc5 | 303 | << "completion=" << aio_comp << dendl; |
7c673cae | 304 | |
f67539c2 TL |
305 | update_timestamp(); |
306 | send_request(); | |
7c673cae FG |
307 | } |
308 | ||
309 | template <typename I> | |
11fdf7f2 TL |
310 | void ImageRequest<I>::update_timestamp() { |
311 | bool modify = (get_aio_type() != AIO_TYPE_READ); | |
312 | uint64_t update_interval; | |
313 | if (modify) { | |
314 | update_interval = m_image_ctx.mtime_update_interval; | |
315 | } else { | |
316 | update_interval = m_image_ctx.atime_update_interval; | |
317 | } | |
7c673cae | 318 | |
11fdf7f2 TL |
319 | if (update_interval == 0) { |
320 | return; | |
321 | } | |
322 | ||
323 | utime_t (I::*get_timestamp_fn)() const; | |
324 | void (I::*set_timestamp_fn)(utime_t); | |
325 | if (modify) { | |
326 | get_timestamp_fn = &I::get_modify_timestamp; | |
327 | set_timestamp_fn = &I::set_modify_timestamp; | |
328 | } else { | |
329 | get_timestamp_fn = &I::get_access_timestamp; | |
330 | set_timestamp_fn = &I::set_access_timestamp; | |
331 | } | |
332 | ||
333 | utime_t ts = ceph_clock_now(); | |
334 | { | |
9f95a23c | 335 | std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
336 | if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx), |
337 | update_interval)) { | |
338 | return; | |
339 | } | |
340 | } | |
341 | ||
342 | { | |
9f95a23c | 343 | std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
344 | bool update = should_update_timestamp( |
345 | ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval); | |
346 | if (!update) { | |
347 | return; | |
348 | } | |
349 | ||
350 | std::invoke(set_timestamp_fn, m_image_ctx, ts); | |
351 | } | |
352 | ||
353 | // TODO we fire and forget this outside the IO path to prevent | |
354 | // potential race conditions with librbd client IO callbacks | |
355 | // between different threads (e.g. librados and object cacher) | |
356 | ldout(m_image_ctx.cct, 10) << get_request_type() << dendl; | |
357 | auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify); | |
358 | req->send(); | |
7c673cae FG |
359 | } |
360 | ||
361 | template <typename I> | |
362 | ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, | |
1e59de90 | 363 | Extents &&image_extents, ImageArea area, |
f67539c2 TL |
364 | ReadResult &&read_result, |
365 | IOContext io_context, int op_flags, | |
366 | int read_flags, | |
367 | const ZTracer::Trace &parent_trace) | |
1e59de90 TL |
368 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), area, |
369 | "read", parent_trace), | |
370 | m_io_context(io_context), m_op_flags(op_flags), m_read_flags(read_flags) { | |
7c673cae FG |
371 | aio_comp->read_result = std::move(read_result); |
372 | } | |
373 | ||
7c673cae FG |
374 | template <typename I> |
375 | void ImageReadRequest<I>::send_request() { | |
376 | I &image_ctx = this->m_image_ctx; | |
377 | CephContext *cct = image_ctx.cct; | |
378 | ||
379 | auto &image_extents = this->m_image_extents; | |
1e59de90 TL |
380 | if (this->m_image_area == ImageArea::DATA && |
381 | image_ctx.cache && image_ctx.readahead_max_bytes > 0 && | |
7c673cae | 382 | !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { |
1e59de90 | 383 | readahead(get_image_ctx(&image_ctx), image_extents, m_io_context); |
9f95a23c | 384 | } |
7c673cae | 385 | |
9f95a23c TL |
386 | // map image extents to object extents |
387 | LightweightObjectExtents object_extents; | |
f67539c2 | 388 | uint64_t buffer_ofs = 0; |
9f95a23c TL |
389 | for (auto &extent : image_extents) { |
390 | if (extent.second == 0) { | |
391 | continue; | |
7c673cae | 392 | } |
7c673cae | 393 | |
1e59de90 TL |
394 | util::area_to_object_extents(&image_ctx, extent.first, extent.second, |
395 | this->m_image_area, buffer_ofs, | |
396 | &object_extents); | |
9f95a23c | 397 | buffer_ofs += extent.second; |
7c673cae | 398 | } |
7c673cae | 399 | |
9f95a23c | 400 | AioCompletion *aio_comp = this->m_aio_comp; |
f67539c2 TL |
401 | aio_comp->read_result.set_image_extents(image_extents); |
402 | ||
403 | // issue the requests | |
9f95a23c TL |
404 | aio_comp->set_request_count(object_extents.size()); |
405 | for (auto &oe : object_extents) { | |
406 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
407 | << oe.offset << "~" << oe.length << " from " | |
408 | << oe.buffer_extents << dendl; | |
7c673cae | 409 | |
9f95a23c | 410 | auto req_comp = new io::ReadResult::C_ObjectReadRequest( |
f67539c2 | 411 | aio_comp, {{oe.offset, oe.length, std::move(oe.buffer_extents)}}); |
9f95a23c | 412 | auto req = ObjectDispatchSpec::create_read( |
f67539c2 | 413 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no, |
1e59de90 | 414 | &req_comp->extents, m_io_context, m_op_flags, m_read_flags, |
f67539c2 | 415 | this->m_trace, nullptr, req_comp); |
9f95a23c TL |
416 | req->send(); |
417 | } | |
7c673cae FG |
418 | |
419 | image_ctx.perfcounter->inc(l_librbd_rd); | |
420 | image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); | |
421 | } | |
422 | ||
7c673cae FG |
423 | template <typename I> |
424 | void AbstractImageWriteRequest<I>::send_request() { | |
425 | I &image_ctx = this->m_image_ctx; | |
7c673cae | 426 | |
7c673cae FG |
427 | bool journaling = false; |
428 | ||
429 | AioCompletion *aio_comp = this->m_aio_comp; | |
7c673cae FG |
430 | { |
431 | // prevent image size from changing between computing clip and recording | |
432 | // pending async operation | |
9f95a23c | 433 | std::shared_lock image_locker{image_ctx.image_lock}; |
7c673cae FG |
434 | journaling = (image_ctx.journal != nullptr && |
435 | image_ctx.journal->is_journal_appending()); | |
436 | } | |
437 | ||
9f95a23c TL |
438 | uint64_t clip_len = 0; |
439 | LightweightObjectExtents object_extents; | |
440 | for (auto &extent : this->m_image_extents) { | |
441 | if (extent.second == 0) { | |
442 | continue; | |
443 | } | |
444 | ||
445 | // map to object extents | |
1e59de90 TL |
446 | io::util::area_to_object_extents(&image_ctx, extent.first, extent.second, |
447 | this->m_image_area, clip_len, | |
448 | &object_extents); | |
9f95a23c TL |
449 | clip_len += extent.second; |
450 | } | |
451 | ||
11fdf7f2 | 452 | int ret = prune_object_extents(&object_extents); |
c07f9fc5 FG |
453 | if (ret < 0) { |
454 | aio_comp->fail(ret); | |
455 | return; | |
456 | } | |
7c673cae | 457 | |
39ae355f TL |
458 | // reflect changes in object_extents back to m_image_extents |
459 | if (ret == 1) { | |
460 | this->m_image_extents.clear(); | |
461 | for (auto& object_extent : object_extents) { | |
1e59de90 TL |
462 | auto [image_extents, _] = io::util::object_to_area_extents( |
463 | &image_ctx, object_extent.object_no, | |
464 | {{object_extent.offset, object_extent.length}}); | |
39ae355f TL |
465 | this->m_image_extents.insert(this->m_image_extents.end(), |
466 | image_extents.begin(), image_extents.end()); | |
467 | } | |
468 | } | |
469 | ||
9f95a23c | 470 | aio_comp->set_request_count(object_extents.size()); |
7c673cae FG |
471 | if (!object_extents.empty()) { |
472 | uint64_t journal_tid = 0; | |
7c673cae FG |
473 | if (journaling) { |
474 | // in-flight ops are flushed prior to closing the journal | |
11fdf7f2 TL |
475 | ceph_assert(image_ctx.journal != NULL); |
476 | journal_tid = append_journal_event(m_synchronous); | |
7c673cae FG |
477 | } |
478 | ||
1e59de90 TL |
479 | // it's very important that IOContext is captured here instead of |
480 | // e.g. at the API layer so that an up-to-date snap context is used | |
481 | // when owning the exclusive lock | |
482 | send_object_requests(object_extents, image_ctx.get_data_io_context(), | |
483 | journal_tid); | |
7c673cae FG |
484 | } |
485 | ||
486 | update_stats(clip_len); | |
7c673cae FG |
487 | } |
488 | ||
489 | template <typename I> | |
490 | void AbstractImageWriteRequest<I>::send_object_requests( | |
f67539c2 | 491 | const LightweightObjectExtents &object_extents, IOContext io_context, |
11fdf7f2 | 492 | uint64_t journal_tid) { |
7c673cae FG |
493 | I &image_ctx = this->m_image_ctx; |
494 | CephContext *cct = image_ctx.cct; | |
495 | ||
496 | AioCompletion *aio_comp = this->m_aio_comp; | |
9f95a23c TL |
497 | bool single_extent = (object_extents.size() == 1); |
498 | for (auto& oe : object_extents) { | |
499 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
500 | << oe.offset << "~" << oe.length << " from " | |
501 | << oe.buffer_extents << dendl; | |
7c673cae | 502 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); |
f67539c2 TL |
503 | auto request = create_object_request(oe, io_context, journal_tid, |
504 | single_extent, req_comp); | |
9f95a23c | 505 | request->send(); |
7c673cae FG |
506 | } |
507 | } | |
508 | ||
509 | template <typename I> | |
9f95a23c TL |
510 | void ImageWriteRequest<I>::assemble_extent( |
511 | const LightweightObjectExtent &object_extent, bufferlist *bl) { | |
7c673cae FG |
512 | for (auto q = object_extent.buffer_extents.begin(); |
513 | q != object_extent.buffer_extents.end(); ++q) { | |
514 | bufferlist sub_bl; | |
515 | sub_bl.substr_of(m_bl, q->first, q->second); | |
516 | bl->claim_append(sub_bl); | |
517 | } | |
518 | } | |
519 | ||
520 | template <typename I> | |
11fdf7f2 | 521 | uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
522 | I &image_ctx = this->m_image_ctx; |
523 | ||
524 | uint64_t tid = 0; | |
525 | uint64_t buffer_offset = 0; | |
11fdf7f2 | 526 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
527 | for (auto &extent : this->m_image_extents) { |
528 | bufferlist sub_bl; | |
529 | sub_bl.substr_of(m_bl, buffer_offset, extent.second); | |
530 | buffer_offset += extent.second; | |
531 | ||
532 | tid = image_ctx.journal->append_write_event(extent.first, extent.second, | |
11fdf7f2 | 533 | sub_bl, synchronous); |
7c673cae FG |
534 | } |
535 | ||
7c673cae FG |
536 | return tid; |
537 | } | |
538 | ||
7c673cae | 539 | template <typename I> |
11fdf7f2 | 540 | ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request( |
f67539c2 | 541 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 542 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 543 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
544 | |
545 | bufferlist bl; | |
f67539c2 TL |
546 | if (single_extent && object_extent.buffer_extents.size() == 1 && |
547 | m_bl.length() == object_extent.length) { | |
9f95a23c TL |
548 | // optimization for single object/buffer extent writes |
549 | bl = std::move(m_bl); | |
550 | } else { | |
551 | assemble_extent(object_extent, &bl); | |
552 | } | |
553 | ||
11fdf7f2 | 554 | auto req = ObjectDispatchSpec::create_write( |
9f95a23c | 555 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 TL |
556 | object_extent.offset, std::move(bl), io_context, m_op_flags, 0, |
557 | std::nullopt, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
558 | return req; |
559 | } | |
560 | ||
561 | template <typename I> | |
562 | void ImageWriteRequest<I>::update_stats(size_t length) { | |
563 | I &image_ctx = this->m_image_ctx; | |
564 | image_ctx.perfcounter->inc(l_librbd_wr); | |
565 | image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); | |
566 | } | |
567 | ||
568 | template <typename I> | |
11fdf7f2 | 569 | uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
570 | I &image_ctx = this->m_image_ctx; |
571 | ||
572 | uint64_t tid = 0; | |
11fdf7f2 | 573 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae | 574 | for (auto &extent : this->m_image_extents) { |
11fdf7f2 TL |
575 | journal::EventEntry event_entry( |
576 | journal::AioDiscardEvent(extent.first, | |
577 | extent.second, | |
578 | this->m_discard_granularity_bytes)); | |
7c673cae | 579 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
580 | extent.first, extent.second, |
581 | synchronous, 0); | |
7c673cae FG |
582 | } |
583 | ||
7c673cae FG |
584 | return tid; |
585 | } | |
586 | ||
7c673cae | 587 | template <typename I> |
11fdf7f2 | 588 | ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request( |
f67539c2 | 589 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 590 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 591 | I &image_ctx = this->m_image_ctx; |
11fdf7f2 | 592 | auto req = ObjectDispatchSpec::create_discard( |
9f95a23c | 593 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 | 594 | object_extent.offset, object_extent.length, io_context, |
11fdf7f2 TL |
595 | OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace, |
596 | on_finish); | |
7c673cae FG |
597 | return req; |
598 | } | |
599 | ||
600 | template <typename I> | |
601 | void ImageDiscardRequest<I>::update_stats(size_t length) { | |
602 | I &image_ctx = this->m_image_ctx; | |
603 | image_ctx.perfcounter->inc(l_librbd_discard); | |
604 | image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); | |
605 | } | |
606 | ||
11fdf7f2 TL |
607 | template <typename I> |
608 | int ImageDiscardRequest<I>::prune_object_extents( | |
9f95a23c | 609 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 TL |
610 | if (m_discard_granularity_bytes == 0) { |
611 | return 0; | |
612 | } | |
613 | ||
614 | // Align the range to discard_granularity_bytes boundary and skip | |
615 | // and discards that are too small to free up any space. | |
616 | // | |
617 | // discard_granularity_bytes >= object_size && tail truncation | |
618 | // is a special case for filestore | |
619 | bool prune_required = false; | |
39ae355f | 620 | bool length_modified = false; |
11fdf7f2 TL |
621 | auto object_size = this->m_image_ctx.layout.object_size; |
622 | auto discard_granularity_bytes = std::min(m_discard_granularity_bytes, | |
623 | object_size); | |
624 | auto xform_lambda = | |
39ae355f | 625 | [discard_granularity_bytes, object_size, &prune_required, &length_modified] |
9f95a23c | 626 | (LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
627 | auto& offset = object_extent.offset; |
628 | auto& length = object_extent.length; | |
629 | auto next_offset = offset + length; | |
630 | ||
631 | if ((discard_granularity_bytes < object_size) || | |
632 | (next_offset < object_size)) { | |
633 | offset = p2roundup<uint64_t>(offset, discard_granularity_bytes); | |
634 | next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes); | |
635 | if (offset >= next_offset) { | |
636 | prune_required = true; | |
637 | length = 0; | |
638 | } else { | |
39ae355f TL |
639 | auto new_length = next_offset - offset; |
640 | if (length != new_length) { | |
641 | length_modified = true; | |
642 | length = new_length; | |
643 | } | |
11fdf7f2 TL |
644 | } |
645 | } | |
646 | }; | |
647 | std::for_each(object_extents->begin(), object_extents->end(), | |
648 | xform_lambda); | |
649 | ||
650 | if (prune_required) { | |
651 | // one or more object extents were skipped | |
652 | auto remove_lambda = | |
9f95a23c | 653 | [](const LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
654 | return (object_extent.length == 0); |
655 | }; | |
656 | object_extents->erase( | |
657 | std::remove_if(object_extents->begin(), object_extents->end(), | |
658 | remove_lambda), | |
659 | object_extents->end()); | |
660 | } | |
39ae355f TL |
661 | |
662 | // object extents were modified, image extents needs updating | |
663 | if (length_modified || prune_required) { | |
664 | return 1; | |
665 | } | |
666 | ||
11fdf7f2 TL |
667 | return 0; |
668 | } | |
669 | ||
7c673cae FG |
670 | template <typename I> |
671 | void ImageFlushRequest<I>::send_request() { | |
672 | I &image_ctx = this->m_image_ctx; | |
7c673cae FG |
673 | |
674 | bool journaling = false; | |
675 | { | |
9f95a23c | 676 | std::shared_lock image_locker{image_ctx.image_lock}; |
11fdf7f2 TL |
677 | journaling = (m_flush_source == FLUSH_SOURCE_USER && |
678 | image_ctx.journal != nullptr && | |
7c673cae FG |
679 | image_ctx.journal->is_journal_appending()); |
680 | } | |
681 | ||
682 | AioCompletion *aio_comp = this->m_aio_comp; | |
11fdf7f2 TL |
683 | aio_comp->set_request_count(1); |
684 | ||
685 | Context *ctx = new C_AioRequest(aio_comp); | |
686 | ||
687 | // ensure no locks are held when flush is complete | |
688 | ctx = librbd::util::create_async_context_callback(image_ctx, ctx); | |
689 | ||
9f95a23c | 690 | uint64_t journal_tid = 0; |
7c673cae FG |
691 | if (journaling) { |
692 | // in-flight ops are flushed prior to closing the journal | |
9f95a23c TL |
693 | ceph_assert(image_ctx.journal != NULL); |
694 | journal_tid = image_ctx.journal->append_io_event( | |
11fdf7f2 | 695 | journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0); |
494da23a | 696 | image_ctx.journal->user_flushed(); |
7c673cae FG |
697 | } |
698 | ||
9f95a23c TL |
699 | auto object_dispatch_spec = ObjectDispatchSpec::create_flush( |
700 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid, | |
701 | this->m_trace, ctx); | |
702 | ctx = new LambdaContext([object_dispatch_spec](int r) { | |
703 | object_dispatch_spec->send(); | |
704 | }); | |
705 | ||
11fdf7f2 | 706 | // ensure all in-flight IOs are settled if non-user flush request |
f67539c2 TL |
707 | if (m_flush_source == FLUSH_SOURCE_WRITEBACK) { |
708 | ctx->complete(0); | |
709 | } else { | |
710 | aio_comp->async_op.flush(ctx); | |
711 | } | |
11fdf7f2 TL |
712 | |
713 | // might be flushing during image shutdown | |
714 | if (image_ctx.perfcounter != nullptr) { | |
715 | image_ctx.perfcounter->inc(l_librbd_flush); | |
716 | } | |
7c673cae FG |
717 | } |
718 | ||
7c673cae | 719 | template <typename I> |
11fdf7f2 | 720 | uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
721 | I &image_ctx = this->m_image_ctx; |
722 | ||
723 | uint64_t tid = 0; | |
11fdf7f2 | 724 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
725 | for (auto &extent : this->m_image_extents) { |
726 | journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, | |
727 | extent.second, | |
728 | m_data_bl)); | |
729 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
11fdf7f2 TL |
730 | extent.first, extent.second, |
731 | synchronous, 0); | |
7c673cae FG |
732 | } |
733 | ||
7c673cae FG |
734 | return tid; |
735 | } | |
736 | ||
7c673cae | 737 | template <typename I> |
11fdf7f2 | 738 | ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request( |
f67539c2 | 739 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 740 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 741 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
742 | |
743 | bufferlist bl; | |
11fdf7f2 TL |
744 | ObjectDispatchSpec *req; |
745 | ||
746 | if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) { | |
9f95a23c | 747 | auto buffer_extents{object_extent.buffer_extents}; |
7c673cae | 748 | |
11fdf7f2 | 749 | req = ObjectDispatchSpec::create_write_same( |
9f95a23c TL |
750 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
751 | object_extent.offset, object_extent.length, std::move(buffer_extents), | |
f67539c2 | 752 | std::move(bl), io_context, m_op_flags, journal_tid, |
11fdf7f2 | 753 | this->m_trace, on_finish); |
7c673cae FG |
754 | return req; |
755 | } | |
11fdf7f2 | 756 | req = ObjectDispatchSpec::create_write( |
9f95a23c | 757 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 TL |
758 | object_extent.offset, std::move(bl), io_context, m_op_flags, 0, |
759 | std::nullopt, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
760 | return req; |
761 | } | |
762 | ||
763 | template <typename I> | |
764 | void ImageWriteSameRequest<I>::update_stats(size_t length) { | |
765 | I &image_ctx = this->m_image_ctx; | |
766 | image_ctx.perfcounter->inc(l_librbd_ws); | |
767 | image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); | |
768 | } | |
769 | ||
c07f9fc5 FG |
770 | template <typename I> |
771 | uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( | |
11fdf7f2 | 772 | bool synchronous) { |
c07f9fc5 FG |
773 | I &image_ctx = this->m_image_ctx; |
774 | ||
775 | uint64_t tid = 0; | |
11fdf7f2 | 776 | ceph_assert(this->m_image_extents.size() == 1); |
c07f9fc5 | 777 | auto &extent = this->m_image_extents.front(); |
39ae355f TL |
778 | tid = image_ctx.journal->append_compare_and_write_event(extent.first, |
779 | extent.second, | |
780 | m_cmp_bl, | |
781 | m_bl, | |
782 | synchronous); | |
c07f9fc5 FG |
783 | |
784 | return tid; | |
785 | } | |
786 | ||
c07f9fc5 FG |
787 | template <typename I> |
788 | void ImageCompareAndWriteRequest<I>::assemble_extent( | |
39ae355f TL |
789 | const LightweightObjectExtent &object_extent, bufferlist *bl, |
790 | bufferlist *cmp_bl) { | |
c07f9fc5 FG |
791 | for (auto q = object_extent.buffer_extents.begin(); |
792 | q != object_extent.buffer_extents.end(); ++q) { | |
793 | bufferlist sub_bl; | |
794 | sub_bl.substr_of(m_bl, q->first, q->second); | |
795 | bl->claim_append(sub_bl); | |
39ae355f TL |
796 | |
797 | bufferlist sub_cmp_bl; | |
798 | sub_cmp_bl.substr_of(m_cmp_bl, q->first, q->second); | |
799 | cmp_bl->claim_append(sub_cmp_bl); | |
c07f9fc5 FG |
800 | } |
801 | } | |
802 | ||
c07f9fc5 | 803 | template <typename I> |
11fdf7f2 | 804 | ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request( |
f67539c2 | 805 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 806 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
c07f9fc5 FG |
807 | I &image_ctx = this->m_image_ctx; |
808 | ||
809 | bufferlist bl; | |
39ae355f TL |
810 | bufferlist cmp_bl; |
811 | assemble_extent(object_extent, &bl, &cmp_bl); | |
11fdf7f2 | 812 | auto req = ObjectDispatchSpec::create_compare_and_write( |
9f95a23c | 813 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
39ae355f | 814 | object_extent.offset, std::move(cmp_bl), std::move(bl), io_context, |
9f95a23c | 815 | m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish); |
c07f9fc5 FG |
816 | return req; |
817 | } | |
818 | ||
819 | template <typename I> | |
820 | void ImageCompareAndWriteRequest<I>::update_stats(size_t length) { | |
821 | I &image_ctx = this->m_image_ctx; | |
822 | image_ctx.perfcounter->inc(l_librbd_cmp); | |
823 | image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length); | |
824 | } | |
825 | ||
826 | template <typename I> | |
11fdf7f2 | 827 | int ImageCompareAndWriteRequest<I>::prune_object_extents( |
9f95a23c | 828 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 | 829 | if (object_extents->size() > 1) |
c07f9fc5 FG |
830 | return -EINVAL; |
831 | ||
832 | I &image_ctx = this->m_image_ctx; | |
c07f9fc5 | 833 | uint64_t su = image_ctx.layout.stripe_unit; |
9f95a23c | 834 | auto& object_extent = object_extents->front(); |
39ae355f | 835 | if (su == 0 || (object_extent.offset % su + object_extent.length > su)) |
c07f9fc5 FG |
836 | return -EINVAL; |
837 | ||
838 | return 0; | |
839 | } | |
840 | ||
f67539c2 TL |
841 | template <typename I> |
842 | ImageListSnapsRequest<I>::ImageListSnapsRequest( | |
843 | I& image_ctx, AioCompletion* aio_comp, Extents&& image_extents, | |
1e59de90 TL |
844 | ImageArea area, SnapIds&& snap_ids, int list_snaps_flags, |
845 | SnapshotDelta* snapshot_delta, const ZTracer::Trace& parent_trace) | |
846 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), area, | |
847 | "list-snaps", parent_trace), | |
f67539c2 TL |
848 | m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags), |
849 | m_snapshot_delta(snapshot_delta) { | |
850 | } | |
851 | ||
852 | template <typename I> | |
853 | void ImageListSnapsRequest<I>::send_request() { | |
854 | I &image_ctx = this->m_image_ctx; | |
855 | CephContext *cct = image_ctx.cct; | |
856 | ||
857 | // map image extents to object extents | |
858 | auto &image_extents = this->m_image_extents; | |
859 | std::map<uint64_t, Extents> object_number_extents; | |
860 | for (auto& image_extent : image_extents) { | |
861 | if (image_extent.second == 0) { | |
862 | continue; | |
863 | } | |
864 | ||
865 | striper::LightweightObjectExtents object_extents; | |
1e59de90 TL |
866 | io::util::area_to_object_extents(&image_ctx, image_extent.first, |
867 | image_extent.second, this->m_image_area, 0, | |
868 | &object_extents); | |
f67539c2 TL |
869 | for (auto& object_extent : object_extents) { |
870 | object_number_extents[object_extent.object_no].emplace_back( | |
871 | object_extent.offset, object_extent.length); | |
872 | } | |
873 | } | |
874 | ||
875 | // reassemble the deltas back into image-extents when complete | |
876 | auto aio_comp = this->m_aio_comp; | |
877 | aio_comp->set_request_count(1); | |
878 | auto assemble_ctx = new C_AssembleSnapshotDeltas<I>( | |
879 | &image_ctx, aio_comp, m_snapshot_delta); | |
880 | auto sub_aio_comp = AioCompletion::create_and_start< | |
881 | Context, &Context::complete>(assemble_ctx, get_image_ctx(&image_ctx), | |
882 | AIO_TYPE_GENERIC); | |
883 | ||
884 | // issue the requests | |
885 | sub_aio_comp->set_request_count(object_number_extents.size()); | |
886 | for (auto& oe : object_number_extents) { | |
887 | ldout(cct, 20) << data_object_name(&image_ctx, oe.first) << " " | |
888 | << oe.second << dendl; | |
889 | auto ctx = new C_AioRequest(sub_aio_comp); | |
890 | auto req = ObjectDispatchSpec::create_list_snaps( | |
891 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.first, std::move(oe.second), | |
892 | SnapIds{m_snap_ids}, m_list_snaps_flags, this->m_trace, | |
893 | assemble_ctx->get_snapshot_delta(oe.first), ctx); | |
894 | req->send(); | |
895 | } | |
896 | } | |
897 | ||
7c673cae FG |
898 | } // namespace io |
899 | } // namespace librbd | |
900 | ||
// Explicit instantiations of the image request class templates for
// librbd::ImageCtx.
901 | template class librbd::io::ImageRequest<librbd::ImageCtx>; | |
902 | template class librbd::io::ImageReadRequest<librbd::ImageCtx>; | |
903 | template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>; | |
904 | template class librbd::io::ImageWriteRequest<librbd::ImageCtx>; | |
905 | template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>; | |
906 | template class librbd::io::ImageFlushRequest<librbd::ImageCtx>; | |
907 | template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>; | |
c07f9fc5 | 908 | template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>; |
f67539c2 | 909 | template class librbd::io::ImageListSnapsRequest<librbd::ImageCtx>; |