]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ImageRequest.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/internal.h" | |
7 | #include "librbd/Journal.h" | |
b32b8144 | 8 | #include "librbd/Types.h" |
7c673cae | 9 | #include "librbd/Utils.h" |
f67539c2 | 10 | #include "librbd/asio/ContextWQ.h" |
7c673cae | 11 | #include "librbd/io/AioCompletion.h" |
11fdf7f2 TL |
12 | #include "librbd/io/AsyncOperation.h" |
13 | #include "librbd/io/ObjectDispatchInterface.h" | |
14 | #include "librbd/io/ObjectDispatchSpec.h" | |
f67539c2 | 15 | #include "librbd/io/ObjectDispatcherInterface.h" |
11fdf7f2 | 16 | #include "librbd/io/Utils.h" |
7c673cae FG |
17 | #include "librbd/journal/Types.h" |
18 | #include "include/rados/librados.hpp" | |
f67539c2 | 19 | #include "common/errno.h" |
11fdf7f2 | 20 | #include "common/perf_counters.h" |
7c673cae | 21 | #include "osdc/Striper.h" |
11fdf7f2 TL |
22 | #include <algorithm> |
23 | #include <functional> | |
f67539c2 | 24 | #include <map> |
7c673cae FG |
25 | |
26 | #define dout_subsys ceph_subsys_rbd | |
27 | #undef dout_prefix | |
9f95a23c | 28 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": " |
7c673cae FG |
29 | |
30 | namespace librbd { | |
31 | namespace io { | |
32 | ||
9f95a23c | 33 | using librbd::util::data_object_name; |
11fdf7f2 | 34 | using librbd::util::get_image_ctx; |
7c673cae FG |
35 | |
36 | namespace { | |
37 | ||
f67539c2 TL |
38 | template <typename I> |
39 | struct C_AssembleSnapshotDeltas : public C_AioRequest { | |
40 | I* image_ctx; | |
41 | SnapshotDelta* snapshot_delta; | |
42 | ||
43 | ceph::mutex lock = ceph::make_mutex( | |
44 | "librbd::io::C_AssembleSnapshotDeltas::lock", false); | |
45 | std::map<uint64_t, SnapshotDelta> object_snapshot_delta; | |
46 | ||
47 | C_AssembleSnapshotDeltas(I* image_ctx, AioCompletion* aio_comp, | |
48 | SnapshotDelta* snapshot_delta) | |
49 | : C_AioRequest(aio_comp), | |
50 | image_ctx(image_ctx), snapshot_delta(snapshot_delta) { | |
51 | } | |
52 | ||
53 | SnapshotDelta* get_snapshot_delta(uint64_t object_no) { | |
54 | std::unique_lock locker{lock}; | |
55 | return &object_snapshot_delta[object_no]; | |
56 | } | |
57 | ||
58 | void finish(int r) override { | |
59 | auto cct = image_ctx->cct; | |
60 | ||
61 | if (r < 0) { | |
62 | lderr(cct) << "C_AssembleSnapshotDeltas: list snaps failed: " | |
63 | << cpp_strerror(r) << dendl; | |
64 | C_AioRequest::finish(r); | |
65 | return; | |
66 | } | |
67 | ||
68 | std::unique_lock locker{lock}; | |
69 | *snapshot_delta = {}; | |
70 | for (auto& [object_no, object_snapshot_delta] : object_snapshot_delta) { | |
71 | SnapshotDelta image_snapshot_delta; | |
72 | object_to_image_intervals(object_no, object_snapshot_delta, | |
73 | &image_snapshot_delta, snapshot_delta); | |
74 | ||
75 | ldout(cct, 20) << "object_no=" << object_no << ", " | |
76 | << "object_snapshot_delta=" | |
77 | << object_snapshot_delta << ", " | |
78 | << "image_snapshot_delta=" << image_snapshot_delta | |
79 | << dendl; | |
80 | } | |
81 | ||
82 | ldout(cct, 20) << "snapshot_delta=" << *snapshot_delta << dendl; | |
83 | C_AioRequest::finish(0); | |
84 | } | |
85 | ||
86 | void object_to_image_intervals( | |
87 | uint64_t object_no, const SnapshotDelta& object_snapshot_delta, | |
88 | SnapshotDelta* image_snapshot_delta, | |
89 | SnapshotDelta* assembled_image_snapshot_delta) { | |
90 | for (auto& [key, object_extents] : object_snapshot_delta) { | |
91 | for (auto& object_extent : object_extents) { | |
92 | Extents image_extents; | |
93 | io::util::extent_to_file(image_ctx, object_no, object_extent.get_off(), | |
94 | object_extent.get_len(), image_extents); | |
95 | ||
96 | auto& intervals = (*image_snapshot_delta)[key]; | |
97 | auto& assembled_intervals = (*assembled_image_snapshot_delta)[key]; | |
98 | for (auto [image_offset, image_length] : image_extents) { | |
99 | SparseExtent sparse_extent{object_extent.get_val().state, | |
100 | image_length}; | |
101 | intervals.insert(image_offset, image_length, sparse_extent); | |
102 | assembled_intervals.insert(image_offset, image_length, | |
103 | sparse_extent); | |
104 | } | |
105 | } | |
106 | } | |
107 | } | |
108 | }; | |
109 | ||
9f95a23c TL |
110 | template <typename I> |
111 | struct C_RBD_Readahead : public Context { | |
112 | I *ictx; | |
113 | uint64_t object_no; | |
f67539c2 | 114 | io::ReadExtents extents; |
9f95a23c TL |
115 | |
116 | C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length) | |
f67539c2 | 117 | : ictx(ictx), object_no(object_no), extents({{offset, length}}) { |
9f95a23c TL |
118 | ictx->readahead.inc_pending(); |
119 | } | |
120 | ||
121 | void finish(int r) override { | |
f67539c2 TL |
122 | ceph_assert(extents.size() == 1); |
123 | auto& extent = extents.front(); | |
9f95a23c TL |
124 | ldout(ictx->cct, 20) << "C_RBD_Readahead on " |
125 | << data_object_name(ictx, object_no) << ": " | |
f67539c2 | 126 | << extent.offset << "~" << extent.length << dendl; |
9f95a23c TL |
127 | ictx->readahead.dec_pending(); |
128 | } | |
129 | }; | |
130 | ||
131 | template <typename I> | |
f67539c2 | 132 | void readahead(I *ictx, const Extents& image_extents, IOContext io_context) { |
9f95a23c TL |
133 | uint64_t total_bytes = 0; |
134 | for (auto& image_extent : image_extents) { | |
135 | total_bytes += image_extent.second; | |
136 | } | |
137 | ||
138 | ictx->image_lock.lock_shared(); | |
139 | auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes); | |
140 | bool abort = ( | |
141 | ictx->readahead_disable_after_bytes != 0 && | |
142 | total_bytes_read > ictx->readahead_disable_after_bytes); | |
143 | if (abort) { | |
144 | ictx->image_lock.unlock_shared(); | |
145 | return; | |
146 | } | |
147 | ||
f67539c2 | 148 | uint64_t image_size = ictx->get_effective_image_size(ictx->snap_id); |
9f95a23c TL |
149 | ictx->image_lock.unlock_shared(); |
150 | ||
151 | auto readahead_extent = ictx->readahead.update(image_extents, image_size); | |
152 | uint64_t readahead_offset = readahead_extent.first; | |
153 | uint64_t readahead_length = readahead_extent.second; | |
154 | ||
155 | if (readahead_length > 0) { | |
156 | ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" | |
157 | << readahead_length << dendl; | |
158 | LightweightObjectExtents readahead_object_extents; | |
f67539c2 TL |
159 | io::util::file_to_extents(ictx, readahead_offset, readahead_length, 0, |
160 | &readahead_object_extents); | |
9f95a23c TL |
161 | for (auto& object_extent : readahead_object_extents) { |
162 | ldout(ictx->cct, 20) << "(readahead) " | |
163 | << data_object_name(ictx, | |
164 | object_extent.object_no) << " " | |
165 | << object_extent.offset << "~" | |
166 | << object_extent.length << dendl; | |
167 | ||
168 | auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no, | |
169 | object_extent.offset, | |
170 | object_extent.length); | |
171 | auto req = io::ObjectDispatchSpec::create_read( | |
172 | ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, | |
f67539c2 | 173 | &req_comp->extents, io_context, 0, 0, {}, nullptr, req_comp); |
9f95a23c TL |
174 | req->send(); |
175 | } | |
176 | ||
177 | ictx->perfcounter->inc(l_librbd_readahead); | |
178 | ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length); | |
179 | } | |
180 | } | |
181 | ||
11fdf7f2 TL |
182 | template <typename I> |
183 | struct C_UpdateTimestamp : public Context { | |
184 | public: | |
185 | I& m_image_ctx; | |
186 | bool m_modify; // if modify set to 'true', modify timestamp is updated, | |
187 | // access timestamp otherwise | |
188 | AsyncOperation m_async_op; | |
91327a77 | 189 | |
11fdf7f2 TL |
190 | C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) { |
191 | m_async_op.start_op(*get_image_ctx(&m_image_ctx)); | |
91327a77 | 192 | } |
11fdf7f2 TL |
193 | ~C_UpdateTimestamp() override { |
194 | m_async_op.finish_op(); | |
91327a77 AA |
195 | } |
196 | ||
11fdf7f2 TL |
197 | void send() { |
198 | librados::ObjectWriteOperation op; | |
199 | if (m_modify) { | |
200 | cls_client::set_modify_timestamp(&op); | |
201 | } else { | |
202 | cls_client::set_access_timestamp(&op); | |
91327a77 | 203 | } |
7c673cae | 204 | |
11fdf7f2 TL |
205 | auto comp = librbd::util::create_rados_callback(this); |
206 | int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); | |
207 | ceph_assert(r == 0); | |
208 | comp->release(); | |
7c673cae FG |
209 | } |
210 | ||
211 | void finish(int r) override { | |
11fdf7f2 | 212 | // ignore errors updating timestamp |
7c673cae FG |
213 | } |
214 | }; | |
215 | ||
11fdf7f2 TL |
216 | bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, |
217 | uint64_t interval) { | |
218 | return (interval && | |
219 | (static_cast<uint64_t>(now.sec()) >= interval + timestamp)); | |
224ce89b WB |
220 | } |
221 | ||
11fdf7f2 | 222 | } // anonymous namespace |
c07f9fc5 | 223 | |
9f95a23c TL |
224 | #undef dout_prefix |
225 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ | |
226 | << " " << __func__ << ": " | |
227 | ||
7c673cae FG |
228 | template <typename I> |
229 | void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c, | |
230 | Extents &&image_extents, | |
f67539c2 TL |
231 | ReadResult &&read_result, IOContext io_context, |
232 | int op_flags, int read_flags, | |
31f18b77 | 233 | const ZTracer::Trace &parent_trace) { |
7c673cae | 234 | ImageReadRequest<I> req(*ictx, c, std::move(image_extents), |
f67539c2 TL |
235 | std::move(read_result), io_context, op_flags, |
236 | read_flags, parent_trace); | |
7c673cae FG |
237 | req.send(); |
238 | } | |
239 | ||
240 | template <typename I> | |
241 | void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c, | |
242 | Extents &&image_extents, bufferlist &&bl, | |
f67539c2 | 243 | IOContext io_context, int op_flags, |
31f18b77 | 244 | const ZTracer::Trace &parent_trace) { |
7c673cae | 245 | ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl), |
f67539c2 | 246 | io_context, op_flags, parent_trace); |
7c673cae FG |
247 | req.send(); |
248 | } | |
249 | ||
250 | template <typename I> | |
251 | void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c, | |
11fdf7f2 TL |
252 | Extents &&image_extents, |
253 | uint32_t discard_granularity_bytes, | |
f67539c2 TL |
254 | IOContext io_context, |
255 | const ZTracer::Trace &parent_trace) { | |
11fdf7f2 | 256 | ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents), |
f67539c2 TL |
257 | discard_granularity_bytes, io_context, |
258 | parent_trace); | |
7c673cae FG |
259 | req.send(); |
260 | } | |
261 | ||
262 | template <typename I> | |
31f18b77 | 263 | void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c, |
11fdf7f2 TL |
264 | FlushSource flush_source, |
265 | const ZTracer::Trace &parent_trace) { | |
266 | ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace); | |
7c673cae FG |
267 | req.send(); |
268 | } | |
269 | ||
270 | template <typename I> | |
271 | void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c, | |
11fdf7f2 | 272 | Extents &&image_extents, |
f67539c2 TL |
273 | bufferlist &&bl, IOContext io_context, |
274 | int op_flags, | |
31f18b77 | 275 | const ZTracer::Trace &parent_trace) { |
11fdf7f2 | 276 | ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents), |
f67539c2 TL |
277 | std::move(bl), io_context, op_flags, |
278 | parent_trace); | |
7c673cae FG |
279 | req.send(); |
280 | } | |
281 | ||
c07f9fc5 FG |
282 | template <typename I> |
283 | void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c, | |
284 | Extents &&image_extents, | |
285 | bufferlist &&cmp_bl, | |
286 | bufferlist &&bl, | |
287 | uint64_t *mismatch_offset, | |
f67539c2 | 288 | IOContext io_context, int op_flags, |
c07f9fc5 FG |
289 | const ZTracer::Trace &parent_trace) { |
290 | ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents), | |
291 | std::move(cmp_bl), std::move(bl), | |
f67539c2 TL |
292 | mismatch_offset, io_context, op_flags, |
293 | parent_trace); | |
c07f9fc5 FG |
294 | req.send(); |
295 | } | |
296 | ||
7c673cae FG |
297 | template <typename I> |
298 | void ImageRequest<I>::send() { | |
299 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 300 | ceph_assert(m_aio_comp->is_initialized(get_aio_type())); |
494da23a | 301 | ceph_assert(m_aio_comp->is_started()); |
7c673cae FG |
302 | |
303 | CephContext *cct = image_ctx.cct; | |
304 | AioCompletion *aio_comp = this->m_aio_comp; | |
305 | ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " | |
c07f9fc5 | 306 | << "completion=" << aio_comp << dendl; |
7c673cae | 307 | |
f67539c2 TL |
308 | update_timestamp(); |
309 | send_request(); | |
7c673cae FG |
310 | } |
311 | ||
312 | template <typename I> | |
11fdf7f2 TL |
313 | void ImageRequest<I>::update_timestamp() { |
314 | bool modify = (get_aio_type() != AIO_TYPE_READ); | |
315 | uint64_t update_interval; | |
316 | if (modify) { | |
317 | update_interval = m_image_ctx.mtime_update_interval; | |
318 | } else { | |
319 | update_interval = m_image_ctx.atime_update_interval; | |
320 | } | |
7c673cae | 321 | |
11fdf7f2 TL |
322 | if (update_interval == 0) { |
323 | return; | |
324 | } | |
325 | ||
326 | utime_t (I::*get_timestamp_fn)() const; | |
327 | void (I::*set_timestamp_fn)(utime_t); | |
328 | if (modify) { | |
329 | get_timestamp_fn = &I::get_modify_timestamp; | |
330 | set_timestamp_fn = &I::set_modify_timestamp; | |
331 | } else { | |
332 | get_timestamp_fn = &I::get_access_timestamp; | |
333 | set_timestamp_fn = &I::set_access_timestamp; | |
334 | } | |
335 | ||
336 | utime_t ts = ceph_clock_now(); | |
337 | { | |
9f95a23c | 338 | std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
339 | if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx), |
340 | update_interval)) { | |
341 | return; | |
342 | } | |
343 | } | |
344 | ||
345 | { | |
9f95a23c | 346 | std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
347 | bool update = should_update_timestamp( |
348 | ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval); | |
349 | if (!update) { | |
350 | return; | |
351 | } | |
352 | ||
353 | std::invoke(set_timestamp_fn, m_image_ctx, ts); | |
354 | } | |
355 | ||
356 | // TODO we fire and forget this outside the IO path to prevent | |
357 | // potential race conditions with librbd client IO callbacks | |
358 | // between different threads (e.g. librados and object cacher) | |
359 | ldout(m_image_ctx.cct, 10) << get_request_type() << dendl; | |
360 | auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify); | |
361 | req->send(); | |
7c673cae FG |
362 | } |
363 | ||
364 | template <typename I> | |
365 | ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, | |
366 | Extents &&image_extents, | |
f67539c2 TL |
367 | ReadResult &&read_result, |
368 | IOContext io_context, int op_flags, | |
369 | int read_flags, | |
370 | const ZTracer::Trace &parent_trace) | |
371 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), | |
372 | io_context, "read", parent_trace), | |
373 | m_op_flags(op_flags), m_read_flags(read_flags) { | |
7c673cae FG |
374 | aio_comp->read_result = std::move(read_result); |
375 | } | |
376 | ||
7c673cae FG |
377 | template <typename I> |
378 | void ImageReadRequest<I>::send_request() { | |
379 | I &image_ctx = this->m_image_ctx; | |
380 | CephContext *cct = image_ctx.cct; | |
381 | ||
382 | auto &image_extents = this->m_image_extents; | |
11fdf7f2 | 383 | if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 && |
7c673cae | 384 | !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { |
f67539c2 | 385 | readahead(get_image_ctx(&image_ctx), image_extents, this->m_io_context); |
9f95a23c | 386 | } |
7c673cae | 387 | |
9f95a23c TL |
388 | // map image extents to object extents |
389 | LightweightObjectExtents object_extents; | |
f67539c2 | 390 | uint64_t buffer_ofs = 0; |
9f95a23c TL |
391 | for (auto &extent : image_extents) { |
392 | if (extent.second == 0) { | |
393 | continue; | |
7c673cae | 394 | } |
7c673cae | 395 | |
f67539c2 TL |
396 | util::file_to_extents(&image_ctx, extent.first, extent.second, buffer_ofs, |
397 | &object_extents); | |
9f95a23c | 398 | buffer_ofs += extent.second; |
7c673cae | 399 | } |
7c673cae | 400 | |
9f95a23c | 401 | AioCompletion *aio_comp = this->m_aio_comp; |
f67539c2 TL |
402 | aio_comp->read_result.set_image_extents(image_extents); |
403 | ||
404 | // issue the requests | |
9f95a23c TL |
405 | aio_comp->set_request_count(object_extents.size()); |
406 | for (auto &oe : object_extents) { | |
407 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
408 | << oe.offset << "~" << oe.length << " from " | |
409 | << oe.buffer_extents << dendl; | |
7c673cae | 410 | |
9f95a23c | 411 | auto req_comp = new io::ReadResult::C_ObjectReadRequest( |
f67539c2 | 412 | aio_comp, {{oe.offset, oe.length, std::move(oe.buffer_extents)}}); |
9f95a23c | 413 | auto req = ObjectDispatchSpec::create_read( |
f67539c2 TL |
414 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no, |
415 | &req_comp->extents, this->m_io_context, m_op_flags, m_read_flags, | |
416 | this->m_trace, nullptr, req_comp); | |
9f95a23c TL |
417 | req->send(); |
418 | } | |
7c673cae FG |
419 | |
420 | image_ctx.perfcounter->inc(l_librbd_rd); | |
421 | image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); | |
422 | } | |
423 | ||
7c673cae FG |
424 | template <typename I> |
425 | void AbstractImageWriteRequest<I>::send_request() { | |
426 | I &image_ctx = this->m_image_ctx; | |
7c673cae | 427 | |
7c673cae FG |
428 | bool journaling = false; |
429 | ||
430 | AioCompletion *aio_comp = this->m_aio_comp; | |
7c673cae FG |
431 | { |
432 | // prevent image size from changing between computing clip and recording | |
433 | // pending async operation | |
9f95a23c | 434 | std::shared_lock image_locker{image_ctx.image_lock}; |
7c673cae FG |
435 | journaling = (image_ctx.journal != nullptr && |
436 | image_ctx.journal->is_journal_appending()); | |
437 | } | |
438 | ||
9f95a23c TL |
439 | uint64_t clip_len = 0; |
440 | LightweightObjectExtents object_extents; | |
441 | for (auto &extent : this->m_image_extents) { | |
442 | if (extent.second == 0) { | |
443 | continue; | |
444 | } | |
445 | ||
446 | // map to object extents | |
f67539c2 TL |
447 | io::util::file_to_extents(&image_ctx, extent.first, extent.second, clip_len, |
448 | &object_extents); | |
9f95a23c TL |
449 | clip_len += extent.second; |
450 | } | |
451 | ||
11fdf7f2 | 452 | int ret = prune_object_extents(&object_extents); |
c07f9fc5 FG |
453 | if (ret < 0) { |
454 | aio_comp->fail(ret); | |
455 | return; | |
456 | } | |
7c673cae | 457 | |
39ae355f TL |
458 | // reflect changes in object_extents back to m_image_extents |
459 | if (ret == 1) { | |
460 | this->m_image_extents.clear(); | |
461 | for (auto& object_extent : object_extents) { | |
462 | io::Extents image_extents; | |
463 | io::util::extent_to_file(&image_ctx, object_extent.object_no, | |
464 | object_extent.offset, object_extent.length, | |
465 | image_extents); | |
466 | this->m_image_extents.insert(this->m_image_extents.end(), | |
467 | image_extents.begin(), image_extents.end()); | |
468 | } | |
469 | } | |
470 | ||
9f95a23c | 471 | aio_comp->set_request_count(object_extents.size()); |
7c673cae FG |
472 | if (!object_extents.empty()) { |
473 | uint64_t journal_tid = 0; | |
7c673cae FG |
474 | if (journaling) { |
475 | // in-flight ops are flushed prior to closing the journal | |
11fdf7f2 TL |
476 | ceph_assert(image_ctx.journal != NULL); |
477 | journal_tid = append_journal_event(m_synchronous); | |
7c673cae FG |
478 | } |
479 | ||
f67539c2 | 480 | send_object_requests(object_extents, this->m_io_context, journal_tid); |
7c673cae FG |
481 | } |
482 | ||
483 | update_stats(clip_len); | |
7c673cae FG |
484 | } |
485 | ||
486 | template <typename I> | |
487 | void AbstractImageWriteRequest<I>::send_object_requests( | |
f67539c2 | 488 | const LightweightObjectExtents &object_extents, IOContext io_context, |
11fdf7f2 | 489 | uint64_t journal_tid) { |
7c673cae FG |
490 | I &image_ctx = this->m_image_ctx; |
491 | CephContext *cct = image_ctx.cct; | |
492 | ||
493 | AioCompletion *aio_comp = this->m_aio_comp; | |
9f95a23c TL |
494 | bool single_extent = (object_extents.size() == 1); |
495 | for (auto& oe : object_extents) { | |
496 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
497 | << oe.offset << "~" << oe.length << " from " | |
498 | << oe.buffer_extents << dendl; | |
7c673cae | 499 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); |
f67539c2 TL |
500 | auto request = create_object_request(oe, io_context, journal_tid, |
501 | single_extent, req_comp); | |
9f95a23c | 502 | request->send(); |
7c673cae FG |
503 | } |
504 | } | |
505 | ||
506 | template <typename I> | |
9f95a23c TL |
507 | void ImageWriteRequest<I>::assemble_extent( |
508 | const LightweightObjectExtent &object_extent, bufferlist *bl) { | |
7c673cae FG |
509 | for (auto q = object_extent.buffer_extents.begin(); |
510 | q != object_extent.buffer_extents.end(); ++q) { | |
511 | bufferlist sub_bl; | |
512 | sub_bl.substr_of(m_bl, q->first, q->second); | |
513 | bl->claim_append(sub_bl); | |
514 | } | |
515 | } | |
516 | ||
517 | template <typename I> | |
11fdf7f2 | 518 | uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
519 | I &image_ctx = this->m_image_ctx; |
520 | ||
521 | uint64_t tid = 0; | |
522 | uint64_t buffer_offset = 0; | |
11fdf7f2 | 523 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
524 | for (auto &extent : this->m_image_extents) { |
525 | bufferlist sub_bl; | |
526 | sub_bl.substr_of(m_bl, buffer_offset, extent.second); | |
527 | buffer_offset += extent.second; | |
528 | ||
529 | tid = image_ctx.journal->append_write_event(extent.first, extent.second, | |
11fdf7f2 | 530 | sub_bl, synchronous); |
7c673cae FG |
531 | } |
532 | ||
7c673cae FG |
533 | return tid; |
534 | } | |
535 | ||
7c673cae | 536 | template <typename I> |
11fdf7f2 | 537 | ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request( |
f67539c2 | 538 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 539 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 540 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
541 | |
542 | bufferlist bl; | |
f67539c2 TL |
543 | if (single_extent && object_extent.buffer_extents.size() == 1 && |
544 | m_bl.length() == object_extent.length) { | |
9f95a23c TL |
545 | // optimization for single object/buffer extent writes |
546 | bl = std::move(m_bl); | |
547 | } else { | |
548 | assemble_extent(object_extent, &bl); | |
549 | } | |
550 | ||
11fdf7f2 | 551 | auto req = ObjectDispatchSpec::create_write( |
9f95a23c | 552 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 TL |
553 | object_extent.offset, std::move(bl), io_context, m_op_flags, 0, |
554 | std::nullopt, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
555 | return req; |
556 | } | |
557 | ||
558 | template <typename I> | |
559 | void ImageWriteRequest<I>::update_stats(size_t length) { | |
560 | I &image_ctx = this->m_image_ctx; | |
561 | image_ctx.perfcounter->inc(l_librbd_wr); | |
562 | image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); | |
563 | } | |
564 | ||
565 | template <typename I> | |
11fdf7f2 | 566 | uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
567 | I &image_ctx = this->m_image_ctx; |
568 | ||
569 | uint64_t tid = 0; | |
11fdf7f2 | 570 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae | 571 | for (auto &extent : this->m_image_extents) { |
11fdf7f2 TL |
572 | journal::EventEntry event_entry( |
573 | journal::AioDiscardEvent(extent.first, | |
574 | extent.second, | |
575 | this->m_discard_granularity_bytes)); | |
7c673cae | 576 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
577 | extent.first, extent.second, |
578 | synchronous, 0); | |
7c673cae FG |
579 | } |
580 | ||
7c673cae FG |
581 | return tid; |
582 | } | |
583 | ||
7c673cae | 584 | template <typename I> |
11fdf7f2 | 585 | ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request( |
f67539c2 | 586 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 587 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 588 | I &image_ctx = this->m_image_ctx; |
11fdf7f2 | 589 | auto req = ObjectDispatchSpec::create_discard( |
9f95a23c | 590 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 | 591 | object_extent.offset, object_extent.length, io_context, |
11fdf7f2 TL |
592 | OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace, |
593 | on_finish); | |
7c673cae FG |
594 | return req; |
595 | } | |
596 | ||
597 | template <typename I> | |
598 | void ImageDiscardRequest<I>::update_stats(size_t length) { | |
599 | I &image_ctx = this->m_image_ctx; | |
600 | image_ctx.perfcounter->inc(l_librbd_discard); | |
601 | image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); | |
602 | } | |
603 | ||
11fdf7f2 TL |
604 | template <typename I> |
605 | int ImageDiscardRequest<I>::prune_object_extents( | |
9f95a23c | 606 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 TL |
607 | if (m_discard_granularity_bytes == 0) { |
608 | return 0; | |
609 | } | |
610 | ||
611 | // Align the range to discard_granularity_bytes boundary and skip | |
612 | // and discards that are too small to free up any space. | |
613 | // | |
614 | // discard_granularity_bytes >= object_size && tail truncation | |
615 | // is a special case for filestore | |
616 | bool prune_required = false; | |
39ae355f | 617 | bool length_modified = false; |
11fdf7f2 TL |
618 | auto object_size = this->m_image_ctx.layout.object_size; |
619 | auto discard_granularity_bytes = std::min(m_discard_granularity_bytes, | |
620 | object_size); | |
621 | auto xform_lambda = | |
39ae355f | 622 | [discard_granularity_bytes, object_size, &prune_required, &length_modified] |
9f95a23c | 623 | (LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
624 | auto& offset = object_extent.offset; |
625 | auto& length = object_extent.length; | |
626 | auto next_offset = offset + length; | |
627 | ||
628 | if ((discard_granularity_bytes < object_size) || | |
629 | (next_offset < object_size)) { | |
630 | offset = p2roundup<uint64_t>(offset, discard_granularity_bytes); | |
631 | next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes); | |
632 | if (offset >= next_offset) { | |
633 | prune_required = true; | |
634 | length = 0; | |
635 | } else { | |
39ae355f TL |
636 | auto new_length = next_offset - offset; |
637 | if (length != new_length) { | |
638 | length_modified = true; | |
639 | length = new_length; | |
640 | } | |
11fdf7f2 TL |
641 | } |
642 | } | |
643 | }; | |
644 | std::for_each(object_extents->begin(), object_extents->end(), | |
645 | xform_lambda); | |
646 | ||
647 | if (prune_required) { | |
648 | // one or more object extents were skipped | |
649 | auto remove_lambda = | |
9f95a23c | 650 | [](const LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
651 | return (object_extent.length == 0); |
652 | }; | |
653 | object_extents->erase( | |
654 | std::remove_if(object_extents->begin(), object_extents->end(), | |
655 | remove_lambda), | |
656 | object_extents->end()); | |
657 | } | |
39ae355f TL |
658 | |
659 | // object extents were modified, image extents needs updating | |
660 | if (length_modified || prune_required) { | |
661 | return 1; | |
662 | } | |
663 | ||
11fdf7f2 TL |
664 | return 0; |
665 | } | |
666 | ||
7c673cae FG |
667 | template <typename I> |
668 | void ImageFlushRequest<I>::send_request() { | |
669 | I &image_ctx = this->m_image_ctx; | |
7c673cae FG |
670 | |
671 | bool journaling = false; | |
672 | { | |
9f95a23c | 673 | std::shared_lock image_locker{image_ctx.image_lock}; |
11fdf7f2 TL |
674 | journaling = (m_flush_source == FLUSH_SOURCE_USER && |
675 | image_ctx.journal != nullptr && | |
7c673cae FG |
676 | image_ctx.journal->is_journal_appending()); |
677 | } | |
678 | ||
679 | AioCompletion *aio_comp = this->m_aio_comp; | |
11fdf7f2 TL |
680 | aio_comp->set_request_count(1); |
681 | ||
682 | Context *ctx = new C_AioRequest(aio_comp); | |
683 | ||
684 | // ensure no locks are held when flush is complete | |
685 | ctx = librbd::util::create_async_context_callback(image_ctx, ctx); | |
686 | ||
9f95a23c | 687 | uint64_t journal_tid = 0; |
7c673cae FG |
688 | if (journaling) { |
689 | // in-flight ops are flushed prior to closing the journal | |
9f95a23c TL |
690 | ceph_assert(image_ctx.journal != NULL); |
691 | journal_tid = image_ctx.journal->append_io_event( | |
11fdf7f2 | 692 | journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0); |
494da23a | 693 | image_ctx.journal->user_flushed(); |
7c673cae FG |
694 | } |
695 | ||
9f95a23c TL |
696 | auto object_dispatch_spec = ObjectDispatchSpec::create_flush( |
697 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid, | |
698 | this->m_trace, ctx); | |
699 | ctx = new LambdaContext([object_dispatch_spec](int r) { | |
700 | object_dispatch_spec->send(); | |
701 | }); | |
702 | ||
11fdf7f2 | 703 | // ensure all in-flight IOs are settled if non-user flush request |
f67539c2 TL |
704 | if (m_flush_source == FLUSH_SOURCE_WRITEBACK) { |
705 | ctx->complete(0); | |
706 | } else { | |
707 | aio_comp->async_op.flush(ctx); | |
708 | } | |
11fdf7f2 TL |
709 | |
710 | // might be flushing during image shutdown | |
711 | if (image_ctx.perfcounter != nullptr) { | |
712 | image_ctx.perfcounter->inc(l_librbd_flush); | |
713 | } | |
7c673cae FG |
714 | } |
715 | ||
7c673cae | 716 | template <typename I> |
11fdf7f2 | 717 | uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
718 | I &image_ctx = this->m_image_ctx; |
719 | ||
720 | uint64_t tid = 0; | |
11fdf7f2 | 721 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
722 | for (auto &extent : this->m_image_extents) { |
723 | journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, | |
724 | extent.second, | |
725 | m_data_bl)); | |
726 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
11fdf7f2 TL |
727 | extent.first, extent.second, |
728 | synchronous, 0); | |
7c673cae FG |
729 | } |
730 | ||
7c673cae FG |
731 | return tid; |
732 | } | |
733 | ||
7c673cae | 734 | template <typename I> |
11fdf7f2 | 735 | ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request( |
f67539c2 | 736 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 737 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
7c673cae | 738 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
739 | |
740 | bufferlist bl; | |
11fdf7f2 TL |
741 | ObjectDispatchSpec *req; |
742 | ||
743 | if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) { | |
9f95a23c | 744 | auto buffer_extents{object_extent.buffer_extents}; |
7c673cae | 745 | |
11fdf7f2 | 746 | req = ObjectDispatchSpec::create_write_same( |
9f95a23c TL |
747 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
748 | object_extent.offset, object_extent.length, std::move(buffer_extents), | |
f67539c2 | 749 | std::move(bl), io_context, m_op_flags, journal_tid, |
11fdf7f2 | 750 | this->m_trace, on_finish); |
7c673cae FG |
751 | return req; |
752 | } | |
11fdf7f2 | 753 | req = ObjectDispatchSpec::create_write( |
9f95a23c | 754 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
f67539c2 TL |
755 | object_extent.offset, std::move(bl), io_context, m_op_flags, 0, |
756 | std::nullopt, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
757 | return req; |
758 | } | |
759 | ||
760 | template <typename I> | |
761 | void ImageWriteSameRequest<I>::update_stats(size_t length) { | |
762 | I &image_ctx = this->m_image_ctx; | |
763 | image_ctx.perfcounter->inc(l_librbd_ws); | |
764 | image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); | |
765 | } | |
766 | ||
c07f9fc5 FG |
767 | template <typename I> |
768 | uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( | |
11fdf7f2 | 769 | bool synchronous) { |
c07f9fc5 FG |
770 | I &image_ctx = this->m_image_ctx; |
771 | ||
772 | uint64_t tid = 0; | |
11fdf7f2 | 773 | ceph_assert(this->m_image_extents.size() == 1); |
c07f9fc5 | 774 | auto &extent = this->m_image_extents.front(); |
39ae355f TL |
775 | tid = image_ctx.journal->append_compare_and_write_event(extent.first, |
776 | extent.second, | |
777 | m_cmp_bl, | |
778 | m_bl, | |
779 | synchronous); | |
c07f9fc5 FG |
780 | |
781 | return tid; | |
782 | } | |
783 | ||
c07f9fc5 FG |
784 | template <typename I> |
785 | void ImageCompareAndWriteRequest<I>::assemble_extent( | |
39ae355f TL |
786 | const LightweightObjectExtent &object_extent, bufferlist *bl, |
787 | bufferlist *cmp_bl) { | |
c07f9fc5 FG |
788 | for (auto q = object_extent.buffer_extents.begin(); |
789 | q != object_extent.buffer_extents.end(); ++q) { | |
790 | bufferlist sub_bl; | |
791 | sub_bl.substr_of(m_bl, q->first, q->second); | |
792 | bl->claim_append(sub_bl); | |
39ae355f TL |
793 | |
794 | bufferlist sub_cmp_bl; | |
795 | sub_cmp_bl.substr_of(m_cmp_bl, q->first, q->second); | |
796 | cmp_bl->claim_append(sub_cmp_bl); | |
c07f9fc5 FG |
797 | } |
798 | } | |
799 | ||
c07f9fc5 | 800 | template <typename I> |
11fdf7f2 | 801 | ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request( |
f67539c2 | 802 | const LightweightObjectExtent &object_extent, IOContext io_context, |
9f95a23c | 803 | uint64_t journal_tid, bool single_extent, Context *on_finish) { |
c07f9fc5 FG |
804 | I &image_ctx = this->m_image_ctx; |
805 | ||
806 | bufferlist bl; | |
39ae355f TL |
807 | bufferlist cmp_bl; |
808 | assemble_extent(object_extent, &bl, &cmp_bl); | |
11fdf7f2 | 809 | auto req = ObjectDispatchSpec::create_compare_and_write( |
9f95a23c | 810 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
39ae355f | 811 | object_extent.offset, std::move(cmp_bl), std::move(bl), io_context, |
9f95a23c | 812 | m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish); |
c07f9fc5 FG |
813 | return req; |
814 | } | |
815 | ||
816 | template <typename I> | |
817 | void ImageCompareAndWriteRequest<I>::update_stats(size_t length) { | |
818 | I &image_ctx = this->m_image_ctx; | |
819 | image_ctx.perfcounter->inc(l_librbd_cmp); | |
820 | image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length); | |
821 | } | |
822 | ||
823 | template <typename I> | |
11fdf7f2 | 824 | int ImageCompareAndWriteRequest<I>::prune_object_extents( |
9f95a23c | 825 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 | 826 | if (object_extents->size() > 1) |
c07f9fc5 FG |
827 | return -EINVAL; |
828 | ||
829 | I &image_ctx = this->m_image_ctx; | |
c07f9fc5 | 830 | uint64_t su = image_ctx.layout.stripe_unit; |
9f95a23c | 831 | auto& object_extent = object_extents->front(); |
39ae355f | 832 | if (su == 0 || (object_extent.offset % su + object_extent.length > su)) |
c07f9fc5 FG |
833 | return -EINVAL; |
834 | ||
835 | return 0; | |
836 | } | |
837 | ||
f67539c2 TL |
838 | template <typename I> |
839 | ImageListSnapsRequest<I>::ImageListSnapsRequest( | |
840 | I& image_ctx, AioCompletion* aio_comp, Extents&& image_extents, | |
841 | SnapIds&& snap_ids, int list_snaps_flags, SnapshotDelta* snapshot_delta, | |
842 | const ZTracer::Trace& parent_trace) | |
843 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), | |
844 | image_ctx.get_data_io_context(), "list-snaps", | |
845 | parent_trace), | |
846 | m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags), | |
847 | m_snapshot_delta(snapshot_delta) { | |
848 | } | |
849 | ||
850 | template <typename I> | |
851 | void ImageListSnapsRequest<I>::send_request() { | |
852 | I &image_ctx = this->m_image_ctx; | |
853 | CephContext *cct = image_ctx.cct; | |
854 | ||
855 | // map image extents to object extents | |
856 | auto &image_extents = this->m_image_extents; | |
857 | std::map<uint64_t, Extents> object_number_extents; | |
858 | for (auto& image_extent : image_extents) { | |
859 | if (image_extent.second == 0) { | |
860 | continue; | |
861 | } | |
862 | ||
863 | striper::LightweightObjectExtents object_extents; | |
864 | io::util::file_to_extents(&image_ctx, image_extent.first, | |
865 | image_extent.second, 0, &object_extents); | |
866 | for (auto& object_extent : object_extents) { | |
867 | object_number_extents[object_extent.object_no].emplace_back( | |
868 | object_extent.offset, object_extent.length); | |
869 | } | |
870 | } | |
871 | ||
872 | // reassemble the deltas back into image-extents when complete | |
873 | auto aio_comp = this->m_aio_comp; | |
874 | aio_comp->set_request_count(1); | |
875 | auto assemble_ctx = new C_AssembleSnapshotDeltas<I>( | |
876 | &image_ctx, aio_comp, m_snapshot_delta); | |
877 | auto sub_aio_comp = AioCompletion::create_and_start< | |
878 | Context, &Context::complete>(assemble_ctx, get_image_ctx(&image_ctx), | |
879 | AIO_TYPE_GENERIC); | |
880 | ||
881 | // issue the requests | |
882 | sub_aio_comp->set_request_count(object_number_extents.size()); | |
883 | for (auto& oe : object_number_extents) { | |
884 | ldout(cct, 20) << data_object_name(&image_ctx, oe.first) << " " | |
885 | << oe.second << dendl; | |
886 | auto ctx = new C_AioRequest(sub_aio_comp); | |
887 | auto req = ObjectDispatchSpec::create_list_snaps( | |
888 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.first, std::move(oe.second), | |
889 | SnapIds{m_snap_ids}, m_list_snaps_flags, this->m_trace, | |
890 | assemble_ctx->get_snapshot_delta(oe.first), ctx); | |
891 | req->send(); | |
892 | } | |
893 | } | |
894 | ||
7c673cae FG |
895 | } // namespace io |
896 | } // namespace librbd | |
897 | ||
898 | template class librbd::io::ImageRequest<librbd::ImageCtx>; | |
899 | template class librbd::io::ImageReadRequest<librbd::ImageCtx>; | |
900 | template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>; | |
901 | template class librbd::io::ImageWriteRequest<librbd::ImageCtx>; | |
902 | template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>; | |
903 | template class librbd::io::ImageFlushRequest<librbd::ImageCtx>; | |
904 | template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>; | |
c07f9fc5 | 905 | template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>; |
f67539c2 | 906 | template class librbd::io::ImageListSnapsRequest<librbd::ImageCtx>; |