]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ImageRequest.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/internal.h" | |
7 | #include "librbd/Journal.h" | |
b32b8144 | 8 | #include "librbd/Types.h" |
7c673cae FG |
9 | #include "librbd/Utils.h" |
10 | #include "librbd/cache/ImageCache.h" | |
11 | #include "librbd/io/AioCompletion.h" | |
11fdf7f2 TL |
12 | #include "librbd/io/AsyncOperation.h" |
13 | #include "librbd/io/ObjectDispatchInterface.h" | |
14 | #include "librbd/io/ObjectDispatchSpec.h" | |
15 | #include "librbd/io/ObjectDispatcher.h" | |
16 | #include "librbd/io/Utils.h" | |
7c673cae FG |
17 | #include "librbd/journal/Types.h" |
18 | #include "include/rados/librados.hpp" | |
11fdf7f2 | 19 | #include "common/perf_counters.h" |
7c673cae FG |
20 | #include "common/WorkQueue.h" |
21 | #include "osdc/Striper.h" | |
11fdf7f2 TL |
22 | #include <algorithm> |
23 | #include <functional> | |
7c673cae FG |
24 | |
25 | #define dout_subsys ceph_subsys_rbd | |
26 | #undef dout_prefix | |
9f95a23c | 27 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << __func__ << ": " |
7c673cae FG |
28 | |
29 | namespace librbd { | |
30 | namespace io { | |
31 | ||
9f95a23c | 32 | using librbd::util::data_object_name; |
11fdf7f2 | 33 | using librbd::util::get_image_ctx; |
7c673cae FG |
34 | |
35 | namespace { | |
36 | ||
9f95a23c TL |
37 | template <typename I> |
38 | struct C_RBD_Readahead : public Context { | |
39 | I *ictx; | |
40 | uint64_t object_no; | |
41 | uint64_t offset; | |
42 | uint64_t length; | |
43 | ||
44 | bufferlist read_data; | |
45 | io::ExtentMap extent_map; | |
46 | ||
47 | C_RBD_Readahead(I *ictx, uint64_t object_no, uint64_t offset, uint64_t length) | |
48 | : ictx(ictx), object_no(object_no), offset(offset), length(length) { | |
49 | ictx->readahead.inc_pending(); | |
50 | } | |
51 | ||
52 | void finish(int r) override { | |
53 | ldout(ictx->cct, 20) << "C_RBD_Readahead on " | |
54 | << data_object_name(ictx, object_no) << ": " | |
55 | << offset << "~" << length << dendl; | |
56 | ictx->readahead.dec_pending(); | |
57 | } | |
58 | }; | |
59 | ||
60 | template <typename I> | |
61 | void readahead(I *ictx, const Extents& image_extents) { | |
62 | uint64_t total_bytes = 0; | |
63 | for (auto& image_extent : image_extents) { | |
64 | total_bytes += image_extent.second; | |
65 | } | |
66 | ||
67 | ictx->image_lock.lock_shared(); | |
68 | auto total_bytes_read = ictx->total_bytes_read.fetch_add(total_bytes); | |
69 | bool abort = ( | |
70 | ictx->readahead_disable_after_bytes != 0 && | |
71 | total_bytes_read > ictx->readahead_disable_after_bytes); | |
72 | if (abort) { | |
73 | ictx->image_lock.unlock_shared(); | |
74 | return; | |
75 | } | |
76 | ||
77 | uint64_t image_size = ictx->get_image_size(ictx->snap_id); | |
78 | auto snap_id = ictx->snap_id; | |
79 | ictx->image_lock.unlock_shared(); | |
80 | ||
81 | auto readahead_extent = ictx->readahead.update(image_extents, image_size); | |
82 | uint64_t readahead_offset = readahead_extent.first; | |
83 | uint64_t readahead_length = readahead_extent.second; | |
84 | ||
85 | if (readahead_length > 0) { | |
86 | ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" | |
87 | << readahead_length << dendl; | |
88 | LightweightObjectExtents readahead_object_extents; | |
89 | Striper::file_to_extents(ictx->cct, &ictx->layout, | |
90 | readahead_offset, readahead_length, 0, 0, | |
91 | &readahead_object_extents); | |
92 | for (auto& object_extent : readahead_object_extents) { | |
93 | ldout(ictx->cct, 20) << "(readahead) " | |
94 | << data_object_name(ictx, | |
95 | object_extent.object_no) << " " | |
96 | << object_extent.offset << "~" | |
97 | << object_extent.length << dendl; | |
98 | ||
99 | auto req_comp = new C_RBD_Readahead<I>(ictx, object_extent.object_no, | |
100 | object_extent.offset, | |
101 | object_extent.length); | |
102 | auto req = io::ObjectDispatchSpec::create_read( | |
103 | ictx, io::OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, | |
104 | object_extent.offset, object_extent.length, snap_id, 0, {}, | |
105 | &req_comp->read_data, &req_comp->extent_map, req_comp); | |
106 | req->send(); | |
107 | } | |
108 | ||
109 | ictx->perfcounter->inc(l_librbd_readahead); | |
110 | ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length); | |
111 | } | |
112 | } | |
113 | ||
11fdf7f2 TL |
114 | template <typename I> |
115 | struct C_UpdateTimestamp : public Context { | |
116 | public: | |
117 | I& m_image_ctx; | |
118 | bool m_modify; // if modify set to 'true', modify timestamp is updated, | |
119 | // access timestamp otherwise | |
120 | AsyncOperation m_async_op; | |
91327a77 | 121 | |
11fdf7f2 TL |
122 | C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) { |
123 | m_async_op.start_op(*get_image_ctx(&m_image_ctx)); | |
91327a77 | 124 | } |
11fdf7f2 TL |
125 | ~C_UpdateTimestamp() override { |
126 | m_async_op.finish_op(); | |
91327a77 AA |
127 | } |
128 | ||
11fdf7f2 TL |
129 | void send() { |
130 | librados::ObjectWriteOperation op; | |
131 | if (m_modify) { | |
132 | cls_client::set_modify_timestamp(&op); | |
133 | } else { | |
134 | cls_client::set_access_timestamp(&op); | |
91327a77 | 135 | } |
7c673cae | 136 | |
11fdf7f2 TL |
137 | auto comp = librbd::util::create_rados_callback(this); |
138 | int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); | |
139 | ceph_assert(r == 0); | |
140 | comp->release(); | |
7c673cae FG |
141 | } |
142 | ||
143 | void finish(int r) override { | |
11fdf7f2 | 144 | // ignore errors updating timestamp |
7c673cae FG |
145 | } |
146 | }; | |
147 | ||
11fdf7f2 TL |
148 | bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, |
149 | uint64_t interval) { | |
150 | return (interval && | |
151 | (static_cast<uint64_t>(now.sec()) >= interval + timestamp)); | |
224ce89b WB |
152 | } |
153 | ||
11fdf7f2 | 154 | } // anonymous namespace |
c07f9fc5 | 155 | |
9f95a23c TL |
156 | #undef dout_prefix |
157 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ | |
158 | << " " << __func__ << ": " | |
159 | ||
7c673cae FG |
160 | template <typename I> |
161 | void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c, | |
162 | Extents &&image_extents, | |
31f18b77 FG |
163 | ReadResult &&read_result, int op_flags, |
164 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 165 | ImageReadRequest<I> req(*ictx, c, std::move(image_extents), |
31f18b77 | 166 | std::move(read_result), op_flags, parent_trace); |
7c673cae FG |
167 | req.send(); |
168 | } | |
169 | ||
170 | template <typename I> | |
171 | void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c, | |
172 | Extents &&image_extents, bufferlist &&bl, | |
31f18b77 FG |
173 | int op_flags, |
174 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 175 | ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl), |
31f18b77 | 176 | op_flags, parent_trace); |
7c673cae FG |
177 | req.send(); |
178 | } | |
179 | ||
180 | template <typename I> | |
181 | void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c, | |
11fdf7f2 TL |
182 | Extents &&image_extents, |
183 | uint32_t discard_granularity_bytes, | |
31f18b77 | 184 | const ZTracer::Trace &parent_trace) { |
11fdf7f2 TL |
185 | ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents), |
186 | discard_granularity_bytes, parent_trace); | |
7c673cae FG |
187 | req.send(); |
188 | } | |
189 | ||
190 | template <typename I> | |
31f18b77 | 191 | void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c, |
11fdf7f2 TL |
192 | FlushSource flush_source, |
193 | const ZTracer::Trace &parent_trace) { | |
194 | ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace); | |
7c673cae FG |
195 | req.send(); |
196 | } | |
197 | ||
198 | template <typename I> | |
199 | void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c, | |
11fdf7f2 | 200 | Extents &&image_extents, |
31f18b77 FG |
201 | bufferlist &&bl, int op_flags, |
202 | const ZTracer::Trace &parent_trace) { | |
11fdf7f2 TL |
203 | ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents), |
204 | std::move(bl), op_flags, parent_trace); | |
7c673cae FG |
205 | req.send(); |
206 | } | |
207 | ||
c07f9fc5 FG |
208 | template <typename I> |
209 | void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c, | |
210 | Extents &&image_extents, | |
211 | bufferlist &&cmp_bl, | |
212 | bufferlist &&bl, | |
213 | uint64_t *mismatch_offset, | |
214 | int op_flags, | |
215 | const ZTracer::Trace &parent_trace) { | |
216 | ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents), | |
217 | std::move(cmp_bl), std::move(bl), | |
218 | mismatch_offset, op_flags, parent_trace); | |
219 | req.send(); | |
220 | } | |
221 | ||
7c673cae FG |
222 | template <typename I> |
223 | void ImageRequest<I>::send() { | |
224 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 225 | ceph_assert(m_aio_comp->is_initialized(get_aio_type())); |
494da23a | 226 | ceph_assert(m_aio_comp->is_started()); |
7c673cae FG |
227 | |
228 | CephContext *cct = image_ctx.cct; | |
229 | AioCompletion *aio_comp = this->m_aio_comp; | |
230 | ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " | |
c07f9fc5 | 231 | << "completion=" << aio_comp << dendl; |
7c673cae | 232 | |
7c673cae FG |
233 | int r = clip_request(); |
234 | if (r < 0) { | |
235 | m_aio_comp->fail(r); | |
236 | return; | |
237 | } | |
238 | ||
9f95a23c TL |
239 | if (finish_request_early()) { |
240 | return; | |
241 | } | |
242 | ||
7c673cae | 243 | if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { |
11fdf7f2 | 244 | update_timestamp(); |
7c673cae FG |
245 | send_request(); |
246 | } else { | |
247 | send_image_cache_request(); | |
248 | } | |
249 | } | |
250 | ||
251 | template <typename I> | |
252 | int ImageRequest<I>::clip_request() { | |
9f95a23c | 253 | std::shared_lock image_locker{m_image_ctx.image_lock}; |
7c673cae FG |
254 | for (auto &image_extent : m_image_extents) { |
255 | auto clip_len = image_extent.second; | |
256 | int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len); | |
257 | if (r < 0) { | |
258 | return r; | |
259 | } | |
260 | ||
261 | image_extent.second = clip_len; | |
262 | } | |
263 | return 0; | |
264 | } | |
265 | ||
266 | template <typename I> | |
11fdf7f2 TL |
267 | void ImageRequest<I>::update_timestamp() { |
268 | bool modify = (get_aio_type() != AIO_TYPE_READ); | |
269 | uint64_t update_interval; | |
270 | if (modify) { | |
271 | update_interval = m_image_ctx.mtime_update_interval; | |
272 | } else { | |
273 | update_interval = m_image_ctx.atime_update_interval; | |
274 | } | |
7c673cae | 275 | |
11fdf7f2 TL |
276 | if (update_interval == 0) { |
277 | return; | |
278 | } | |
279 | ||
280 | utime_t (I::*get_timestamp_fn)() const; | |
281 | void (I::*set_timestamp_fn)(utime_t); | |
282 | if (modify) { | |
283 | get_timestamp_fn = &I::get_modify_timestamp; | |
284 | set_timestamp_fn = &I::set_modify_timestamp; | |
285 | } else { | |
286 | get_timestamp_fn = &I::get_access_timestamp; | |
287 | set_timestamp_fn = &I::set_access_timestamp; | |
288 | } | |
289 | ||
290 | utime_t ts = ceph_clock_now(); | |
291 | { | |
9f95a23c | 292 | std::shared_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
293 | if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx), |
294 | update_interval)) { | |
295 | return; | |
296 | } | |
297 | } | |
298 | ||
299 | { | |
9f95a23c | 300 | std::unique_lock timestamp_locker{m_image_ctx.timestamp_lock}; |
11fdf7f2 TL |
301 | bool update = should_update_timestamp( |
302 | ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval); | |
303 | if (!update) { | |
304 | return; | |
305 | } | |
306 | ||
307 | std::invoke(set_timestamp_fn, m_image_ctx, ts); | |
308 | } | |
309 | ||
310 | // TODO we fire and forget this outside the IO path to prevent | |
311 | // potential race conditions with librbd client IO callbacks | |
312 | // between different threads (e.g. librados and object cacher) | |
313 | ldout(m_image_ctx.cct, 10) << get_request_type() << dendl; | |
314 | auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify); | |
315 | req->send(); | |
7c673cae FG |
316 | } |
317 | ||
318 | template <typename I> | |
319 | ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, | |
320 | Extents &&image_extents, | |
31f18b77 FG |
321 | ReadResult &&read_result, int op_flags, |
322 | const ZTracer::Trace &parent_trace) | |
323 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read", | |
324 | parent_trace), | |
7c673cae FG |
325 | m_op_flags(op_flags) { |
326 | aio_comp->read_result = std::move(read_result); | |
327 | } | |
328 | ||
329 | template <typename I> | |
330 | int ImageReadRequest<I>::clip_request() { | |
331 | int r = ImageRequest<I>::clip_request(); | |
332 | if (r < 0) { | |
333 | return r; | |
334 | } | |
335 | ||
336 | uint64_t buffer_length = 0; | |
337 | auto &image_extents = this->m_image_extents; | |
338 | for (auto &image_extent : image_extents) { | |
339 | buffer_length += image_extent.second; | |
340 | } | |
341 | this->m_aio_comp->read_result.set_clip_length(buffer_length); | |
342 | return 0; | |
343 | } | |
344 | ||
345 | template <typename I> | |
346 | void ImageReadRequest<I>::send_request() { | |
347 | I &image_ctx = this->m_image_ctx; | |
348 | CephContext *cct = image_ctx.cct; | |
349 | ||
350 | auto &image_extents = this->m_image_extents; | |
11fdf7f2 | 351 | if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 && |
7c673cae FG |
352 | !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { |
353 | readahead(get_image_ctx(&image_ctx), image_extents); | |
354 | } | |
355 | ||
7c673cae | 356 | librados::snap_t snap_id; |
7c673cae FG |
357 | uint64_t buffer_ofs = 0; |
358 | { | |
359 | // prevent image size from changing between computing clip and recording | |
360 | // pending async operation | |
9f95a23c | 361 | std::shared_lock image_locker{image_ctx.image_lock}; |
7c673cae | 362 | snap_id = image_ctx.snap_id; |
9f95a23c | 363 | } |
7c673cae | 364 | |
9f95a23c TL |
365 | // map image extents to object extents |
366 | LightweightObjectExtents object_extents; | |
367 | for (auto &extent : image_extents) { | |
368 | if (extent.second == 0) { | |
369 | continue; | |
7c673cae | 370 | } |
7c673cae | 371 | |
9f95a23c TL |
372 | Striper::file_to_extents(cct, &image_ctx.layout, extent.first, |
373 | extent.second, 0, buffer_ofs, &object_extents); | |
374 | buffer_ofs += extent.second; | |
7c673cae | 375 | } |
7c673cae FG |
376 | |
377 | // issue the requests | |
9f95a23c TL |
378 | AioCompletion *aio_comp = this->m_aio_comp; |
379 | aio_comp->set_request_count(object_extents.size()); | |
380 | for (auto &oe : object_extents) { | |
381 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
382 | << oe.offset << "~" << oe.length << " from " | |
383 | << oe.buffer_extents << dendl; | |
7c673cae | 384 | |
9f95a23c TL |
385 | auto req_comp = new io::ReadResult::C_ObjectReadRequest( |
386 | aio_comp, oe.offset, oe.length, std::move(oe.buffer_extents)); | |
387 | auto req = ObjectDispatchSpec::create_read( | |
388 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, oe.object_no, oe.offset, | |
389 | oe.length, snap_id, m_op_flags, this->m_trace, &req_comp->bl, | |
390 | &req_comp->extent_map, req_comp); | |
391 | req->send(); | |
392 | } | |
7c673cae FG |
393 | |
394 | image_ctx.perfcounter->inc(l_librbd_rd); | |
395 | image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); | |
396 | } | |
397 | ||
398 | template <typename I> | |
399 | void ImageReadRequest<I>::send_image_cache_request() { | |
400 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 401 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
402 | |
403 | AioCompletion *aio_comp = this->m_aio_comp; | |
404 | aio_comp->set_request_count(1); | |
405 | ||
406 | auto *req_comp = new io::ReadResult::C_ImageReadRequest( | |
407 | aio_comp, this->m_image_extents); | |
408 | image_ctx.image_cache->aio_read(std::move(this->m_image_extents), | |
409 | &req_comp->bl, m_op_flags, | |
410 | req_comp); | |
411 | } | |
412 | ||
9f95a23c TL |
413 | template <typename I> |
414 | bool AbstractImageWriteRequest<I>::finish_request_early() { | |
415 | AioCompletion *aio_comp = this->m_aio_comp; | |
416 | { | |
417 | std::shared_lock image_locker{this->m_image_ctx.image_lock}; | |
418 | if (this->m_image_ctx.snap_id != CEPH_NOSNAP || this->m_image_ctx.read_only) { | |
419 | aio_comp->fail(-EROFS); | |
420 | return true; | |
421 | } | |
422 | } | |
423 | uint64_t total_bytes = 0; | |
424 | for (auto& image_extent : this->m_image_extents) { | |
425 | total_bytes += image_extent.second; | |
426 | } | |
427 | if (total_bytes == 0) { | |
428 | aio_comp->set_request_count(0); | |
429 | return true; | |
430 | } | |
431 | return false; | |
432 | } | |
433 | ||
7c673cae FG |
434 | template <typename I> |
435 | void AbstractImageWriteRequest<I>::send_request() { | |
436 | I &image_ctx = this->m_image_ctx; | |
437 | CephContext *cct = image_ctx.cct; | |
438 | ||
7c673cae FG |
439 | bool journaling = false; |
440 | ||
441 | AioCompletion *aio_comp = this->m_aio_comp; | |
7c673cae FG |
442 | ::SnapContext snapc; |
443 | { | |
444 | // prevent image size from changing between computing clip and recording | |
445 | // pending async operation | |
9f95a23c | 446 | std::shared_lock image_locker{image_ctx.image_lock}; |
7c673cae FG |
447 | |
448 | snapc = image_ctx.snapc; | |
449 | journaling = (image_ctx.journal != nullptr && | |
450 | image_ctx.journal->is_journal_appending()); | |
451 | } | |
452 | ||
9f95a23c TL |
453 | uint64_t clip_len = 0; |
454 | LightweightObjectExtents object_extents; | |
455 | for (auto &extent : this->m_image_extents) { | |
456 | if (extent.second == 0) { | |
457 | continue; | |
458 | } | |
459 | ||
460 | // map to object extents | |
461 | Striper::file_to_extents(cct, &image_ctx.layout, extent.first, | |
462 | extent.second, 0, clip_len, &object_extents); | |
463 | clip_len += extent.second; | |
464 | } | |
465 | ||
11fdf7f2 | 466 | int ret = prune_object_extents(&object_extents); |
c07f9fc5 FG |
467 | if (ret < 0) { |
468 | aio_comp->fail(ret); | |
469 | return; | |
470 | } | |
7c673cae | 471 | |
9f95a23c | 472 | aio_comp->set_request_count(object_extents.size()); |
7c673cae FG |
473 | if (!object_extents.empty()) { |
474 | uint64_t journal_tid = 0; | |
7c673cae FG |
475 | if (journaling) { |
476 | // in-flight ops are flushed prior to closing the journal | |
11fdf7f2 TL |
477 | ceph_assert(image_ctx.journal != NULL); |
478 | journal_tid = append_journal_event(m_synchronous); | |
7c673cae FG |
479 | } |
480 | ||
11fdf7f2 | 481 | send_object_requests(object_extents, snapc, journal_tid); |
7c673cae FG |
482 | } |
483 | ||
484 | update_stats(clip_len); | |
7c673cae FG |
485 | } |
486 | ||
487 | template <typename I> | |
488 | void AbstractImageWriteRequest<I>::send_object_requests( | |
9f95a23c | 489 | const LightweightObjectExtents &object_extents, const ::SnapContext &snapc, |
11fdf7f2 | 490 | uint64_t journal_tid) { |
7c673cae FG |
491 | I &image_ctx = this->m_image_ctx; |
492 | CephContext *cct = image_ctx.cct; | |
493 | ||
494 | AioCompletion *aio_comp = this->m_aio_comp; | |
9f95a23c TL |
495 | bool single_extent = (object_extents.size() == 1); |
496 | for (auto& oe : object_extents) { | |
497 | ldout(cct, 20) << data_object_name(&image_ctx, oe.object_no) << " " | |
498 | << oe.offset << "~" << oe.length << " from " | |
499 | << oe.buffer_extents << dendl; | |
7c673cae | 500 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); |
9f95a23c TL |
501 | auto request = create_object_request(oe, snapc, journal_tid, single_extent, |
502 | req_comp); | |
503 | request->send(); | |
7c673cae FG |
504 | } |
505 | } | |
506 | ||
507 | template <typename I> | |
9f95a23c TL |
508 | void ImageWriteRequest<I>::assemble_extent( |
509 | const LightweightObjectExtent &object_extent, bufferlist *bl) { | |
7c673cae FG |
510 | for (auto q = object_extent.buffer_extents.begin(); |
511 | q != object_extent.buffer_extents.end(); ++q) { | |
512 | bufferlist sub_bl; | |
513 | sub_bl.substr_of(m_bl, q->first, q->second); | |
514 | bl->claim_append(sub_bl); | |
515 | } | |
516 | } | |
517 | ||
518 | template <typename I> | |
11fdf7f2 | 519 | uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
520 | I &image_ctx = this->m_image_ctx; |
521 | ||
522 | uint64_t tid = 0; | |
523 | uint64_t buffer_offset = 0; | |
11fdf7f2 | 524 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
525 | for (auto &extent : this->m_image_extents) { |
526 | bufferlist sub_bl; | |
527 | sub_bl.substr_of(m_bl, buffer_offset, extent.second); | |
528 | buffer_offset += extent.second; | |
529 | ||
530 | tid = image_ctx.journal->append_write_event(extent.first, extent.second, | |
11fdf7f2 | 531 | sub_bl, synchronous); |
7c673cae FG |
532 | } |
533 | ||
7c673cae FG |
534 | return tid; |
535 | } | |
536 | ||
537 | template <typename I> | |
538 | void ImageWriteRequest<I>::send_image_cache_request() { | |
539 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 540 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
541 | |
542 | AioCompletion *aio_comp = this->m_aio_comp; | |
543 | aio_comp->set_request_count(1); | |
544 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
545 | image_ctx.image_cache->aio_write(std::move(this->m_image_extents), | |
546 | std::move(m_bl), m_op_flags, req_comp); | |
547 | } | |
548 | ||
549 | template <typename I> | |
11fdf7f2 | 550 | ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request( |
9f95a23c TL |
551 | const LightweightObjectExtent &object_extent, const ::SnapContext &snapc, |
552 | uint64_t journal_tid, bool single_extent, Context *on_finish) { | |
7c673cae | 553 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
554 | |
555 | bufferlist bl; | |
9f95a23c TL |
556 | if (single_extent && object_extent.buffer_extents.size() == 1) { |
557 | // optimization for single object/buffer extent writes | |
558 | bl = std::move(m_bl); | |
559 | } else { | |
560 | assemble_extent(object_extent, &bl); | |
561 | } | |
562 | ||
11fdf7f2 | 563 | auto req = ObjectDispatchSpec::create_write( |
9f95a23c TL |
564 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
565 | object_extent.offset, std::move(bl), snapc, m_op_flags, journal_tid, | |
566 | this->m_trace, on_finish); | |
7c673cae FG |
567 | return req; |
568 | } | |
569 | ||
570 | template <typename I> | |
571 | void ImageWriteRequest<I>::update_stats(size_t length) { | |
572 | I &image_ctx = this->m_image_ctx; | |
573 | image_ctx.perfcounter->inc(l_librbd_wr); | |
574 | image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); | |
575 | } | |
576 | ||
577 | template <typename I> | |
11fdf7f2 | 578 | uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
579 | I &image_ctx = this->m_image_ctx; |
580 | ||
581 | uint64_t tid = 0; | |
11fdf7f2 | 582 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae | 583 | for (auto &extent : this->m_image_extents) { |
11fdf7f2 TL |
584 | journal::EventEntry event_entry( |
585 | journal::AioDiscardEvent(extent.first, | |
586 | extent.second, | |
587 | this->m_discard_granularity_bytes)); | |
7c673cae | 588 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
589 | extent.first, extent.second, |
590 | synchronous, 0); | |
7c673cae FG |
591 | } |
592 | ||
7c673cae FG |
593 | return tid; |
594 | } | |
595 | ||
7c673cae FG |
596 | template <typename I> |
597 | void ImageDiscardRequest<I>::send_image_cache_request() { | |
598 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 599 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
600 | |
601 | AioCompletion *aio_comp = this->m_aio_comp; | |
602 | aio_comp->set_request_count(this->m_image_extents.size()); | |
603 | for (auto &extent : this->m_image_extents) { | |
604 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
605 | image_ctx.image_cache->aio_discard(extent.first, extent.second, | |
11fdf7f2 TL |
606 | this->m_discard_granularity_bytes, |
607 | req_comp); | |
7c673cae FG |
608 | } |
609 | } | |
610 | ||
611 | template <typename I> | |
11fdf7f2 | 612 | ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request( |
9f95a23c TL |
613 | const LightweightObjectExtent &object_extent, const ::SnapContext &snapc, |
614 | uint64_t journal_tid, bool single_extent, Context *on_finish) { | |
7c673cae | 615 | I &image_ctx = this->m_image_ctx; |
11fdf7f2 | 616 | auto req = ObjectDispatchSpec::create_discard( |
9f95a23c TL |
617 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
618 | object_extent.offset, object_extent.length, snapc, | |
11fdf7f2 TL |
619 | OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace, |
620 | on_finish); | |
7c673cae FG |
621 | return req; |
622 | } | |
623 | ||
624 | template <typename I> | |
625 | void ImageDiscardRequest<I>::update_stats(size_t length) { | |
626 | I &image_ctx = this->m_image_ctx; | |
627 | image_ctx.perfcounter->inc(l_librbd_discard); | |
628 | image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); | |
629 | } | |
630 | ||
11fdf7f2 TL |
631 | template <typename I> |
632 | int ImageDiscardRequest<I>::prune_object_extents( | |
9f95a23c | 633 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 TL |
634 | if (m_discard_granularity_bytes == 0) { |
635 | return 0; | |
636 | } | |
637 | ||
638 | // Align the range to discard_granularity_bytes boundary and skip | |
639 | // and discards that are too small to free up any space. | |
640 | // | |
641 | // discard_granularity_bytes >= object_size && tail truncation | |
642 | // is a special case for filestore | |
643 | bool prune_required = false; | |
644 | auto object_size = this->m_image_ctx.layout.object_size; | |
645 | auto discard_granularity_bytes = std::min(m_discard_granularity_bytes, | |
646 | object_size); | |
647 | auto xform_lambda = | |
648 | [discard_granularity_bytes, object_size, &prune_required] | |
9f95a23c | 649 | (LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
650 | auto& offset = object_extent.offset; |
651 | auto& length = object_extent.length; | |
652 | auto next_offset = offset + length; | |
653 | ||
654 | if ((discard_granularity_bytes < object_size) || | |
655 | (next_offset < object_size)) { | |
656 | offset = p2roundup<uint64_t>(offset, discard_granularity_bytes); | |
657 | next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes); | |
658 | if (offset >= next_offset) { | |
659 | prune_required = true; | |
660 | length = 0; | |
661 | } else { | |
662 | length = next_offset - offset; | |
663 | } | |
664 | } | |
665 | }; | |
666 | std::for_each(object_extents->begin(), object_extents->end(), | |
667 | xform_lambda); | |
668 | ||
669 | if (prune_required) { | |
670 | // one or more object extents were skipped | |
671 | auto remove_lambda = | |
9f95a23c | 672 | [](const LightweightObjectExtent& object_extent) { |
11fdf7f2 TL |
673 | return (object_extent.length == 0); |
674 | }; | |
675 | object_extents->erase( | |
676 | std::remove_if(object_extents->begin(), object_extents->end(), | |
677 | remove_lambda), | |
678 | object_extents->end()); | |
679 | } | |
680 | return 0; | |
681 | } | |
682 | ||
7c673cae FG |
683 | template <typename I> |
684 | void ImageFlushRequest<I>::send_request() { | |
685 | I &image_ctx = this->m_image_ctx; | |
7c673cae FG |
686 | |
687 | bool journaling = false; | |
688 | { | |
9f95a23c | 689 | std::shared_lock image_locker{image_ctx.image_lock}; |
11fdf7f2 TL |
690 | journaling = (m_flush_source == FLUSH_SOURCE_USER && |
691 | image_ctx.journal != nullptr && | |
7c673cae FG |
692 | image_ctx.journal->is_journal_appending()); |
693 | } | |
694 | ||
695 | AioCompletion *aio_comp = this->m_aio_comp; | |
11fdf7f2 TL |
696 | aio_comp->set_request_count(1); |
697 | ||
698 | Context *ctx = new C_AioRequest(aio_comp); | |
699 | ||
700 | // ensure no locks are held when flush is complete | |
701 | ctx = librbd::util::create_async_context_callback(image_ctx, ctx); | |
702 | ||
9f95a23c | 703 | uint64_t journal_tid = 0; |
7c673cae FG |
704 | if (journaling) { |
705 | // in-flight ops are flushed prior to closing the journal | |
9f95a23c TL |
706 | ceph_assert(image_ctx.journal != NULL); |
707 | journal_tid = image_ctx.journal->append_io_event( | |
11fdf7f2 | 708 | journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0); |
494da23a | 709 | image_ctx.journal->user_flushed(); |
7c673cae FG |
710 | } |
711 | ||
9f95a23c TL |
712 | auto object_dispatch_spec = ObjectDispatchSpec::create_flush( |
713 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, journal_tid, | |
714 | this->m_trace, ctx); | |
715 | ctx = new LambdaContext([object_dispatch_spec](int r) { | |
716 | object_dispatch_spec->send(); | |
717 | }); | |
718 | ||
11fdf7f2 | 719 | // ensure all in-flight IOs are settled if non-user flush request |
494da23a | 720 | aio_comp->async_op.flush(ctx); |
11fdf7f2 TL |
721 | |
722 | // might be flushing during image shutdown | |
723 | if (image_ctx.perfcounter != nullptr) { | |
724 | image_ctx.perfcounter->inc(l_librbd_flush); | |
725 | } | |
7c673cae FG |
726 | } |
727 | ||
728 | template <typename I> | |
729 | void ImageFlushRequest<I>::send_image_cache_request() { | |
730 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 731 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
732 | |
733 | AioCompletion *aio_comp = this->m_aio_comp; | |
734 | aio_comp->set_request_count(1); | |
735 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
736 | image_ctx.image_cache->aio_flush(req_comp); | |
737 | } | |
738 | ||
739 | template <typename I> | |
11fdf7f2 | 740 | uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
741 | I &image_ctx = this->m_image_ctx; |
742 | ||
743 | uint64_t tid = 0; | |
11fdf7f2 | 744 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
745 | for (auto &extent : this->m_image_extents) { |
746 | journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, | |
747 | extent.second, | |
748 | m_data_bl)); | |
749 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
11fdf7f2 TL |
750 | extent.first, extent.second, |
751 | synchronous, 0); | |
7c673cae FG |
752 | } |
753 | ||
7c673cae FG |
754 | return tid; |
755 | } | |
756 | ||
757 | template <typename I> | |
758 | void ImageWriteSameRequest<I>::send_image_cache_request() { | |
759 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 760 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
761 | |
762 | AioCompletion *aio_comp = this->m_aio_comp; | |
763 | aio_comp->set_request_count(this->m_image_extents.size()); | |
764 | for (auto &extent : this->m_image_extents) { | |
765 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
766 | image_ctx.image_cache->aio_writesame(extent.first, extent.second, | |
767 | std::move(m_data_bl), m_op_flags, | |
768 | req_comp); | |
769 | } | |
770 | } | |
771 | ||
772 | template <typename I> | |
11fdf7f2 | 773 | ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request( |
9f95a23c TL |
774 | const LightweightObjectExtent &object_extent, const ::SnapContext &snapc, |
775 | uint64_t journal_tid, bool single_extent, Context *on_finish) { | |
7c673cae | 776 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
777 | |
778 | bufferlist bl; | |
11fdf7f2 TL |
779 | ObjectDispatchSpec *req; |
780 | ||
781 | if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) { | |
9f95a23c | 782 | auto buffer_extents{object_extent.buffer_extents}; |
7c673cae | 783 | |
11fdf7f2 | 784 | req = ObjectDispatchSpec::create_write_same( |
9f95a23c TL |
785 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
786 | object_extent.offset, object_extent.length, std::move(buffer_extents), | |
787 | std::move(bl), snapc, m_op_flags, journal_tid, | |
11fdf7f2 | 788 | this->m_trace, on_finish); |
7c673cae FG |
789 | return req; |
790 | } | |
11fdf7f2 | 791 | req = ObjectDispatchSpec::create_write( |
9f95a23c TL |
792 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
793 | object_extent.offset, std::move(bl), snapc, m_op_flags, journal_tid, | |
794 | this->m_trace, on_finish); | |
7c673cae FG |
795 | return req; |
796 | } | |
797 | ||
798 | template <typename I> | |
799 | void ImageWriteSameRequest<I>::update_stats(size_t length) { | |
800 | I &image_ctx = this->m_image_ctx; | |
801 | image_ctx.perfcounter->inc(l_librbd_ws); | |
802 | image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); | |
803 | } | |
804 | ||
c07f9fc5 FG |
805 | template <typename I> |
806 | uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( | |
11fdf7f2 | 807 | bool synchronous) { |
c07f9fc5 FG |
808 | I &image_ctx = this->m_image_ctx; |
809 | ||
810 | uint64_t tid = 0; | |
11fdf7f2 | 811 | ceph_assert(this->m_image_extents.size() == 1); |
c07f9fc5 | 812 | auto &extent = this->m_image_extents.front(); |
11fdf7f2 TL |
813 | journal::EventEntry event_entry( |
814 | journal::AioCompareAndWriteEvent(extent.first, extent.second, m_cmp_bl, | |
815 | m_bl)); | |
c07f9fc5 | 816 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
817 | extent.first, extent.second, |
818 | synchronous, -EILSEQ); | |
c07f9fc5 FG |
819 | |
820 | return tid; | |
821 | } | |
822 | ||
c07f9fc5 FG |
823 | template <typename I> |
824 | void ImageCompareAndWriteRequest<I>::assemble_extent( | |
9f95a23c | 825 | const LightweightObjectExtent &object_extent, bufferlist *bl) { |
c07f9fc5 FG |
826 | for (auto q = object_extent.buffer_extents.begin(); |
827 | q != object_extent.buffer_extents.end(); ++q) { | |
828 | bufferlist sub_bl; | |
829 | sub_bl.substr_of(m_bl, q->first, q->second); | |
830 | bl->claim_append(sub_bl); | |
831 | } | |
832 | } | |
833 | ||
834 | template <typename I> | |
835 | void ImageCompareAndWriteRequest<I>::send_image_cache_request() { | |
836 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 837 | ceph_assert(image_ctx.image_cache != nullptr); |
c07f9fc5 FG |
838 | |
839 | AioCompletion *aio_comp = this->m_aio_comp; | |
840 | aio_comp->set_request_count(1); | |
841 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
842 | image_ctx.image_cache->aio_compare_and_write( | |
843 | std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl), | |
844 | m_mismatch_offset, m_op_flags, req_comp); | |
845 | } | |
846 | ||
847 | template <typename I> | |
11fdf7f2 | 848 | ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request( |
9f95a23c TL |
849 | const LightweightObjectExtent &object_extent, const ::SnapContext &snapc, |
850 | uint64_t journal_tid, bool single_extent, Context *on_finish) { | |
c07f9fc5 FG |
851 | I &image_ctx = this->m_image_ctx; |
852 | ||
11fdf7f2 TL |
853 | // NOTE: safe to move m_cmp_bl since we only support this op against |
854 | // a single object | |
c07f9fc5 FG |
855 | bufferlist bl; |
856 | assemble_extent(object_extent, &bl); | |
11fdf7f2 | 857 | auto req = ObjectDispatchSpec::create_compare_and_write( |
9f95a23c TL |
858 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.object_no, |
859 | object_extent.offset, std::move(m_cmp_bl), std::move(bl), snapc, | |
860 | m_mismatch_offset, m_op_flags, journal_tid, this->m_trace, on_finish); | |
c07f9fc5 FG |
861 | return req; |
862 | } | |
863 | ||
864 | template <typename I> | |
865 | void ImageCompareAndWriteRequest<I>::update_stats(size_t length) { | |
866 | I &image_ctx = this->m_image_ctx; | |
867 | image_ctx.perfcounter->inc(l_librbd_cmp); | |
868 | image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length); | |
869 | } | |
870 | ||
871 | template <typename I> | |
11fdf7f2 | 872 | int ImageCompareAndWriteRequest<I>::prune_object_extents( |
9f95a23c | 873 | LightweightObjectExtents* object_extents) const { |
11fdf7f2 | 874 | if (object_extents->size() > 1) |
c07f9fc5 FG |
875 | return -EINVAL; |
876 | ||
877 | I &image_ctx = this->m_image_ctx; | |
878 | uint64_t sector_size = 512ULL; | |
879 | uint64_t su = image_ctx.layout.stripe_unit; | |
9f95a23c | 880 | auto& object_extent = object_extents->front(); |
c07f9fc5 FG |
881 | if (object_extent.offset % sector_size + object_extent.length > sector_size || |
882 | (su != 0 && (object_extent.offset % su + object_extent.length > su))) | |
883 | return -EINVAL; | |
884 | ||
885 | return 0; | |
886 | } | |
887 | ||
7c673cae FG |
888 | } // namespace io |
889 | } // namespace librbd | |
890 | ||
// Explicit instantiations of the image request class templates for
// librbd::ImageCtx.
891 | template class librbd::io::ImageRequest<librbd::ImageCtx>; | |
892 | template class librbd::io::ImageReadRequest<librbd::ImageCtx>; | |
893 | template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>; | |
894 | template class librbd::io::ImageWriteRequest<librbd::ImageCtx>; | |
895 | template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>; | |
896 | template class librbd::io::ImageFlushRequest<librbd::ImageCtx>; | |
897 | template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>; | |
c07f9fc5 | 898 | template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>; |