]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ImageRequest.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/internal.h" | |
7 | #include "librbd/Journal.h" | |
b32b8144 | 8 | #include "librbd/Types.h" |
7c673cae FG |
9 | #include "librbd/Utils.h" |
10 | #include "librbd/cache/ImageCache.h" | |
11 | #include "librbd/io/AioCompletion.h" | |
11fdf7f2 TL |
12 | #include "librbd/io/AsyncOperation.h" |
13 | #include "librbd/io/ObjectDispatchInterface.h" | |
14 | #include "librbd/io/ObjectDispatchSpec.h" | |
15 | #include "librbd/io/ObjectDispatcher.h" | |
16 | #include "librbd/io/Utils.h" | |
7c673cae FG |
17 | #include "librbd/journal/Types.h" |
18 | #include "include/rados/librados.hpp" | |
11fdf7f2 | 19 | #include "common/perf_counters.h" |
7c673cae FG |
20 | #include "common/WorkQueue.h" |
21 | #include "osdc/Striper.h" | |
11fdf7f2 TL |
22 | #include <algorithm> |
23 | #include <functional> | |
7c673cae FG |
24 | |
25 | #define dout_subsys ceph_subsys_rbd | |
26 | #undef dout_prefix | |
27 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ | |
28 | << " " << __func__ << ": " | |
29 | ||
30 | namespace librbd { | |
31 | namespace io { | |
32 | ||
11fdf7f2 | 33 | using librbd::util::get_image_ctx; |
7c673cae FG |
34 | |
35 | namespace { | |
36 | ||
11fdf7f2 TL |
37 | template <typename I> |
38 | struct C_UpdateTimestamp : public Context { | |
39 | public: | |
40 | I& m_image_ctx; | |
41 | bool m_modify; // if modify set to 'true', modify timestamp is updated, | |
42 | // access timestamp otherwise | |
43 | AsyncOperation m_async_op; | |
91327a77 | 44 | |
11fdf7f2 TL |
45 | C_UpdateTimestamp(I& ictx, bool m) : m_image_ctx(ictx), m_modify(m) { |
46 | m_async_op.start_op(*get_image_ctx(&m_image_ctx)); | |
91327a77 | 47 | } |
11fdf7f2 TL |
48 | ~C_UpdateTimestamp() override { |
49 | m_async_op.finish_op(); | |
91327a77 AA |
50 | } |
51 | ||
11fdf7f2 TL |
52 | void send() { |
53 | librados::ObjectWriteOperation op; | |
54 | if (m_modify) { | |
55 | cls_client::set_modify_timestamp(&op); | |
56 | } else { | |
57 | cls_client::set_access_timestamp(&op); | |
91327a77 | 58 | } |
7c673cae | 59 | |
11fdf7f2 TL |
60 | auto comp = librbd::util::create_rados_callback(this); |
61 | int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); | |
62 | ceph_assert(r == 0); | |
63 | comp->release(); | |
7c673cae FG |
64 | } |
65 | ||
66 | void finish(int r) override { | |
11fdf7f2 | 67 | // ignore errors updating timestamp |
7c673cae FG |
68 | } |
69 | }; | |
70 | ||
11fdf7f2 TL |
71 | bool should_update_timestamp(const utime_t& now, const utime_t& timestamp, |
72 | uint64_t interval) { | |
73 | return (interval && | |
74 | (static_cast<uint64_t>(now.sec()) >= interval + timestamp)); | |
224ce89b WB |
75 | } |
76 | ||
11fdf7f2 | 77 | } // anonymous namespace |
c07f9fc5 | 78 | |
7c673cae FG |
79 | template <typename I> |
80 | void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c, | |
81 | Extents &&image_extents, | |
31f18b77 FG |
82 | ReadResult &&read_result, int op_flags, |
83 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 84 | ImageReadRequest<I> req(*ictx, c, std::move(image_extents), |
31f18b77 | 85 | std::move(read_result), op_flags, parent_trace); |
7c673cae FG |
86 | req.send(); |
87 | } | |
88 | ||
89 | template <typename I> | |
90 | void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c, | |
91 | Extents &&image_extents, bufferlist &&bl, | |
31f18b77 FG |
92 | int op_flags, |
93 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 94 | ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl), |
31f18b77 | 95 | op_flags, parent_trace); |
7c673cae FG |
96 | req.send(); |
97 | } | |
98 | ||
99 | template <typename I> | |
100 | void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c, | |
11fdf7f2 TL |
101 | Extents &&image_extents, |
102 | uint32_t discard_granularity_bytes, | |
31f18b77 | 103 | const ZTracer::Trace &parent_trace) { |
11fdf7f2 TL |
104 | ImageDiscardRequest<I> req(*ictx, c, std::move(image_extents), |
105 | discard_granularity_bytes, parent_trace); | |
7c673cae FG |
106 | req.send(); |
107 | } | |
108 | ||
109 | template <typename I> | |
31f18b77 | 110 | void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c, |
11fdf7f2 TL |
111 | FlushSource flush_source, |
112 | const ZTracer::Trace &parent_trace) { | |
113 | ImageFlushRequest<I> req(*ictx, c, flush_source, parent_trace); | |
7c673cae FG |
114 | req.send(); |
115 | } | |
116 | ||
117 | template <typename I> | |
118 | void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c, | |
11fdf7f2 | 119 | Extents &&image_extents, |
31f18b77 FG |
120 | bufferlist &&bl, int op_flags, |
121 | const ZTracer::Trace &parent_trace) { | |
11fdf7f2 TL |
122 | ImageWriteSameRequest<I> req(*ictx, c, std::move(image_extents), |
123 | std::move(bl), op_flags, parent_trace); | |
7c673cae FG |
124 | req.send(); |
125 | } | |
126 | ||
c07f9fc5 FG |
127 | template <typename I> |
128 | void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c, | |
129 | Extents &&image_extents, | |
130 | bufferlist &&cmp_bl, | |
131 | bufferlist &&bl, | |
132 | uint64_t *mismatch_offset, | |
133 | int op_flags, | |
134 | const ZTracer::Trace &parent_trace) { | |
135 | ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents), | |
136 | std::move(cmp_bl), std::move(bl), | |
137 | mismatch_offset, op_flags, parent_trace); | |
138 | req.send(); | |
139 | } | |
140 | ||
141 | ||
7c673cae FG |
142 | template <typename I> |
143 | void ImageRequest<I>::send() { | |
144 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 TL |
145 | ceph_assert(m_aio_comp->is_initialized(get_aio_type())); |
146 | ceph_assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH)); | |
7c673cae FG |
147 | |
148 | CephContext *cct = image_ctx.cct; | |
149 | AioCompletion *aio_comp = this->m_aio_comp; | |
150 | ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " | |
c07f9fc5 | 151 | << "completion=" << aio_comp << dendl; |
7c673cae FG |
152 | |
153 | aio_comp->get(); | |
154 | int r = clip_request(); | |
155 | if (r < 0) { | |
156 | m_aio_comp->fail(r); | |
157 | return; | |
158 | } | |
159 | ||
160 | if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { | |
11fdf7f2 | 161 | update_timestamp(); |
7c673cae FG |
162 | send_request(); |
163 | } else { | |
164 | send_image_cache_request(); | |
165 | } | |
166 | } | |
167 | ||
168 | template <typename I> | |
169 | int ImageRequest<I>::clip_request() { | |
170 | RWLock::RLocker snap_locker(m_image_ctx.snap_lock); | |
171 | for (auto &image_extent : m_image_extents) { | |
172 | auto clip_len = image_extent.second; | |
173 | int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len); | |
174 | if (r < 0) { | |
175 | return r; | |
176 | } | |
177 | ||
178 | image_extent.second = clip_len; | |
179 | } | |
180 | return 0; | |
181 | } | |
182 | ||
183 | template <typename I> | |
11fdf7f2 TL |
184 | void ImageRequest<I>::update_timestamp() { |
185 | bool modify = (get_aio_type() != AIO_TYPE_READ); | |
186 | uint64_t update_interval; | |
187 | if (modify) { | |
188 | update_interval = m_image_ctx.mtime_update_interval; | |
189 | } else { | |
190 | update_interval = m_image_ctx.atime_update_interval; | |
191 | } | |
7c673cae | 192 | |
11fdf7f2 TL |
193 | if (update_interval == 0) { |
194 | return; | |
195 | } | |
196 | ||
197 | utime_t (I::*get_timestamp_fn)() const; | |
198 | void (I::*set_timestamp_fn)(utime_t); | |
199 | if (modify) { | |
200 | get_timestamp_fn = &I::get_modify_timestamp; | |
201 | set_timestamp_fn = &I::set_modify_timestamp; | |
202 | } else { | |
203 | get_timestamp_fn = &I::get_access_timestamp; | |
204 | set_timestamp_fn = &I::set_access_timestamp; | |
205 | } | |
206 | ||
207 | utime_t ts = ceph_clock_now(); | |
208 | { | |
209 | RWLock::RLocker timestamp_locker(m_image_ctx.timestamp_lock); | |
210 | if(!should_update_timestamp(ts, std::invoke(get_timestamp_fn, m_image_ctx), | |
211 | update_interval)) { | |
212 | return; | |
213 | } | |
214 | } | |
215 | ||
216 | { | |
217 | RWLock::WLocker timestamp_locker(m_image_ctx.timestamp_lock); | |
218 | bool update = should_update_timestamp( | |
219 | ts, std::invoke(get_timestamp_fn, m_image_ctx), update_interval); | |
220 | if (!update) { | |
221 | return; | |
222 | } | |
223 | ||
224 | std::invoke(set_timestamp_fn, m_image_ctx, ts); | |
225 | } | |
226 | ||
227 | // TODO we fire and forget this outside the IO path to prevent | |
228 | // potential race conditions with librbd client IO callbacks | |
229 | // between different threads (e.g. librados and object cacher) | |
230 | ldout(m_image_ctx.cct, 10) << get_request_type() << dendl; | |
231 | auto req = new C_UpdateTimestamp<I>(m_image_ctx, modify); | |
232 | req->send(); | |
7c673cae FG |
233 | } |
234 | ||
235 | template <typename I> | |
236 | ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, | |
237 | Extents &&image_extents, | |
31f18b77 FG |
238 | ReadResult &&read_result, int op_flags, |
239 | const ZTracer::Trace &parent_trace) | |
240 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read", | |
241 | parent_trace), | |
7c673cae FG |
242 | m_op_flags(op_flags) { |
243 | aio_comp->read_result = std::move(read_result); | |
244 | } | |
245 | ||
246 | template <typename I> | |
247 | int ImageReadRequest<I>::clip_request() { | |
248 | int r = ImageRequest<I>::clip_request(); | |
249 | if (r < 0) { | |
250 | return r; | |
251 | } | |
252 | ||
253 | uint64_t buffer_length = 0; | |
254 | auto &image_extents = this->m_image_extents; | |
255 | for (auto &image_extent : image_extents) { | |
256 | buffer_length += image_extent.second; | |
257 | } | |
258 | this->m_aio_comp->read_result.set_clip_length(buffer_length); | |
259 | return 0; | |
260 | } | |
261 | ||
262 | template <typename I> | |
263 | void ImageReadRequest<I>::send_request() { | |
264 | I &image_ctx = this->m_image_ctx; | |
265 | CephContext *cct = image_ctx.cct; | |
266 | ||
267 | auto &image_extents = this->m_image_extents; | |
11fdf7f2 | 268 | if (image_ctx.cache && image_ctx.readahead_max_bytes > 0 && |
7c673cae FG |
269 | !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { |
270 | readahead(get_image_ctx(&image_ctx), image_extents); | |
271 | } | |
272 | ||
273 | AioCompletion *aio_comp = this->m_aio_comp; | |
274 | librados::snap_t snap_id; | |
275 | map<object_t,vector<ObjectExtent> > object_extents; | |
276 | uint64_t buffer_ofs = 0; | |
277 | { | |
278 | // prevent image size from changing between computing clip and recording | |
279 | // pending async operation | |
280 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
281 | snap_id = image_ctx.snap_id; | |
282 | ||
283 | // map image extents to object extents | |
284 | for (auto &extent : image_extents) { | |
285 | if (extent.second == 0) { | |
286 | continue; | |
287 | } | |
288 | ||
289 | Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, | |
290 | extent.first, extent.second, 0, object_extents, | |
291 | buffer_ofs); | |
292 | buffer_ofs += extent.second; | |
293 | } | |
294 | } | |
295 | ||
296 | // pre-calculate the expected number of read requests | |
297 | uint32_t request_count = 0; | |
298 | for (auto &object_extent : object_extents) { | |
299 | request_count += object_extent.second.size(); | |
300 | } | |
301 | aio_comp->set_request_count(request_count); | |
302 | ||
303 | // issue the requests | |
304 | for (auto &object_extent : object_extents) { | |
305 | for (auto &extent : object_extent.second) { | |
306 | ldout(cct, 20) << "oid " << extent.oid << " " << extent.offset << "~" | |
307 | << extent.length << " from " << extent.buffer_extents | |
308 | << dendl; | |
309 | ||
11fdf7f2 TL |
310 | auto req_comp = new io::ReadResult::C_ObjectReadRequest( |
311 | aio_comp, extent.offset, extent.length, | |
312 | std::move(extent.buffer_extents)); | |
313 | auto req = ObjectDispatchSpec::create_read( | |
314 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, extent.oid.name, | |
315 | extent.objectno, extent.offset, extent.length, snap_id, m_op_flags, | |
316 | this->m_trace, &req_comp->bl, &req_comp->extent_map, req_comp); | |
b32b8144 | 317 | req->send(); |
7c673cae FG |
318 | } |
319 | } | |
320 | ||
321 | aio_comp->put(); | |
322 | ||
323 | image_ctx.perfcounter->inc(l_librbd_rd); | |
324 | image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); | |
325 | } | |
326 | ||
327 | template <typename I> | |
328 | void ImageReadRequest<I>::send_image_cache_request() { | |
329 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 330 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
331 | |
332 | AioCompletion *aio_comp = this->m_aio_comp; | |
333 | aio_comp->set_request_count(1); | |
334 | ||
335 | auto *req_comp = new io::ReadResult::C_ImageReadRequest( | |
336 | aio_comp, this->m_image_extents); | |
337 | image_ctx.image_cache->aio_read(std::move(this->m_image_extents), | |
338 | &req_comp->bl, m_op_flags, | |
339 | req_comp); | |
340 | } | |
341 | ||
342 | template <typename I> | |
343 | void AbstractImageWriteRequest<I>::send_request() { | |
344 | I &image_ctx = this->m_image_ctx; | |
345 | CephContext *cct = image_ctx.cct; | |
346 | ||
347 | RWLock::RLocker md_locker(image_ctx.md_lock); | |
348 | ||
349 | bool journaling = false; | |
350 | ||
351 | AioCompletion *aio_comp = this->m_aio_comp; | |
352 | uint64_t clip_len = 0; | |
353 | ObjectExtents object_extents; | |
354 | ::SnapContext snapc; | |
355 | { | |
356 | // prevent image size from changing between computing clip and recording | |
357 | // pending async operation | |
358 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
359 | if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) { | |
360 | aio_comp->fail(-EROFS); | |
361 | return; | |
362 | } | |
363 | ||
364 | for (auto &extent : this->m_image_extents) { | |
365 | if (extent.second == 0) { | |
366 | continue; | |
367 | } | |
368 | ||
369 | // map to object extents | |
370 | Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, | |
371 | extent.first, extent.second, 0, object_extents); | |
372 | clip_len += extent.second; | |
373 | } | |
374 | ||
375 | snapc = image_ctx.snapc; | |
376 | journaling = (image_ctx.journal != nullptr && | |
377 | image_ctx.journal->is_journal_appending()); | |
378 | } | |
379 | ||
11fdf7f2 | 380 | int ret = prune_object_extents(&object_extents); |
c07f9fc5 FG |
381 | if (ret < 0) { |
382 | aio_comp->fail(ret); | |
383 | return; | |
384 | } | |
7c673cae FG |
385 | |
386 | if (!object_extents.empty()) { | |
387 | uint64_t journal_tid = 0; | |
7c673cae FG |
388 | if (journaling) { |
389 | // in-flight ops are flushed prior to closing the journal | |
11fdf7f2 TL |
390 | ceph_assert(image_ctx.journal != NULL); |
391 | journal_tid = append_journal_event(m_synchronous); | |
7c673cae FG |
392 | } |
393 | ||
11fdf7f2 TL |
394 | aio_comp->set_request_count(object_extents.size()); |
395 | send_object_requests(object_extents, snapc, journal_tid); | |
7c673cae FG |
396 | } else { |
397 | // no IO to perform -- fire completion | |
398 | aio_comp->unblock(); | |
399 | } | |
400 | ||
401 | update_stats(clip_len); | |
402 | aio_comp->put(); | |
403 | } | |
404 | ||
405 | template <typename I> | |
406 | void AbstractImageWriteRequest<I>::send_object_requests( | |
407 | const ObjectExtents &object_extents, const ::SnapContext &snapc, | |
11fdf7f2 | 408 | uint64_t journal_tid) { |
7c673cae FG |
409 | I &image_ctx = this->m_image_ctx; |
410 | CephContext *cct = image_ctx.cct; | |
411 | ||
412 | AioCompletion *aio_comp = this->m_aio_comp; | |
413 | for (ObjectExtents::const_iterator p = object_extents.begin(); | |
414 | p != object_extents.end(); ++p) { | |
415 | ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" << p->length | |
416 | << " from " << p->buffer_extents << dendl; | |
417 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
11fdf7f2 | 418 | auto request = create_object_request(*p, snapc, journal_tid, req_comp); |
7c673cae FG |
419 | |
420 | // if journaling, stash the request for later; otherwise send | |
421 | if (request != NULL) { | |
11fdf7f2 | 422 | request->send(); |
7c673cae FG |
423 | } |
424 | } | |
425 | } | |
426 | ||
427 | template <typename I> | |
428 | void ImageWriteRequest<I>::assemble_extent(const ObjectExtent &object_extent, | |
429 | bufferlist *bl) { | |
430 | for (auto q = object_extent.buffer_extents.begin(); | |
431 | q != object_extent.buffer_extents.end(); ++q) { | |
432 | bufferlist sub_bl; | |
433 | sub_bl.substr_of(m_bl, q->first, q->second); | |
434 | bl->claim_append(sub_bl); | |
435 | } | |
436 | } | |
437 | ||
438 | template <typename I> | |
11fdf7f2 | 439 | uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
440 | I &image_ctx = this->m_image_ctx; |
441 | ||
442 | uint64_t tid = 0; | |
443 | uint64_t buffer_offset = 0; | |
11fdf7f2 | 444 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
445 | for (auto &extent : this->m_image_extents) { |
446 | bufferlist sub_bl; | |
447 | sub_bl.substr_of(m_bl, buffer_offset, extent.second); | |
448 | buffer_offset += extent.second; | |
449 | ||
450 | tid = image_ctx.journal->append_write_event(extent.first, extent.second, | |
11fdf7f2 | 451 | sub_bl, synchronous); |
7c673cae FG |
452 | } |
453 | ||
7c673cae FG |
454 | return tid; |
455 | } | |
456 | ||
457 | template <typename I> | |
458 | void ImageWriteRequest<I>::send_image_cache_request() { | |
459 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 460 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
461 | |
462 | AioCompletion *aio_comp = this->m_aio_comp; | |
463 | aio_comp->set_request_count(1); | |
464 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
465 | image_ctx.image_cache->aio_write(std::move(this->m_image_extents), | |
466 | std::move(m_bl), m_op_flags, req_comp); | |
467 | } | |
468 | ||
469 | template <typename I> | |
11fdf7f2 | 470 | ObjectDispatchSpec *ImageWriteRequest<I>::create_object_request( |
7c673cae | 471 | const ObjectExtent &object_extent, const ::SnapContext &snapc, |
11fdf7f2 | 472 | uint64_t journal_tid, Context *on_finish) { |
7c673cae | 473 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
474 | |
475 | bufferlist bl; | |
476 | assemble_extent(object_extent, &bl); | |
11fdf7f2 TL |
477 | auto req = ObjectDispatchSpec::create_write( |
478 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name, | |
479 | object_extent.objectno, object_extent.offset, std::move(bl), snapc, | |
480 | m_op_flags, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
481 | return req; |
482 | } | |
483 | ||
484 | template <typename I> | |
485 | void ImageWriteRequest<I>::update_stats(size_t length) { | |
486 | I &image_ctx = this->m_image_ctx; | |
487 | image_ctx.perfcounter->inc(l_librbd_wr); | |
488 | image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); | |
489 | } | |
490 | ||
491 | template <typename I> | |
11fdf7f2 | 492 | uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
493 | I &image_ctx = this->m_image_ctx; |
494 | ||
495 | uint64_t tid = 0; | |
11fdf7f2 | 496 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae | 497 | for (auto &extent : this->m_image_extents) { |
11fdf7f2 TL |
498 | journal::EventEntry event_entry( |
499 | journal::AioDiscardEvent(extent.first, | |
500 | extent.second, | |
501 | this->m_discard_granularity_bytes)); | |
7c673cae | 502 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
503 | extent.first, extent.second, |
504 | synchronous, 0); | |
7c673cae FG |
505 | } |
506 | ||
7c673cae FG |
507 | return tid; |
508 | } | |
509 | ||
7c673cae FG |
510 | template <typename I> |
511 | void ImageDiscardRequest<I>::send_image_cache_request() { | |
512 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 513 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
514 | |
515 | AioCompletion *aio_comp = this->m_aio_comp; | |
516 | aio_comp->set_request_count(this->m_image_extents.size()); | |
517 | for (auto &extent : this->m_image_extents) { | |
518 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
519 | image_ctx.image_cache->aio_discard(extent.first, extent.second, | |
11fdf7f2 TL |
520 | this->m_discard_granularity_bytes, |
521 | req_comp); | |
7c673cae FG |
522 | } |
523 | } | |
524 | ||
525 | template <typename I> | |
11fdf7f2 | 526 | ObjectDispatchSpec *ImageDiscardRequest<I>::create_object_request( |
7c673cae | 527 | const ObjectExtent &object_extent, const ::SnapContext &snapc, |
11fdf7f2 | 528 | uint64_t journal_tid, Context *on_finish) { |
7c673cae | 529 | I &image_ctx = this->m_image_ctx; |
11fdf7f2 TL |
530 | auto req = ObjectDispatchSpec::create_discard( |
531 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name, | |
532 | object_extent.objectno, object_extent.offset, object_extent.length, snapc, | |
533 | OBJECT_DISCARD_FLAG_DISABLE_CLONE_REMOVE, journal_tid, this->m_trace, | |
534 | on_finish); | |
7c673cae FG |
535 | return req; |
536 | } | |
537 | ||
538 | template <typename I> | |
539 | void ImageDiscardRequest<I>::update_stats(size_t length) { | |
540 | I &image_ctx = this->m_image_ctx; | |
541 | image_ctx.perfcounter->inc(l_librbd_discard); | |
542 | image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); | |
543 | } | |
544 | ||
11fdf7f2 TL |
545 | template <typename I> |
546 | int ImageDiscardRequest<I>::prune_object_extents( | |
547 | ObjectExtents* object_extents) const { | |
548 | if (m_discard_granularity_bytes == 0) { | |
549 | return 0; | |
550 | } | |
551 | ||
552 | // Align the range to discard_granularity_bytes boundary and skip | |
553 | // and discards that are too small to free up any space. | |
554 | // | |
555 | // discard_granularity_bytes >= object_size && tail truncation | |
556 | // is a special case for filestore | |
557 | bool prune_required = false; | |
558 | auto object_size = this->m_image_ctx.layout.object_size; | |
559 | auto discard_granularity_bytes = std::min(m_discard_granularity_bytes, | |
560 | object_size); | |
561 | auto xform_lambda = | |
562 | [discard_granularity_bytes, object_size, &prune_required] | |
563 | (ObjectExtent& object_extent) { | |
564 | auto& offset = object_extent.offset; | |
565 | auto& length = object_extent.length; | |
566 | auto next_offset = offset + length; | |
567 | ||
568 | if ((discard_granularity_bytes < object_size) || | |
569 | (next_offset < object_size)) { | |
570 | offset = p2roundup<uint64_t>(offset, discard_granularity_bytes); | |
571 | next_offset = p2align<uint64_t>(next_offset, discard_granularity_bytes); | |
572 | if (offset >= next_offset) { | |
573 | prune_required = true; | |
574 | length = 0; | |
575 | } else { | |
576 | length = next_offset - offset; | |
577 | } | |
578 | } | |
579 | }; | |
580 | std::for_each(object_extents->begin(), object_extents->end(), | |
581 | xform_lambda); | |
582 | ||
583 | if (prune_required) { | |
584 | // one or more object extents were skipped | |
585 | auto remove_lambda = | |
586 | [](const ObjectExtent& object_extent) { | |
587 | return (object_extent.length == 0); | |
588 | }; | |
589 | object_extents->erase( | |
590 | std::remove_if(object_extents->begin(), object_extents->end(), | |
591 | remove_lambda), | |
592 | object_extents->end()); | |
593 | } | |
594 | return 0; | |
595 | } | |
596 | ||
7c673cae FG |
597 | template <typename I> |
598 | void ImageFlushRequest<I>::send_request() { | |
599 | I &image_ctx = this->m_image_ctx; | |
7c673cae FG |
600 | |
601 | bool journaling = false; | |
602 | { | |
603 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
11fdf7f2 TL |
604 | journaling = (m_flush_source == FLUSH_SOURCE_USER && |
605 | image_ctx.journal != nullptr && | |
7c673cae FG |
606 | image_ctx.journal->is_journal_appending()); |
607 | } | |
608 | ||
609 | AioCompletion *aio_comp = this->m_aio_comp; | |
11fdf7f2 TL |
610 | aio_comp->set_request_count(1); |
611 | ||
612 | Context *ctx = new C_AioRequest(aio_comp); | |
613 | ||
614 | // ensure no locks are held when flush is complete | |
615 | ctx = librbd::util::create_async_context_callback(image_ctx, ctx); | |
616 | ||
7c673cae FG |
617 | if (journaling) { |
618 | // in-flight ops are flushed prior to closing the journal | |
619 | uint64_t journal_tid = image_ctx.journal->append_io_event( | |
11fdf7f2 TL |
620 | journal::EventEntry(journal::AioFlushEvent()), 0, 0, false, 0); |
621 | ||
622 | ctx = new FunctionContext( | |
623 | [&image_ctx, journal_tid, ctx](int r) { | |
624 | image_ctx.journal->commit_io_event(journal_tid, r); | |
625 | ctx->complete(r); | |
626 | }); | |
627 | ctx = new FunctionContext( | |
628 | [&image_ctx, journal_tid, ctx](int r) { | |
7c673cae | 629 | image_ctx.journal->flush_event(journal_tid, ctx); |
11fdf7f2 | 630 | }); |
7c673cae FG |
631 | } else { |
632 | // flush rbd cache only when journaling is not enabled | |
11fdf7f2 TL |
633 | auto object_dispatch_spec = ObjectDispatchSpec::create_flush( |
634 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, m_flush_source, this->m_trace, | |
635 | ctx); | |
636 | ctx = new FunctionContext([object_dispatch_spec](int r) { | |
637 | object_dispatch_spec->send(); | |
638 | }); | |
7c673cae FG |
639 | } |
640 | ||
11fdf7f2 TL |
641 | // ensure all in-flight IOs are settled if non-user flush request |
642 | image_ctx.flush_async_operations(ctx); | |
643 | aio_comp->start_op(true); | |
644 | aio_comp->put(); | |
645 | ||
646 | // might be flushing during image shutdown | |
647 | if (image_ctx.perfcounter != nullptr) { | |
648 | image_ctx.perfcounter->inc(l_librbd_flush); | |
649 | } | |
7c673cae FG |
650 | } |
651 | ||
652 | template <typename I> | |
653 | void ImageFlushRequest<I>::send_image_cache_request() { | |
654 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 655 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
656 | |
657 | AioCompletion *aio_comp = this->m_aio_comp; | |
658 | aio_comp->set_request_count(1); | |
659 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
660 | image_ctx.image_cache->aio_flush(req_comp); | |
661 | } | |
662 | ||
663 | template <typename I> | |
11fdf7f2 | 664 | uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { |
7c673cae FG |
665 | I &image_ctx = this->m_image_ctx; |
666 | ||
667 | uint64_t tid = 0; | |
11fdf7f2 | 668 | ceph_assert(!this->m_image_extents.empty()); |
7c673cae FG |
669 | for (auto &extent : this->m_image_extents) { |
670 | journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, | |
671 | extent.second, | |
672 | m_data_bl)); | |
673 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
11fdf7f2 TL |
674 | extent.first, extent.second, |
675 | synchronous, 0); | |
7c673cae FG |
676 | } |
677 | ||
7c673cae FG |
678 | return tid; |
679 | } | |
680 | ||
681 | template <typename I> | |
682 | void ImageWriteSameRequest<I>::send_image_cache_request() { | |
683 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 684 | ceph_assert(image_ctx.image_cache != nullptr); |
7c673cae FG |
685 | |
686 | AioCompletion *aio_comp = this->m_aio_comp; | |
687 | aio_comp->set_request_count(this->m_image_extents.size()); | |
688 | for (auto &extent : this->m_image_extents) { | |
689 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
690 | image_ctx.image_cache->aio_writesame(extent.first, extent.second, | |
691 | std::move(m_data_bl), m_op_flags, | |
692 | req_comp); | |
693 | } | |
694 | } | |
695 | ||
696 | template <typename I> | |
11fdf7f2 | 697 | ObjectDispatchSpec *ImageWriteSameRequest<I>::create_object_request( |
7c673cae | 698 | const ObjectExtent &object_extent, const ::SnapContext &snapc, |
11fdf7f2 | 699 | uint64_t journal_tid, Context *on_finish) { |
7c673cae | 700 | I &image_ctx = this->m_image_ctx; |
7c673cae FG |
701 | |
702 | bufferlist bl; | |
11fdf7f2 TL |
703 | ObjectDispatchSpec *req; |
704 | ||
705 | if (util::assemble_write_same_extent(object_extent, m_data_bl, &bl, false)) { | |
706 | Extents buffer_extents{object_extent.buffer_extents}; | |
7c673cae | 707 | |
11fdf7f2 TL |
708 | req = ObjectDispatchSpec::create_write_same( |
709 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name, | |
710 | object_extent.objectno, object_extent.offset, object_extent.length, | |
711 | std::move(buffer_extents), std::move(bl), snapc, m_op_flags, journal_tid, | |
712 | this->m_trace, on_finish); | |
7c673cae FG |
713 | return req; |
714 | } | |
11fdf7f2 TL |
715 | req = ObjectDispatchSpec::create_write( |
716 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name, | |
717 | object_extent.objectno, object_extent.offset, std::move(bl), snapc, | |
718 | m_op_flags, journal_tid, this->m_trace, on_finish); | |
7c673cae FG |
719 | return req; |
720 | } | |
721 | ||
722 | template <typename I> | |
723 | void ImageWriteSameRequest<I>::update_stats(size_t length) { | |
724 | I &image_ctx = this->m_image_ctx; | |
725 | image_ctx.perfcounter->inc(l_librbd_ws); | |
726 | image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); | |
727 | } | |
728 | ||
c07f9fc5 FG |
729 | template <typename I> |
730 | uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( | |
11fdf7f2 | 731 | bool synchronous) { |
c07f9fc5 FG |
732 | I &image_ctx = this->m_image_ctx; |
733 | ||
734 | uint64_t tid = 0; | |
11fdf7f2 | 735 | ceph_assert(this->m_image_extents.size() == 1); |
c07f9fc5 | 736 | auto &extent = this->m_image_extents.front(); |
11fdf7f2 TL |
737 | journal::EventEntry event_entry( |
738 | journal::AioCompareAndWriteEvent(extent.first, extent.second, m_cmp_bl, | |
739 | m_bl)); | |
c07f9fc5 | 740 | tid = image_ctx.journal->append_io_event(std::move(event_entry), |
11fdf7f2 TL |
741 | extent.first, extent.second, |
742 | synchronous, -EILSEQ); | |
c07f9fc5 FG |
743 | |
744 | return tid; | |
745 | } | |
746 | ||
c07f9fc5 FG |
747 | template <typename I> |
748 | void ImageCompareAndWriteRequest<I>::assemble_extent( | |
749 | const ObjectExtent &object_extent, bufferlist *bl) { | |
750 | for (auto q = object_extent.buffer_extents.begin(); | |
751 | q != object_extent.buffer_extents.end(); ++q) { | |
752 | bufferlist sub_bl; | |
753 | sub_bl.substr_of(m_bl, q->first, q->second); | |
754 | bl->claim_append(sub_bl); | |
755 | } | |
756 | } | |
757 | ||
758 | template <typename I> | |
759 | void ImageCompareAndWriteRequest<I>::send_image_cache_request() { | |
760 | I &image_ctx = this->m_image_ctx; | |
11fdf7f2 | 761 | ceph_assert(image_ctx.image_cache != nullptr); |
c07f9fc5 FG |
762 | |
763 | AioCompletion *aio_comp = this->m_aio_comp; | |
764 | aio_comp->set_request_count(1); | |
765 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
766 | image_ctx.image_cache->aio_compare_and_write( | |
767 | std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl), | |
768 | m_mismatch_offset, m_op_flags, req_comp); | |
769 | } | |
770 | ||
771 | template <typename I> | |
11fdf7f2 | 772 | ObjectDispatchSpec *ImageCompareAndWriteRequest<I>::create_object_request( |
c07f9fc5 FG |
773 | const ObjectExtent &object_extent, |
774 | const ::SnapContext &snapc, | |
11fdf7f2 | 775 | uint64_t journal_tid, Context *on_finish) { |
c07f9fc5 FG |
776 | I &image_ctx = this->m_image_ctx; |
777 | ||
11fdf7f2 TL |
778 | // NOTE: safe to move m_cmp_bl since we only support this op against |
779 | // a single object | |
c07f9fc5 FG |
780 | bufferlist bl; |
781 | assemble_extent(object_extent, &bl); | |
11fdf7f2 TL |
782 | auto req = ObjectDispatchSpec::create_compare_and_write( |
783 | &image_ctx, OBJECT_DISPATCH_LAYER_NONE, object_extent.oid.name, | |
784 | object_extent.objectno, object_extent.offset, std::move(m_cmp_bl), | |
785 | std::move(bl), snapc, m_mismatch_offset, m_op_flags, journal_tid, | |
786 | this->m_trace, on_finish); | |
c07f9fc5 FG |
787 | return req; |
788 | } | |
789 | ||
790 | template <typename I> | |
791 | void ImageCompareAndWriteRequest<I>::update_stats(size_t length) { | |
792 | I &image_ctx = this->m_image_ctx; | |
793 | image_ctx.perfcounter->inc(l_librbd_cmp); | |
794 | image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length); | |
795 | } | |
796 | ||
797 | template <typename I> | |
11fdf7f2 TL |
798 | int ImageCompareAndWriteRequest<I>::prune_object_extents( |
799 | ObjectExtents* object_extents) const { | |
800 | if (object_extents->size() > 1) | |
c07f9fc5 FG |
801 | return -EINVAL; |
802 | ||
803 | I &image_ctx = this->m_image_ctx; | |
804 | uint64_t sector_size = 512ULL; | |
805 | uint64_t su = image_ctx.layout.stripe_unit; | |
11fdf7f2 | 806 | ObjectExtent object_extent = object_extents->front(); |
c07f9fc5 FG |
807 | if (object_extent.offset % sector_size + object_extent.length > sector_size || |
808 | (su != 0 && (object_extent.offset % su + object_extent.length > su))) | |
809 | return -EINVAL; | |
810 | ||
811 | return 0; | |
812 | } | |
813 | ||
7c673cae FG |
814 | } // namespace io |
815 | } // namespace librbd | |
816 | ||
817 | template class librbd::io::ImageRequest<librbd::ImageCtx>; | |
818 | template class librbd::io::ImageReadRequest<librbd::ImageCtx>; | |
819 | template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>; | |
820 | template class librbd::io::ImageWriteRequest<librbd::ImageCtx>; | |
821 | template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>; | |
822 | template class librbd::io::ImageFlushRequest<librbd::ImageCtx>; | |
823 | template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>; | |
c07f9fc5 | 824 | template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>; |