]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ImageRequest.h" | |
5 | #include "librbd/ImageCtx.h" | |
6 | #include "librbd/internal.h" | |
7 | #include "librbd/Journal.h" | |
8 | #include "librbd/Utils.h" | |
9 | #include "librbd/cache/ImageCache.h" | |
10 | #include "librbd/io/AioCompletion.h" | |
11 | #include "librbd/io/ObjectRequest.h" | |
12 | #include "librbd/journal/Types.h" | |
13 | #include "include/rados/librados.hpp" | |
14 | #include "common/WorkQueue.h" | |
15 | #include "osdc/Striper.h" | |
16 | ||
17 | #define dout_subsys ceph_subsys_rbd | |
18 | #undef dout_prefix | |
19 | #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ | |
20 | << " " << __func__ << ": " | |
21 | ||
22 | namespace librbd { | |
23 | namespace io { | |
24 | ||
25 | using util::get_image_ctx; | |
26 | ||
27 | namespace { | |
28 | ||
29 | template <typename ImageCtxT = ImageCtx> | |
30 | struct C_DiscardJournalCommit : public Context { | |
31 | typedef std::vector<ObjectExtent> ObjectExtents; | |
32 | ||
33 | ImageCtxT &image_ctx; | |
34 | AioCompletion *aio_comp; | |
35 | ObjectExtents object_extents; | |
36 | ||
37 | C_DiscardJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, | |
38 | const ObjectExtents &_object_extents, uint64_t tid) | |
39 | : image_ctx(_image_ctx), aio_comp(_aio_comp), | |
40 | object_extents(_object_extents) { | |
41 | CephContext *cct = image_ctx.cct; | |
42 | ldout(cct, 20) << "delaying cache discard until journal tid " << tid << " " | |
43 | << "safe" << dendl; | |
44 | ||
45 | aio_comp->add_request(); | |
46 | } | |
47 | ||
48 | void finish(int r) override { | |
49 | CephContext *cct = image_ctx.cct; | |
50 | ldout(cct, 20) << "C_DiscardJournalCommit: " | |
51 | << "journal committed: discarding from cache" << dendl; | |
52 | ||
53 | Mutex::Locker cache_locker(image_ctx.cache_lock); | |
54 | image_ctx.object_cacher->discard_set(image_ctx.object_set, object_extents); | |
55 | aio_comp->complete_request(r); | |
56 | } | |
57 | }; | |
58 | ||
59 | template <typename ImageCtxT = ImageCtx> | |
60 | struct C_FlushJournalCommit : public Context { | |
61 | ImageCtxT &image_ctx; | |
62 | AioCompletion *aio_comp; | |
63 | ||
64 | C_FlushJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, | |
65 | uint64_t tid) | |
66 | : image_ctx(_image_ctx), aio_comp(_aio_comp) { | |
67 | CephContext *cct = image_ctx.cct; | |
68 | ldout(cct, 20) << "delaying flush until journal tid " << tid << " " | |
69 | << "safe" << dendl; | |
70 | ||
71 | aio_comp->add_request(); | |
72 | } | |
73 | ||
74 | void finish(int r) override { | |
75 | CephContext *cct = image_ctx.cct; | |
76 | ldout(cct, 20) << "C_FlushJournalCommit: journal committed" << dendl; | |
77 | aio_comp->complete_request(r); | |
78 | } | |
79 | }; | |
80 | ||
81 | template <typename ImageCtxT> | |
82 | class C_ObjectCacheRead : public Context { | |
83 | public: | |
84 | explicit C_ObjectCacheRead(ImageCtxT &ictx, ObjectReadRequest<ImageCtxT> *req) | |
85 | : m_image_ctx(ictx), m_req(req), m_enqueued(false) {} | |
86 | ||
87 | void complete(int r) override { | |
88 | if (!m_enqueued) { | |
89 | // cache_lock creates a lock ordering issue -- so re-execute this context | |
90 | // outside the cache_lock | |
91 | m_enqueued = true; | |
92 | m_image_ctx.op_work_queue->queue(this, r); | |
93 | return; | |
94 | } | |
95 | Context::complete(r); | |
96 | } | |
97 | ||
98 | protected: | |
99 | void finish(int r) override { | |
100 | m_req->complete(r); | |
101 | } | |
102 | ||
103 | private: | |
104 | ImageCtxT &m_image_ctx; | |
105 | ObjectReadRequest<ImageCtxT> *m_req; | |
106 | bool m_enqueued; | |
107 | }; | |
108 | ||
109 | } // anonymous namespace | |
110 | ||
224ce89b WB |
111 | template <typename I> |
112 | ImageRequest<I>* ImageRequest<I>::create_read_request( | |
113 | I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents, | |
114 | ReadResult &&read_result, int op_flags, | |
115 | const ZTracer::Trace &parent_trace) { | |
116 | return new ImageReadRequest<I>(image_ctx, aio_comp, | |
117 | std::move(image_extents), | |
118 | std::move(read_result), op_flags, | |
119 | parent_trace); | |
120 | } | |
121 | ||
122 | template <typename I> | |
123 | ImageRequest<I>* ImageRequest<I>::create_write_request( | |
124 | I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents, | |
125 | bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) { | |
126 | return new ImageWriteRequest<I>(image_ctx, aio_comp, std::move(image_extents), | |
127 | std::move(bl), op_flags, parent_trace); | |
128 | } | |
129 | ||
130 | template <typename I> | |
131 | ImageRequest<I>* ImageRequest<I>::create_discard_request( | |
132 | I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len, | |
133 | bool skip_partial_discard, const ZTracer::Trace &parent_trace) { | |
134 | return new ImageDiscardRequest<I>(image_ctx, aio_comp, off, len, | |
135 | skip_partial_discard, parent_trace); | |
136 | } | |
137 | ||
138 | template <typename I> | |
139 | ImageRequest<I>* ImageRequest<I>::create_flush_request( | |
140 | I &image_ctx, AioCompletion *aio_comp, | |
141 | const ZTracer::Trace &parent_trace) { | |
142 | return new ImageFlushRequest<I>(image_ctx, aio_comp, parent_trace); | |
143 | } | |
144 | ||
145 | template <typename I> | |
146 | ImageRequest<I>* ImageRequest<I>::create_writesame_request( | |
147 | I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len, | |
148 | bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) { | |
149 | return new ImageWriteSameRequest<I>(image_ctx, aio_comp, off, len, | |
150 | std::move(bl), op_flags, parent_trace); | |
151 | } | |
152 | ||
7c673cae FG |
153 | template <typename I> |
154 | void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c, | |
155 | Extents &&image_extents, | |
31f18b77 FG |
156 | ReadResult &&read_result, int op_flags, |
157 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 158 | ImageReadRequest<I> req(*ictx, c, std::move(image_extents), |
31f18b77 | 159 | std::move(read_result), op_flags, parent_trace); |
7c673cae FG |
160 | req.send(); |
161 | } | |
162 | ||
163 | template <typename I> | |
164 | void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c, | |
165 | Extents &&image_extents, bufferlist &&bl, | |
31f18b77 FG |
166 | int op_flags, |
167 | const ZTracer::Trace &parent_trace) { | |
7c673cae | 168 | ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl), |
31f18b77 | 169 | op_flags, parent_trace); |
7c673cae FG |
170 | req.send(); |
171 | } | |
172 | ||
173 | template <typename I> | |
174 | void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c, | |
175 | uint64_t off, uint64_t len, | |
31f18b77 FG |
176 | bool skip_partial_discard, |
177 | const ZTracer::Trace &parent_trace) { | |
178 | ImageDiscardRequest<I> req(*ictx, c, off, len, skip_partial_discard, | |
179 | parent_trace); | |
7c673cae FG |
180 | req.send(); |
181 | } | |
182 | ||
183 | template <typename I> | |
31f18b77 FG |
184 | void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c, |
185 | const ZTracer::Trace &parent_trace) { | |
186 | ImageFlushRequest<I> req(*ictx, c, parent_trace); | |
7c673cae FG |
187 | req.send(); |
188 | } | |
189 | ||
190 | template <typename I> | |
191 | void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c, | |
192 | uint64_t off, uint64_t len, | |
31f18b77 FG |
193 | bufferlist &&bl, int op_flags, |
194 | const ZTracer::Trace &parent_trace) { | |
195 | ImageWriteSameRequest<I> req(*ictx, c, off, len, std::move(bl), op_flags, | |
196 | parent_trace); | |
7c673cae FG |
197 | req.send(); |
198 | } | |
199 | ||
200 | template <typename I> | |
201 | void ImageRequest<I>::send() { | |
202 | I &image_ctx = this->m_image_ctx; | |
203 | assert(m_aio_comp->is_initialized(get_aio_type())); | |
204 | assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH)); | |
205 | ||
206 | CephContext *cct = image_ctx.cct; | |
207 | AioCompletion *aio_comp = this->m_aio_comp; | |
208 | ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " | |
209 | << "completion=" << aio_comp << dendl; | |
210 | ||
211 | aio_comp->get(); | |
212 | int r = clip_request(); | |
213 | if (r < 0) { | |
214 | m_aio_comp->fail(r); | |
215 | return; | |
216 | } | |
217 | ||
218 | if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { | |
219 | send_request(); | |
220 | } else { | |
221 | send_image_cache_request(); | |
222 | } | |
223 | } | |
224 | ||
225 | template <typename I> | |
226 | int ImageRequest<I>::clip_request() { | |
227 | RWLock::RLocker snap_locker(m_image_ctx.snap_lock); | |
228 | for (auto &image_extent : m_image_extents) { | |
229 | auto clip_len = image_extent.second; | |
230 | int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len); | |
231 | if (r < 0) { | |
232 | return r; | |
233 | } | |
234 | ||
235 | image_extent.second = clip_len; | |
236 | } | |
237 | return 0; | |
238 | } | |
239 | ||
240 | template <typename I> | |
241 | void ImageRequest<I>::start_op() { | |
242 | m_aio_comp->start_op(); | |
243 | } | |
244 | ||
245 | template <typename I> | |
246 | void ImageRequest<I>::fail(int r) { | |
247 | AioCompletion *aio_comp = this->m_aio_comp; | |
248 | aio_comp->get(); | |
249 | aio_comp->fail(r); | |
250 | } | |
251 | ||
252 | template <typename I> | |
253 | ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, | |
254 | Extents &&image_extents, | |
31f18b77 FG |
255 | ReadResult &&read_result, int op_flags, |
256 | const ZTracer::Trace &parent_trace) | |
257 | : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read", | |
258 | parent_trace), | |
7c673cae FG |
259 | m_op_flags(op_flags) { |
260 | aio_comp->read_result = std::move(read_result); | |
261 | } | |
262 | ||
263 | template <typename I> | |
264 | int ImageReadRequest<I>::clip_request() { | |
265 | int r = ImageRequest<I>::clip_request(); | |
266 | if (r < 0) { | |
267 | return r; | |
268 | } | |
269 | ||
270 | uint64_t buffer_length = 0; | |
271 | auto &image_extents = this->m_image_extents; | |
272 | for (auto &image_extent : image_extents) { | |
273 | buffer_length += image_extent.second; | |
274 | } | |
275 | this->m_aio_comp->read_result.set_clip_length(buffer_length); | |
276 | return 0; | |
277 | } | |
278 | ||
279 | template <typename I> | |
280 | void ImageReadRequest<I>::send_request() { | |
281 | I &image_ctx = this->m_image_ctx; | |
282 | CephContext *cct = image_ctx.cct; | |
283 | ||
284 | auto &image_extents = this->m_image_extents; | |
285 | if (image_ctx.object_cacher && image_ctx.readahead_max_bytes > 0 && | |
286 | !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { | |
287 | readahead(get_image_ctx(&image_ctx), image_extents); | |
288 | } | |
289 | ||
290 | AioCompletion *aio_comp = this->m_aio_comp; | |
291 | librados::snap_t snap_id; | |
292 | map<object_t,vector<ObjectExtent> > object_extents; | |
293 | uint64_t buffer_ofs = 0; | |
294 | { | |
295 | // prevent image size from changing between computing clip and recording | |
296 | // pending async operation | |
297 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
298 | snap_id = image_ctx.snap_id; | |
299 | ||
300 | // map image extents to object extents | |
301 | for (auto &extent : image_extents) { | |
302 | if (extent.second == 0) { | |
303 | continue; | |
304 | } | |
305 | ||
306 | Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, | |
307 | extent.first, extent.second, 0, object_extents, | |
308 | buffer_ofs); | |
309 | buffer_ofs += extent.second; | |
310 | } | |
311 | } | |
312 | ||
313 | // pre-calculate the expected number of read requests | |
314 | uint32_t request_count = 0; | |
315 | for (auto &object_extent : object_extents) { | |
316 | request_count += object_extent.second.size(); | |
317 | } | |
318 | aio_comp->set_request_count(request_count); | |
319 | ||
320 | // issue the requests | |
321 | for (auto &object_extent : object_extents) { | |
322 | for (auto &extent : object_extent.second) { | |
323 | ldout(cct, 20) << "oid " << extent.oid << " " << extent.offset << "~" | |
324 | << extent.length << " from " << extent.buffer_extents | |
325 | << dendl; | |
326 | ||
327 | auto req_comp = new io::ReadResult::C_SparseReadRequest<I>( | |
328 | aio_comp); | |
329 | ObjectReadRequest<I> *req = ObjectReadRequest<I>::create( | |
330 | &image_ctx, extent.oid.name, extent.objectno, extent.offset, | |
31f18b77 FG |
331 | extent.length, extent.buffer_extents, snap_id, true, m_op_flags, |
332 | this->m_trace, req_comp); | |
7c673cae FG |
333 | req_comp->request = req; |
334 | ||
335 | if (image_ctx.object_cacher) { | |
336 | C_ObjectCacheRead<I> *cache_comp = new C_ObjectCacheRead<I>(image_ctx, | |
337 | req); | |
31f18b77 FG |
338 | image_ctx.aio_read_from_cache( |
339 | extent.oid, extent.objectno, &req->data(), extent.length, | |
340 | extent.offset, cache_comp, m_op_flags, | |
341 | (this->m_trace.valid() ? &this->m_trace : nullptr)); | |
7c673cae FG |
342 | } else { |
343 | req->send(); | |
344 | } | |
345 | } | |
346 | } | |
347 | ||
348 | aio_comp->put(); | |
349 | ||
350 | image_ctx.perfcounter->inc(l_librbd_rd); | |
351 | image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); | |
352 | } | |
353 | ||
354 | template <typename I> | |
355 | void ImageReadRequest<I>::send_image_cache_request() { | |
356 | I &image_ctx = this->m_image_ctx; | |
357 | assert(image_ctx.image_cache != nullptr); | |
358 | ||
359 | AioCompletion *aio_comp = this->m_aio_comp; | |
360 | aio_comp->set_request_count(1); | |
361 | ||
362 | auto *req_comp = new io::ReadResult::C_ImageReadRequest( | |
363 | aio_comp, this->m_image_extents); | |
364 | image_ctx.image_cache->aio_read(std::move(this->m_image_extents), | |
365 | &req_comp->bl, m_op_flags, | |
366 | req_comp); | |
367 | } | |
368 | ||
369 | template <typename I> | |
370 | void AbstractImageWriteRequest<I>::send_request() { | |
371 | I &image_ctx = this->m_image_ctx; | |
372 | CephContext *cct = image_ctx.cct; | |
373 | ||
374 | RWLock::RLocker md_locker(image_ctx.md_lock); | |
375 | ||
376 | bool journaling = false; | |
377 | ||
378 | AioCompletion *aio_comp = this->m_aio_comp; | |
379 | uint64_t clip_len = 0; | |
380 | ObjectExtents object_extents; | |
381 | ::SnapContext snapc; | |
382 | { | |
383 | // prevent image size from changing between computing clip and recording | |
384 | // pending async operation | |
385 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
386 | if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) { | |
387 | aio_comp->fail(-EROFS); | |
388 | return; | |
389 | } | |
390 | ||
391 | for (auto &extent : this->m_image_extents) { | |
392 | if (extent.second == 0) { | |
393 | continue; | |
394 | } | |
395 | ||
396 | // map to object extents | |
397 | Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, | |
398 | extent.first, extent.second, 0, object_extents); | |
399 | clip_len += extent.second; | |
400 | } | |
401 | ||
402 | snapc = image_ctx.snapc; | |
403 | journaling = (image_ctx.journal != nullptr && | |
404 | image_ctx.journal->is_journal_appending()); | |
405 | } | |
406 | ||
407 | prune_object_extents(object_extents); | |
408 | ||
409 | if (!object_extents.empty()) { | |
410 | uint64_t journal_tid = 0; | |
411 | aio_comp->set_request_count( | |
412 | object_extents.size() + get_object_cache_request_count(journaling)); | |
413 | ||
414 | ObjectRequests requests; | |
415 | send_object_requests(object_extents, snapc, | |
416 | (journaling ? &requests : nullptr)); | |
417 | ||
418 | if (journaling) { | |
419 | // in-flight ops are flushed prior to closing the journal | |
420 | assert(image_ctx.journal != NULL); | |
421 | journal_tid = append_journal_event(requests, m_synchronous); | |
422 | } | |
423 | ||
424 | if (image_ctx.object_cacher != NULL) { | |
425 | send_object_cache_requests(object_extents, journal_tid); | |
426 | } | |
427 | } else { | |
428 | // no IO to perform -- fire completion | |
429 | aio_comp->unblock(); | |
430 | } | |
431 | ||
432 | update_stats(clip_len); | |
433 | aio_comp->put(); | |
434 | } | |
435 | ||
436 | template <typename I> | |
437 | void AbstractImageWriteRequest<I>::send_object_requests( | |
438 | const ObjectExtents &object_extents, const ::SnapContext &snapc, | |
439 | ObjectRequests *object_requests) { | |
440 | I &image_ctx = this->m_image_ctx; | |
441 | CephContext *cct = image_ctx.cct; | |
442 | ||
443 | AioCompletion *aio_comp = this->m_aio_comp; | |
444 | for (ObjectExtents::const_iterator p = object_extents.begin(); | |
445 | p != object_extents.end(); ++p) { | |
446 | ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" << p->length | |
447 | << " from " << p->buffer_extents << dendl; | |
448 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
449 | ObjectRequestHandle *request = create_object_request(*p, snapc, | |
450 | req_comp); | |
451 | ||
452 | // if journaling, stash the request for later; otherwise send | |
453 | if (request != NULL) { | |
454 | if (object_requests != NULL) { | |
455 | object_requests->push_back(request); | |
456 | } else { | |
457 | request->send(); | |
458 | } | |
459 | } | |
460 | } | |
461 | } | |
462 | ||
463 | template <typename I> | |
464 | void ImageWriteRequest<I>::assemble_extent(const ObjectExtent &object_extent, | |
465 | bufferlist *bl) { | |
466 | for (auto q = object_extent.buffer_extents.begin(); | |
467 | q != object_extent.buffer_extents.end(); ++q) { | |
468 | bufferlist sub_bl; | |
469 | sub_bl.substr_of(m_bl, q->first, q->second); | |
470 | bl->claim_append(sub_bl); | |
471 | } | |
472 | } | |
473 | ||
474 | template <typename I> | |
475 | uint64_t ImageWriteRequest<I>::append_journal_event( | |
476 | const ObjectRequests &requests, bool synchronous) { | |
477 | I &image_ctx = this->m_image_ctx; | |
478 | ||
479 | uint64_t tid = 0; | |
480 | uint64_t buffer_offset = 0; | |
481 | assert(!this->m_image_extents.empty()); | |
482 | for (auto &extent : this->m_image_extents) { | |
483 | bufferlist sub_bl; | |
484 | sub_bl.substr_of(m_bl, buffer_offset, extent.second); | |
485 | buffer_offset += extent.second; | |
486 | ||
487 | tid = image_ctx.journal->append_write_event(extent.first, extent.second, | |
488 | sub_bl, requests, synchronous); | |
489 | } | |
490 | ||
491 | if (image_ctx.object_cacher == NULL) { | |
492 | AioCompletion *aio_comp = this->m_aio_comp; | |
493 | aio_comp->associate_journal_event(tid); | |
494 | } | |
495 | return tid; | |
496 | } | |
497 | ||
498 | template <typename I> | |
499 | void ImageWriteRequest<I>::send_image_cache_request() { | |
500 | I &image_ctx = this->m_image_ctx; | |
501 | assert(image_ctx.image_cache != nullptr); | |
502 | ||
503 | AioCompletion *aio_comp = this->m_aio_comp; | |
504 | aio_comp->set_request_count(1); | |
505 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
506 | image_ctx.image_cache->aio_write(std::move(this->m_image_extents), | |
507 | std::move(m_bl), m_op_flags, req_comp); | |
508 | } | |
509 | ||
510 | template <typename I> | |
511 | void ImageWriteRequest<I>::send_object_cache_requests( | |
512 | const ObjectExtents &object_extents, uint64_t journal_tid) { | |
513 | I &image_ctx = this->m_image_ctx; | |
514 | for (auto p = object_extents.begin(); p != object_extents.end(); ++p) { | |
515 | const ObjectExtent &object_extent = *p; | |
516 | ||
517 | bufferlist bl; | |
518 | assemble_extent(object_extent, &bl); | |
519 | ||
520 | AioCompletion *aio_comp = this->m_aio_comp; | |
521 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
31f18b77 FG |
522 | image_ctx.write_to_cache( |
523 | object_extent.oid, bl, object_extent.length, object_extent.offset, | |
524 | req_comp, m_op_flags, journal_tid, | |
525 | (this->m_trace.valid() ? &this->m_trace : nullptr)); | |
7c673cae FG |
526 | } |
527 | } | |
528 | ||
529 | template <typename I> | |
530 | void ImageWriteRequest<I>::send_object_requests( | |
531 | const ObjectExtents &object_extents, const ::SnapContext &snapc, | |
532 | ObjectRequests *object_requests) { | |
533 | I &image_ctx = this->m_image_ctx; | |
534 | ||
535 | // cache handles creating object requests during writeback | |
536 | if (image_ctx.object_cacher == NULL) { | |
537 | AbstractImageWriteRequest<I>::send_object_requests(object_extents, snapc, | |
538 | object_requests); | |
539 | } | |
540 | } | |
541 | ||
542 | template <typename I> | |
543 | ObjectRequestHandle *ImageWriteRequest<I>::create_object_request( | |
544 | const ObjectExtent &object_extent, const ::SnapContext &snapc, | |
545 | Context *on_finish) { | |
546 | I &image_ctx = this->m_image_ctx; | |
547 | assert(image_ctx.object_cacher == NULL); | |
548 | ||
549 | bufferlist bl; | |
550 | assemble_extent(object_extent, &bl); | |
551 | ObjectRequest<I> *req = ObjectRequest<I>::create_write( | |
552 | &image_ctx, object_extent.oid.name, object_extent.objectno, | |
31f18b77 | 553 | object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish); |
7c673cae FG |
554 | return req; |
555 | } | |
556 | ||
557 | template <typename I> | |
558 | void ImageWriteRequest<I>::update_stats(size_t length) { | |
559 | I &image_ctx = this->m_image_ctx; | |
560 | image_ctx.perfcounter->inc(l_librbd_wr); | |
561 | image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); | |
562 | } | |
563 | ||
564 | template <typename I> | |
565 | uint64_t ImageDiscardRequest<I>::append_journal_event( | |
566 | const ObjectRequests &requests, bool synchronous) { | |
567 | I &image_ctx = this->m_image_ctx; | |
568 | ||
569 | uint64_t tid = 0; | |
570 | assert(!this->m_image_extents.empty()); | |
571 | for (auto &extent : this->m_image_extents) { | |
572 | journal::EventEntry event_entry(journal::AioDiscardEvent(extent.first, | |
573 | extent.second, | |
574 | this->m_skip_partial_discard)); | |
575 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
576 | requests, extent.first, | |
577 | extent.second, synchronous); | |
578 | } | |
579 | ||
580 | AioCompletion *aio_comp = this->m_aio_comp; | |
581 | aio_comp->associate_journal_event(tid); | |
582 | return tid; | |
583 | } | |
584 | ||
585 | template <typename I> | |
586 | void ImageDiscardRequest<I>::prune_object_extents(ObjectExtents &object_extents) { | |
587 | I &image_ctx = this->m_image_ctx; | |
588 | CephContext *cct = image_ctx.cct; | |
589 | if (!this->m_skip_partial_discard) { | |
590 | return; | |
591 | } | |
592 | ||
593 | for (auto p = object_extents.begin(); p != object_extents.end(); ) { | |
594 | if (p->offset + p->length < image_ctx.layout.object_size) { | |
595 | ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" | |
596 | << p->length << " from " << p->buffer_extents | |
597 | << ": skip partial discard" << dendl; | |
598 | p = object_extents.erase(p); | |
599 | } else { | |
600 | ++p; | |
601 | } | |
602 | } | |
603 | } | |
604 | ||
605 | template <typename I> | |
606 | uint32_t ImageDiscardRequest<I>::get_object_cache_request_count(bool journaling) const { | |
607 | // extra completion request is required for tracking journal commit | |
608 | I &image_ctx = this->m_image_ctx; | |
609 | return (image_ctx.object_cacher != nullptr && journaling ? 1 : 0); | |
610 | } | |
611 | ||
612 | template <typename I> | |
613 | void ImageDiscardRequest<I>::send_image_cache_request() { | |
614 | I &image_ctx = this->m_image_ctx; | |
615 | assert(image_ctx.image_cache != nullptr); | |
616 | ||
617 | AioCompletion *aio_comp = this->m_aio_comp; | |
618 | aio_comp->set_request_count(this->m_image_extents.size()); | |
619 | for (auto &extent : this->m_image_extents) { | |
620 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
621 | image_ctx.image_cache->aio_discard(extent.first, extent.second, | |
622 | this->m_skip_partial_discard, req_comp); | |
623 | } | |
624 | } | |
625 | ||
626 | template <typename I> | |
627 | void ImageDiscardRequest<I>::send_object_cache_requests( | |
628 | const ObjectExtents &object_extents, uint64_t journal_tid) { | |
629 | I &image_ctx = this->m_image_ctx; | |
630 | if (journal_tid == 0) { | |
631 | Mutex::Locker cache_locker(image_ctx.cache_lock); | |
632 | image_ctx.object_cacher->discard_set(image_ctx.object_set, | |
633 | object_extents); | |
634 | } else { | |
635 | // cannot discard from cache until journal has committed | |
636 | assert(image_ctx.journal != NULL); | |
637 | AioCompletion *aio_comp = this->m_aio_comp; | |
638 | image_ctx.journal->wait_event( | |
639 | journal_tid, new C_DiscardJournalCommit<I>(image_ctx, aio_comp, | |
640 | object_extents, journal_tid)); | |
641 | } | |
642 | } | |
643 | ||
644 | template <typename I> | |
645 | ObjectRequestHandle *ImageDiscardRequest<I>::create_object_request( | |
646 | const ObjectExtent &object_extent, const ::SnapContext &snapc, | |
647 | Context *on_finish) { | |
648 | I &image_ctx = this->m_image_ctx; | |
649 | ||
650 | ObjectRequest<I> *req; | |
651 | if (object_extent.length == image_ctx.layout.object_size) { | |
652 | req = ObjectRequest<I>::create_remove( | |
653 | &image_ctx, object_extent.oid.name, object_extent.objectno, snapc, | |
31f18b77 | 654 | this->m_trace, on_finish); |
7c673cae FG |
655 | } else if (object_extent.offset + object_extent.length == |
656 | image_ctx.layout.object_size) { | |
657 | req = ObjectRequest<I>::create_truncate( | |
658 | &image_ctx, object_extent.oid.name, object_extent.objectno, | |
31f18b77 | 659 | object_extent.offset, snapc, this->m_trace, on_finish); |
7c673cae FG |
660 | } else { |
661 | req = ObjectRequest<I>::create_zero( | |
662 | &image_ctx, object_extent.oid.name, object_extent.objectno, | |
31f18b77 FG |
663 | object_extent.offset, object_extent.length, snapc, |
664 | this->m_trace, on_finish); | |
7c673cae FG |
665 | } |
666 | return req; | |
667 | } | |
668 | ||
669 | template <typename I> | |
670 | void ImageDiscardRequest<I>::update_stats(size_t length) { | |
671 | I &image_ctx = this->m_image_ctx; | |
672 | image_ctx.perfcounter->inc(l_librbd_discard); | |
673 | image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); | |
674 | } | |
675 | ||
676 | template <typename I> | |
677 | void ImageFlushRequest<I>::send_request() { | |
678 | I &image_ctx = this->m_image_ctx; | |
679 | image_ctx.user_flushed(); | |
680 | ||
681 | bool journaling = false; | |
682 | { | |
683 | RWLock::RLocker snap_locker(image_ctx.snap_lock); | |
684 | journaling = (image_ctx.journal != nullptr && | |
685 | image_ctx.journal->is_journal_appending()); | |
686 | } | |
687 | ||
688 | AioCompletion *aio_comp = this->m_aio_comp; | |
689 | if (journaling) { | |
690 | // in-flight ops are flushed prior to closing the journal | |
691 | uint64_t journal_tid = image_ctx.journal->append_io_event( | |
692 | journal::EventEntry(journal::AioFlushEvent()), | |
693 | ObjectRequests(), 0, 0, false); | |
694 | ||
695 | aio_comp->set_request_count(1); | |
696 | aio_comp->associate_journal_event(journal_tid); | |
697 | ||
698 | FunctionContext *flush_ctx = new FunctionContext( | |
699 | [aio_comp, &image_ctx, journal_tid] (int r) { | |
700 | auto ctx = new C_FlushJournalCommit<I>(image_ctx, aio_comp, | |
701 | journal_tid); | |
702 | image_ctx.journal->flush_event(journal_tid, ctx); | |
703 | ||
704 | // track flush op for block writes | |
705 | aio_comp->start_op(true); | |
706 | aio_comp->put(); | |
707 | }); | |
708 | ||
709 | image_ctx.flush_async_operations(flush_ctx); | |
710 | } else { | |
711 | // flush rbd cache only when journaling is not enabled | |
712 | aio_comp->set_request_count(1); | |
713 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
714 | image_ctx.flush(req_comp); | |
715 | ||
716 | aio_comp->start_op(true); | |
717 | aio_comp->put(); | |
718 | } | |
719 | ||
720 | image_ctx.perfcounter->inc(l_librbd_aio_flush); | |
721 | } | |
722 | ||
723 | template <typename I> | |
724 | void ImageFlushRequest<I>::send_image_cache_request() { | |
725 | I &image_ctx = this->m_image_ctx; | |
726 | assert(image_ctx.image_cache != nullptr); | |
727 | ||
728 | AioCompletion *aio_comp = this->m_aio_comp; | |
729 | aio_comp->set_request_count(1); | |
730 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
731 | image_ctx.image_cache->aio_flush(req_comp); | |
732 | } | |
733 | ||
734 | template <typename I> | |
735 | bool ImageWriteSameRequest<I>::assemble_writesame_extent(const ObjectExtent &object_extent, | |
736 | bufferlist *bl, bool force_write) { | |
737 | size_t m_data_len = m_data_bl.length(); | |
738 | ||
739 | if (!force_write) { | |
740 | bool may_writesame = true; | |
741 | ||
742 | for (auto q = object_extent.buffer_extents.begin(); | |
743 | q != object_extent.buffer_extents.end(); ++q) { | |
744 | if (!(q->first % m_data_len == 0 && q->second % m_data_len == 0)) { | |
745 | may_writesame = false; | |
746 | break; | |
747 | } | |
748 | } | |
749 | ||
750 | if (may_writesame) { | |
751 | bl->append(m_data_bl); | |
752 | return true; | |
753 | } | |
754 | } | |
755 | ||
756 | for (auto q = object_extent.buffer_extents.begin(); | |
757 | q != object_extent.buffer_extents.end(); ++q) { | |
758 | bufferlist sub_bl; | |
759 | uint64_t sub_off = q->first % m_data_len; | |
760 | uint64_t sub_len = m_data_len - sub_off; | |
761 | uint64_t extent_left = q->second; | |
762 | while (extent_left >= sub_len) { | |
763 | sub_bl.substr_of(m_data_bl, sub_off, sub_len); | |
764 | bl->claim_append(sub_bl); | |
765 | extent_left -= sub_len; | |
766 | if (sub_off) { | |
767 | sub_off = 0; | |
768 | sub_len = m_data_len; | |
769 | } | |
770 | } | |
771 | if (extent_left) { | |
772 | sub_bl.substr_of(m_data_bl, sub_off, extent_left); | |
773 | bl->claim_append(sub_bl); | |
774 | } | |
775 | } | |
776 | return false; | |
777 | } | |
778 | ||
779 | template <typename I> | |
780 | uint64_t ImageWriteSameRequest<I>::append_journal_event( | |
781 | const ObjectRequests &requests, bool synchronous) { | |
782 | I &image_ctx = this->m_image_ctx; | |
783 | ||
784 | uint64_t tid = 0; | |
785 | assert(!this->m_image_extents.empty()); | |
786 | for (auto &extent : this->m_image_extents) { | |
787 | journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, | |
788 | extent.second, | |
789 | m_data_bl)); | |
790 | tid = image_ctx.journal->append_io_event(std::move(event_entry), | |
791 | requests, extent.first, | |
792 | extent.second, synchronous); | |
793 | } | |
794 | ||
795 | if (image_ctx.object_cacher == NULL) { | |
796 | AioCompletion *aio_comp = this->m_aio_comp; | |
797 | aio_comp->associate_journal_event(tid); | |
798 | } | |
799 | return tid; | |
800 | } | |
801 | ||
802 | template <typename I> | |
803 | void ImageWriteSameRequest<I>::send_image_cache_request() { | |
804 | I &image_ctx = this->m_image_ctx; | |
805 | assert(image_ctx.image_cache != nullptr); | |
806 | ||
807 | AioCompletion *aio_comp = this->m_aio_comp; | |
808 | aio_comp->set_request_count(this->m_image_extents.size()); | |
809 | for (auto &extent : this->m_image_extents) { | |
810 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
811 | image_ctx.image_cache->aio_writesame(extent.first, extent.second, | |
812 | std::move(m_data_bl), m_op_flags, | |
813 | req_comp); | |
814 | } | |
815 | } | |
816 | ||
817 | template <typename I> | |
818 | void ImageWriteSameRequest<I>::send_object_cache_requests( | |
819 | const ObjectExtents &object_extents, uint64_t journal_tid) { | |
820 | I &image_ctx = this->m_image_ctx; | |
821 | for (auto p = object_extents.begin(); p != object_extents.end(); ++p) { | |
822 | const ObjectExtent &object_extent = *p; | |
823 | ||
824 | bufferlist bl; | |
825 | assemble_writesame_extent(object_extent, &bl, true); | |
826 | ||
827 | AioCompletion *aio_comp = this->m_aio_comp; | |
828 | C_AioRequest *req_comp = new C_AioRequest(aio_comp); | |
31f18b77 FG |
829 | image_ctx.write_to_cache( |
830 | object_extent.oid, bl, object_extent.length, object_extent.offset, | |
831 | req_comp, m_op_flags, journal_tid, | |
832 | (this->m_trace.valid() ? &this->m_trace : nullptr)); | |
7c673cae FG |
833 | } |
834 | } | |
835 | ||
836 | template <typename I> | |
837 | void ImageWriteSameRequest<I>::send_object_requests( | |
838 | const ObjectExtents &object_extents, const ::SnapContext &snapc, | |
839 | ObjectRequests *object_requests) { | |
840 | I &image_ctx = this->m_image_ctx; | |
841 | ||
842 | // cache handles creating object requests during writeback | |
843 | if (image_ctx.object_cacher == NULL) { | |
844 | AbstractImageWriteRequest<I>::send_object_requests(object_extents, snapc, | |
845 | object_requests); | |
846 | } | |
847 | } | |
848 | ||
849 | template <typename I> | |
850 | ObjectRequestHandle *ImageWriteSameRequest<I>::create_object_request( | |
851 | const ObjectExtent &object_extent, const ::SnapContext &snapc, | |
852 | Context *on_finish) { | |
853 | I &image_ctx = this->m_image_ctx; | |
854 | assert(image_ctx.object_cacher == NULL); | |
855 | ||
856 | bufferlist bl; | |
857 | ObjectRequest<I> *req; | |
858 | ||
859 | if (assemble_writesame_extent(object_extent, &bl, false)) { | |
860 | req = ObjectRequest<I>::create_writesame( | |
861 | &image_ctx, object_extent.oid.name, object_extent.objectno, | |
862 | object_extent.offset, object_extent.length, | |
31f18b77 | 863 | bl, snapc, m_op_flags, this->m_trace, on_finish); |
7c673cae FG |
864 | return req; |
865 | } | |
866 | req = ObjectRequest<I>::create_write( | |
867 | &image_ctx, object_extent.oid.name, object_extent.objectno, | |
31f18b77 | 868 | object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish); |
7c673cae FG |
869 | return req; |
870 | } | |
871 | ||
872 | template <typename I> | |
873 | void ImageWriteSameRequest<I>::update_stats(size_t length) { | |
874 | I &image_ctx = this->m_image_ctx; | |
875 | image_ctx.perfcounter->inc(l_librbd_ws); | |
876 | image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); | |
877 | } | |
878 | ||
879 | } // namespace io | |
880 | } // namespace librbd | |
881 | ||
882 | template class librbd::io::ImageRequest<librbd::ImageCtx>; | |
883 | template class librbd::io::ImageReadRequest<librbd::ImageCtx>; | |
884 | template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>; | |
885 | template class librbd::io::ImageWriteRequest<librbd::ImageCtx>; | |
886 | template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>; | |
887 | template class librbd::io::ImageFlushRequest<librbd::ImageCtx>; | |
888 | template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>; |