// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "librbd/api/Io.h"
#include "include/intarith.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Cond.h"
#include "common/EventTrace.h"
#include "librbd/ImageCtx.h"
#include "librbd/internal.h"
#include "librbd/Utils.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageDispatchSpec.h"
#include "librbd/io/Types.h"

#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix *_dout << "librbd::api::Io " << __func__ << ": "

namespace librbd {
namespace api {

namespace {

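// Rejects I/O against an image whose data pool is unavailable: the
// completion is failed with -ENODEV and no request is dispatched. The
// aio_* entry points below call this before sending work down the stack.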
template <typename I>
bool is_valid_io(I& image_ctx, io::AioCompletion* aio_comp) {
  auto cct = image_ctx.cct;

  if (!image_ctx.data_ctx.is_valid()) {
    lderr(cct) << "missing data pool" << dendl;

    aio_comp->fail(-ENODEV);
    return false;
  }

  return true;
}

} // anonymous namespace

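// Synchronous entry points. Each wraps its aio_* counterpart with a
// C_SaferCond-backed AioCompletion and blocks until the request completes,
// returning a negative error code on failure.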
template <typename I>
ssize_t Io<I>::read(
    I &image_ctx, uint64_t off, uint64_t len, io::ReadResult &&read_result,
    int op_flags) {
  auto cct = image_ctx.cct;

  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << dendl;

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_read(image_ctx, aio_comp, off, len, std::move(read_result), op_flags,
           false);
  return ctx.wait();
}

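// The write-family wrappers below first clip the request against the
// current image size via clip_io() (taken under a shared image_lock); an
// invalid extent fails up front instead of being dispatched.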
template <typename I>
ssize_t Io<I>::write(
    I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << dendl;

  image_ctx.image_lock.lock_shared();
  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
  image_ctx.image_lock.unlock_shared();
  if (r < 0) {
    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
    return r;
  }

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_write(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false);

  r = ctx.wait();
  if (r < 0) {
    return r;
  }
  return len;
}

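// discard_granularity_bytes is honoured further down the dispatch stack;
// as the write_zeroes path below notes, a granularity of 0 enables partial
// discard (zeroing) of objects.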
template <typename I>
ssize_t Io<I>::discard(
    I &image_ctx, uint64_t off, uint64_t len,
    uint32_t discard_granularity_bytes) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << dendl;

  image_ctx.image_lock.lock_shared();
  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
  image_ctx.image_lock.unlock_shared();
  if (r < 0) {
    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
    return r;
  }

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_discard(image_ctx, aio_comp, off, len, discard_granularity_bytes, false);

  r = ctx.wait();
  if (r < 0) {
    return r;
  }
  return len;
}

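// write-same replicates the supplied buffer across [off, off + len); the
// range length is expected to be a whole multiple of bl.length().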
template <typename I>
ssize_t Io<I>::write_same(
    I &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << ", data_len=" << bl.length() << dendl;

  image_ctx.image_lock.lock_shared();
  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
  image_ctx.image_lock.unlock_shared();
  if (r < 0) {
    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
    return r;
  }

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_write_same(image_ctx, aio_comp, off, len, std::move(bl), op_flags, false);

  r = ctx.wait();
  if (r < 0) {
    return r;
  }
  return len;
}

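// By default write-zeroes unmaps the range (discard-based); passing
// RBD_WRITE_ZEROES_FLAG_THICK_PROVISION zeroes it with actual writes
// instead. Any other flag bit is rejected with -EINVAL.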
template <typename I>
ssize_t Io<I>::write_zeroes(I& image_ctx, uint64_t off, uint64_t len,
                            int zero_flags, int op_flags) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << dendl;

  image_ctx.image_lock.lock_shared();
  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
  image_ctx.image_lock.unlock_shared();
  if (r < 0) {
    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
    return r;
  }

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false);

  r = ctx.wait();
  if (r < 0) {
    return r;
  }
  return len;
}

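// Compares the on-disk range against cmp_bl and, only on a match, writes
// bl; on a mismatch the offset of the first differing byte is reported
// through *mismatch_off.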
template <typename I>
ssize_t Io<I>::compare_and_write(
    I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl,
    bufferlist &&bl, uint64_t *mismatch_off, int op_flags) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
                 << "len=" << len << dendl;

  image_ctx.image_lock.lock_shared();
  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
  image_ctx.image_lock.unlock_shared();
  if (r < 0) {
    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
    return r;
  }

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_compare_and_write(image_ctx, aio_comp, off, len, std::move(cmp_bl),
                        std::move(bl), mismatch_off, op_flags, false);

  r = ctx.wait();
  if (r < 0) {
    return r;
  }
  return len;
}

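// Blocks until all in-flight I/O has been flushed through the dispatch
// stack; returns 0 on success.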
template <typename I>
int Io<I>::flush(I &image_ctx) {
  auto cct = image_ctx.cct;
  ldout(cct, 20) << "ictx=" << &image_ctx << dendl;

  C_SaferCond ctx;
  auto aio_comp = io::AioCompletion::create(&ctx);
  aio_flush(image_ctx, aio_comp, false);

  int r = ctx.wait();
  if (r < 0) {
    return r;
  }

  return 0;
}

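// Asynchronous entry points. Each one initializes the completion's timing
// and type, optionally arms event-socket notification for native async
// callers, validates the I/O, then builds an ImageDispatchSpec at
// IMAGE_DISPATCH_LAYER_API_START and sends it down the dispatch stack.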
template <typename I>
void Io<I>::aio_read(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
                     uint64_t len, io::ReadResult &&read_result, int op_flags,
                     bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: read", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_READ);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << ", " << "flags=" << op_flags << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_read(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
    std::move(read_result), image_ctx.get_data_io_context(), op_flags, 0,
    trace);
  req->send();
}

template <typename I>
void Io<I>::aio_write(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
                      uint64_t len, bufferlist &&bl, int op_flags,
                      bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: write", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITE);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << ", flags=" << op_flags << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_write(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
    std::move(bl), image_ctx.get_data_io_context(), op_flags, trace);
  req->send();
}

template <typename I>
void Io<I>::aio_discard(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off,
                        uint64_t len, uint32_t discard_granularity_bytes,
                        bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: discard", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_discard(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
    discard_granularity_bytes, image_ctx.get_data_io_context(), trace);
  req->send();
}

template <typename I>
void Io<I>::aio_write_same(I &image_ctx, io::AioCompletion *aio_comp,
                           uint64_t off, uint64_t len, bufferlist &&bl,
                           int op_flags, bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: writesame", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_WRITESAME);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << ", data_len=" << bl.length() << ", "
                 << "flags=" << op_flags << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_write_same(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
    std::move(bl), image_ctx.get_data_io_context(), op_flags, trace);
  req->send();
}

template <typename I>
void Io<I>::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
                             uint64_t off, uint64_t len, int zero_flags,
                             int op_flags, bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: write_zeroes", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  auto io_type = io::AIO_TYPE_DISCARD;
  if ((zero_flags & RBD_WRITE_ZEROES_FLAG_THICK_PROVISION) != 0) {
    zero_flags &= ~RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
    io_type = io::AIO_TYPE_WRITESAME;
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io_type);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  // validate the supported flags
  if (zero_flags != 0U) {
    aio_comp->fail(-EINVAL);
    return;
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  if (io_type == io::AIO_TYPE_WRITESAME) {
    // write-same needs to be aligned to its buffer length, but librbd has
    // never forced block alignment. Hide that requirement from the user by
    // adding optional prepend/append writes.
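    // Worked example: off=100, len=2000 with data_length=512 becomes a
    // 412-byte prepend write at offset 100, a 1536-byte write-same over
    // [512, 2048) and a 52-byte append write at offset 2048
    // (412 + 1536 + 52 == 2000).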
    const uint64_t data_length = 512;
    uint64_t write_same_offset = p2roundup(off, data_length);
    uint64_t write_same_offset_end = p2align(off + len, data_length);
    uint64_t write_same_length = 0;
    if (write_same_offset_end > write_same_offset) {
      write_same_length = write_same_offset_end - write_same_offset;
    }

    uint64_t prepend_offset = off;
    uint64_t prepend_length = write_same_offset - off;
    uint64_t append_offset = write_same_offset + write_same_length;
    uint64_t append_length = len - prepend_length - write_same_length;
    ldout(cct, 20) << "prepend_offset=" << prepend_offset << ", "
                   << "prepend_length=" << prepend_length << ", "
                   << "write_same_offset=" << write_same_offset << ", "
                   << "write_same_length=" << write_same_length << ", "
                   << "append_offset=" << append_offset << ", "
                   << "append_length=" << append_length << dendl;
    ceph_assert(prepend_length + write_same_length + append_length == len);

    if (write_same_length <= data_length) {
      // unaligned or small write-zeroes request -- use a single write
      bufferlist bl;
      bl.append_zero(len);

      aio_comp->aio_type = io::AIO_TYPE_WRITE;
      auto req = io::ImageDispatchSpec::create_write(
        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
        std::move(bl), image_ctx.get_data_io_context(), op_flags, trace);
      req->send();
      return;
    } else if (prepend_length == 0 && append_length == 0) {
      // fully aligned -- use a single write-same image request
      bufferlist bl;
      bl.append_zero(data_length);

      auto req = io::ImageDispatchSpec::create_write_same(
        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
        std::move(bl), image_ctx.get_data_io_context(), op_flags, trace);
      req->send();
      return;
    }

    // to reach this point, we need at least one prepend/append write along
    // with a write-same -- therefore we will need to wrap the provided
    // AioCompletion
    auto request_count = 1;
    if (prepend_length > 0) {
      ++request_count;
    }
    if (append_length > 0) {
      ++request_count;
    }

    ceph_assert(request_count > 1);
    aio_comp->start_op();
    aio_comp->set_request_count(request_count);

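    // Each sub-request below gets its own child AioCompletion whose
    // callback is a C_AioRequest bound to the parent aio_comp; the parent
    // completes once all set_request_count() children have finished.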
    if (prepend_length > 0) {
      bufferlist bl;
      bl.append_zero(prepend_length);

      Context* prepend_ctx = new io::C_AioRequest(aio_comp);
      auto prepend_aio_comp = io::AioCompletion::create_and_start(
        prepend_ctx, &image_ctx, io::AIO_TYPE_WRITE);
      auto prepend_req = io::ImageDispatchSpec::create_write(
        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, prepend_aio_comp,
        {{prepend_offset, prepend_length}}, std::move(bl),
        image_ctx.get_data_io_context(), op_flags, trace);
      prepend_req->send();
    }

    if (append_length > 0) {
      bufferlist bl;
      bl.append_zero(append_length);

      Context* append_ctx = new io::C_AioRequest(aio_comp);
      auto append_aio_comp = io::AioCompletion::create_and_start(
        append_ctx, &image_ctx, io::AIO_TYPE_WRITE);
      auto append_req = io::ImageDispatchSpec::create_write(
        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, append_aio_comp,
        {{append_offset, append_length}}, std::move(bl),
        image_ctx.get_data_io_context(), op_flags, trace);
      append_req->send();
    }

    bufferlist bl;
    bl.append_zero(data_length);

    Context* write_same_ctx = new io::C_AioRequest(aio_comp);
    auto write_same_aio_comp = io::AioCompletion::create_and_start(
      write_same_ctx, &image_ctx, io::AIO_TYPE_WRITESAME);
    auto req = io::ImageDispatchSpec::create_write_same(
      image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, write_same_aio_comp,
      write_same_offset, write_same_length, std::move(bl),
      image_ctx.get_data_io_context(), op_flags, trace);
    req->send();
    return;
  }

  // enable partial discard (zeroing) of objects
  uint32_t discard_granularity_bytes = 0;

  auto req = io::ImageDispatchSpec::create_discard(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
    discard_granularity_bytes, image_ctx.get_data_io_context(), trace);
  req->send();
}

template <typename I>
void Io<I>::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp,
                                  uint64_t off, uint64_t len,
                                  bufferlist &&cmp_bl,
                                  bufferlist &&bl, uint64_t *mismatch_off,
                                  int op_flags, bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: compare_and_write", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx),
                      io::AIO_TYPE_COMPARE_AND_WRITE);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << ", off=" << off << ", "
                 << "len=" << len << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_compare_and_write(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
    std::move(cmp_bl), std::move(bl), mismatch_off,
    image_ctx.get_data_io_context(), op_flags, trace);
  req->send();
}

template <typename I>
void Io<I>::aio_flush(I &image_ctx, io::AioCompletion *aio_comp,
                      bool native_async) {
  auto cct = image_ctx.cct;
  FUNCTRACE(cct);
  ZTracer::Trace trace;
  if (image_ctx.blkin_trace_all) {
    trace.init("io: flush", &image_ctx.trace_endpoint);
    trace.event("init");
  }

  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_FLUSH);
  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                 << "completion=" << aio_comp << dendl;

  if (native_async && image_ctx.event_socket.is_valid()) {
    aio_comp->set_event_notify(true);
  }

  if (!is_valid_io(image_ctx, aio_comp)) {
    return;
  }

  auto req = io::ImageDispatchSpec::create_flush(
    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp,
    io::FLUSH_SOURCE_USER, trace);
  req->send();
}

} // namespace api
} // namespace librbd

template class librbd::api::Io<librbd::ImageCtx>;