1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
5 * Copyright Red Hat, Inc.
8 * Stefan Hajnoczi <stefanha@redhat.com>
11 #include "qemu/osdep.h"
13 #include "block/block_int.h"
14 #include "exec/memory.h"
15 #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
16 #include "qapi/error.h"
17 #include "qemu/error-report.h"
18 #include "qapi/qmp/qdict.h"
19 #include "qemu/module.h"
20 #include "exec/memory.h" /* for ram_block_discard_disable() */
22 #include "block/block-io.h"
/*
 * Keep the QEMU BlockDriver names identical to the libblkio driver names.
 * Using macros instead of typing out the string literals avoids typos.
 */
#define DRIVER_IO_URING "io_uring"
#define DRIVER_NVME_IO_URING "nvme-io_uring"
#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
35 * Allocated bounce buffers are kept in a list sorted by buffer address.
37 typedef struct BlkioBounceBuf
{
38 QLIST_ENTRY(BlkioBounceBuf
) next
;
40 /* The bounce buffer */
46 * libblkio is not thread-safe so this lock protects ->blkio and
51 struct blkioq
*blkioq
; /* make this multi-queue in the future... */
55 * Polling fetches the next completion into this field.
57 * No lock is necessary since only one thread calls aio_poll() and invokes
58 * fd and poll handlers.
60 struct blkio_completion poll_completion
;
63 * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
65 * Lock ordering: ->bounce_lock before ->blkio_lock.
69 /* Bounce buffer pool */
70 struct blkio_mem_region bounce_pool
;
72 /* Sorted list of allocated bounce buffers */
73 QLIST_HEAD(, BlkioBounceBuf
) bounce_bufs
;
75 /* Queue for coroutines waiting for bounce buffer space */
76 CoQueue bounce_available
;
78 /* The value of the "mem-region-alignment" property */
79 size_t mem_region_alignment
;
81 /* Can we skip adding/deleting blkio_mem_regions? */
82 bool needs_mem_regions
;
84 /* Are file descriptors necessary for blkio_mem_regions? */
85 bool needs_mem_region_fd
;
87 /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
88 bool may_pin_mem_regions
;
91 /* Called with s->bounce_lock held */
92 static int blkio_resize_bounce_pool(BDRVBlkioState
*s
, int64_t bytes
)
94 /* There can be no allocated bounce buffers during resize */
95 assert(QLIST_EMPTY(&s
->bounce_bufs
));
97 /* Pad size to reduce frequency of resize calls */
100 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
103 if (s
->bounce_pool
.addr
) {
104 blkio_unmap_mem_region(s
->blkio
, &s
->bounce_pool
);
105 blkio_free_mem_region(s
->blkio
, &s
->bounce_pool
);
106 memset(&s
->bounce_pool
, 0, sizeof(s
->bounce_pool
));
109 /* Automatically freed when s->blkio is destroyed */
110 ret
= blkio_alloc_mem_region(s
->blkio
, &s
->bounce_pool
, bytes
);
115 ret
= blkio_map_mem_region(s
->blkio
, &s
->bounce_pool
);
117 blkio_free_mem_region(s
->blkio
, &s
->bounce_pool
);
118 memset(&s
->bounce_pool
, 0, sizeof(s
->bounce_pool
));
126 /* Called with s->bounce_lock held */
128 blkio_do_alloc_bounce_buffer(BDRVBlkioState
*s
, BlkioBounceBuf
*bounce
,
131 void *addr
= s
->bounce_pool
.addr
;
132 BlkioBounceBuf
*cur
= NULL
;
133 BlkioBounceBuf
*prev
= NULL
;
137 * This is just a linear search over the holes between requests. An
138 * efficient allocator would be nice.
140 QLIST_FOREACH(cur
, &s
->bounce_bufs
, next
) {
141 space
= cur
->buf
.iov_base
- addr
;
142 if (bytes
<= space
) {
143 QLIST_INSERT_BEFORE(cur
, bounce
, next
);
144 bounce
->buf
.iov_base
= addr
;
145 bounce
->buf
.iov_len
= bytes
;
149 addr
= cur
->buf
.iov_base
+ cur
->buf
.iov_len
;
153 /* Is there space after the last request? */
154 space
= s
->bounce_pool
.addr
+ s
->bounce_pool
.len
- addr
;
159 QLIST_INSERT_AFTER(prev
, bounce
, next
);
161 QLIST_INSERT_HEAD(&s
->bounce_bufs
, bounce
, next
);
163 bounce
->buf
.iov_base
= addr
;
164 bounce
->buf
.iov_len
= bytes
;
168 static int coroutine_fn
169 blkio_alloc_bounce_buffer(BDRVBlkioState
*s
, BlkioBounceBuf
*bounce
,
173 * Ensure fairness: first time around we join the back of the queue,
174 * subsequently we join the front so we don't lose our place.
176 CoQueueWaitFlags wait_flags
= 0;
178 QEMU_LOCK_GUARD(&s
->bounce_lock
);
180 /* Ensure fairness: don't even try if other requests are already waiting */
181 if (!qemu_co_queue_empty(&s
->bounce_available
)) {
182 qemu_co_queue_wait_flags(&s
->bounce_available
, &s
->bounce_lock
,
184 wait_flags
= CO_QUEUE_WAIT_FRONT
;
188 if (blkio_do_alloc_bounce_buffer(s
, bounce
, bytes
)) {
189 /* Kick the next queued request since there may be space */
190 qemu_co_queue_next(&s
->bounce_available
);
195 * If there are no in-flight requests then the pool was simply too
198 if (QLIST_EMPTY(&s
->bounce_bufs
)) {
202 ret
= blkio_resize_bounce_pool(s
, bytes
);
204 /* Kick the next queued request since that may fail too */
205 qemu_co_queue_next(&s
->bounce_available
);
209 ok
= blkio_do_alloc_bounce_buffer(s
, bounce
, bytes
);
210 assert(ok
); /* must have space this time */
214 qemu_co_queue_wait_flags(&s
->bounce_available
, &s
->bounce_lock
,
216 wait_flags
= CO_QUEUE_WAIT_FRONT
;
220 static void coroutine_fn
blkio_free_bounce_buffer(BDRVBlkioState
*s
,
221 BlkioBounceBuf
*bounce
)
223 QEMU_LOCK_GUARD(&s
->bounce_lock
);
225 QLIST_REMOVE(bounce
, next
);
227 /* Wake up waiting coroutines since space may now be available */
228 qemu_co_queue_next(&s
->bounce_available
);
231 /* For async to .bdrv_co_*() conversion */
233 Coroutine
*coroutine
;
237 static void blkio_completion_fd_read(void *opaque
)
239 BlockDriverState
*bs
= opaque
;
240 BDRVBlkioState
*s
= bs
->opaque
;
244 /* Polling may have already fetched a completion */
245 if (s
->poll_completion
.user_data
!= NULL
) {
246 BlkioCoData
*cod
= s
->poll_completion
.user_data
;
247 cod
->ret
= s
->poll_completion
.ret
;
249 /* Clear it in case aio_co_wake() enters a nested event loop */
250 s
->poll_completion
.user_data
= NULL
;
252 aio_co_wake(cod
->coroutine
);
255 /* Reset completion fd status */
256 ret
= read(s
->completion_fd
, &val
, sizeof(val
));
258 /* Ignore errors, there's nothing we can do */
262 * Reading one completion at a time makes nested event loop re-entrancy
263 * simple. Change this loop to get multiple completions in one go if it
264 * becomes a performance bottleneck.
267 struct blkio_completion completion
;
269 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
270 ret
= blkioq_do_io(s
->blkioq
, &completion
, 0, 1, NULL
);
276 BlkioCoData
*cod
= completion
.user_data
;
277 cod
->ret
= completion
.ret
;
278 aio_co_wake(cod
->coroutine
);
282 static bool blkio_completion_fd_poll(void *opaque
)
284 BlockDriverState
*bs
= opaque
;
285 BDRVBlkioState
*s
= bs
->opaque
;
288 /* Just in case we already fetched a completion */
289 if (s
->poll_completion
.user_data
!= NULL
) {
293 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
294 ret
= blkioq_do_io(s
->blkioq
, &s
->poll_completion
, 0, 1, NULL
);
static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}
304 static void blkio_attach_aio_context(BlockDriverState
*bs
,
305 AioContext
*new_context
)
307 BDRVBlkioState
*s
= bs
->opaque
;
309 aio_set_fd_handler(new_context
, s
->completion_fd
,
310 blkio_completion_fd_read
, NULL
,
311 blkio_completion_fd_poll
,
312 blkio_completion_fd_poll_ready
, bs
);
315 static void blkio_detach_aio_context(BlockDriverState
*bs
)
317 BDRVBlkioState
*s
= bs
->opaque
;
319 aio_set_fd_handler(bdrv_get_aio_context(bs
), s
->completion_fd
, NULL
, NULL
,
323 /* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
324 static void blkio_submit_io(BlockDriverState
*bs
)
326 if (qatomic_read(&bs
->io_plugged
) == 0) {
327 BDRVBlkioState
*s
= bs
->opaque
;
329 blkioq_do_io(s
->blkioq
, NULL
, 0, 0, NULL
);
333 static int coroutine_fn
334 blkio_co_pdiscard(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
)
336 BDRVBlkioState
*s
= bs
->opaque
;
338 .coroutine
= qemu_coroutine_self(),
341 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
342 blkioq_discard(s
->blkioq
, offset
, bytes
, &cod
, 0);
346 qemu_coroutine_yield();
350 static int coroutine_fn
351 blkio_co_preadv(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
352 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
355 .coroutine
= qemu_coroutine_self(),
357 BDRVBlkioState
*s
= bs
->opaque
;
358 bool use_bounce_buffer
=
359 s
->needs_mem_regions
&& !(flags
& BDRV_REQ_REGISTERED_BUF
);
360 BlkioBounceBuf bounce
;
361 struct iovec
*iov
= qiov
->iov
;
362 int iovcnt
= qiov
->niov
;
364 if (use_bounce_buffer
) {
365 int ret
= blkio_alloc_bounce_buffer(s
, &bounce
, bytes
);
374 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
375 blkioq_readv(s
->blkioq
, offset
, iov
, iovcnt
, &cod
, 0);
379 qemu_coroutine_yield();
381 if (use_bounce_buffer
) {
383 qemu_iovec_from_buf(qiov
, 0,
388 blkio_free_bounce_buffer(s
, &bounce
);
394 static int coroutine_fn
blkio_co_pwritev(BlockDriverState
*bs
, int64_t offset
,
395 int64_t bytes
, QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
397 uint32_t blkio_flags
= (flags
& BDRV_REQ_FUA
) ? BLKIO_REQ_FUA
: 0;
399 .coroutine
= qemu_coroutine_self(),
401 BDRVBlkioState
*s
= bs
->opaque
;
402 bool use_bounce_buffer
=
403 s
->needs_mem_regions
&& !(flags
& BDRV_REQ_REGISTERED_BUF
);
404 BlkioBounceBuf bounce
;
405 struct iovec
*iov
= qiov
->iov
;
406 int iovcnt
= qiov
->niov
;
408 if (use_bounce_buffer
) {
409 int ret
= blkio_alloc_bounce_buffer(s
, &bounce
, bytes
);
414 qemu_iovec_to_buf(qiov
, 0, bounce
.buf
.iov_base
, bytes
);
419 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
420 blkioq_writev(s
->blkioq
, offset
, iov
, iovcnt
, &cod
, blkio_flags
);
424 qemu_coroutine_yield();
426 if (use_bounce_buffer
) {
427 blkio_free_bounce_buffer(s
, &bounce
);
433 static int coroutine_fn
blkio_co_flush(BlockDriverState
*bs
)
435 BDRVBlkioState
*s
= bs
->opaque
;
437 .coroutine
= qemu_coroutine_self(),
440 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
441 blkioq_flush(s
->blkioq
, &cod
, 0);
445 qemu_coroutine_yield();
449 static int coroutine_fn
blkio_co_pwrite_zeroes(BlockDriverState
*bs
,
450 int64_t offset
, int64_t bytes
, BdrvRequestFlags flags
)
452 BDRVBlkioState
*s
= bs
->opaque
;
454 .coroutine
= qemu_coroutine_self(),
456 uint32_t blkio_flags
= 0;
458 if (flags
& BDRV_REQ_FUA
) {
459 blkio_flags
|= BLKIO_REQ_FUA
;
461 if (!(flags
& BDRV_REQ_MAY_UNMAP
)) {
462 blkio_flags
|= BLKIO_REQ_NO_UNMAP
;
464 if (flags
& BDRV_REQ_NO_FALLBACK
) {
465 blkio_flags
|= BLKIO_REQ_NO_FALLBACK
;
468 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
469 blkioq_write_zeroes(s
->blkioq
, offset
, bytes
, &cod
, blkio_flags
);
473 qemu_coroutine_yield();
477 static void coroutine_fn
blkio_co_io_unplug(BlockDriverState
*bs
)
479 BDRVBlkioState
*s
= bs
->opaque
;
481 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
typedef enum {
    BMRR_OK,
    BMRR_SKIP,
    BMRR_FAIL,
} BlkioMemRegionResult;
493 * Produce a struct blkio_mem_region for a given address and size.
495 * This function produces identical results when called multiple times with the
496 * same arguments. This property is necessary because blkio_unmap_mem_region()
497 * must receive the same struct blkio_mem_region field values that were passed
498 * to blkio_map_mem_region().
500 static BlkioMemRegionResult
501 blkio_mem_region_from_host(BlockDriverState
*bs
,
502 void *host
, size_t size
,
503 struct blkio_mem_region
*region
,
506 BDRVBlkioState
*s
= bs
->opaque
;
508 ram_addr_t fd_offset
= 0;
510 if (((uintptr_t)host
| size
) % s
->mem_region_alignment
) {
511 error_setg(errp
, "unaligned buf %p with size %zu", host
, size
);
515 /* Attempt to find the fd for the underlying memory */
516 if (s
->needs_mem_region_fd
) {
522 * bdrv_register_buf() is called with the BQL held so mr lives at least
523 * until this function returns.
525 ram_block
= qemu_ram_block_from_host(host
, false, &fd_offset
);
527 fd
= qemu_ram_get_fd(ram_block
);
531 * Ideally every RAMBlock would have an fd. pc-bios and other
532 * things don't. Luckily they are usually not I/O buffers and we
533 * can just ignore them.
538 /* Make sure the fd covers the entire range */
539 end_block
= qemu_ram_block_from_host(host
+ size
- 1, false, &offset
);
540 if (ram_block
!= end_block
) {
541 error_setg(errp
, "registered buffer at %p with size %zu extends "
542 "beyond RAMBlock", host
, size
);
547 *region
= (struct blkio_mem_region
){
551 .fd_offset
= fd_offset
,
556 static bool blkio_register_buf(BlockDriverState
*bs
, void *host
, size_t size
,
559 BDRVBlkioState
*s
= bs
->opaque
;
560 struct blkio_mem_region region
;
561 BlkioMemRegionResult region_result
;
565 * Mapping memory regions conflicts with RAM discard (virtio-mem) when
566 * there is pinning, so only do it when necessary.
568 if (!s
->needs_mem_regions
&& s
->may_pin_mem_regions
) {
572 region_result
= blkio_mem_region_from_host(bs
, host
, size
, ®ion
, errp
);
573 if (region_result
== BMRR_SKIP
) {
575 } else if (region_result
!= BMRR_OK
) {
579 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
580 ret
= blkio_map_mem_region(s
->blkio
, ®ion
);
584 error_setg(errp
, "Failed to add blkio mem region %p with size %zu: %s",
585 host
, size
, blkio_get_error_msg());
591 static void blkio_unregister_buf(BlockDriverState
*bs
, void *host
, size_t size
)
593 BDRVBlkioState
*s
= bs
->opaque
;
594 struct blkio_mem_region region
;
596 /* See blkio_register_buf() */
597 if (!s
->needs_mem_regions
&& s
->may_pin_mem_regions
) {
601 if (blkio_mem_region_from_host(bs
, host
, size
, ®ion
, NULL
) != BMRR_OK
) {
605 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
606 blkio_unmap_mem_region(s
->blkio
, ®ion
);
610 static int blkio_io_uring_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
613 const char *filename
= qdict_get_str(options
, "filename");
614 BDRVBlkioState
*s
= bs
->opaque
;
617 ret
= blkio_set_str(s
->blkio
, "path", filename
);
618 qdict_del(options
, "filename");
620 error_setg_errno(errp
, -ret
, "failed to set path: %s",
621 blkio_get_error_msg());
625 if (flags
& BDRV_O_NOCACHE
) {
626 ret
= blkio_set_bool(s
->blkio
, "direct", true);
628 error_setg_errno(errp
, -ret
, "failed to set direct: %s",
629 blkio_get_error_msg());
637 static int blkio_nvme_io_uring(BlockDriverState
*bs
, QDict
*options
, int flags
,
640 const char *path
= qdict_get_try_str(options
, "path");
641 BDRVBlkioState
*s
= bs
->opaque
;
645 error_setg(errp
, "missing 'path' option");
649 ret
= blkio_set_str(s
->blkio
, "path", path
);
650 qdict_del(options
, "path");
652 error_setg_errno(errp
, -ret
, "failed to set path: %s",
653 blkio_get_error_msg());
657 if (!(flags
& BDRV_O_NOCACHE
)) {
658 error_setg(errp
, "cache.direct=off is not supported");
665 static int blkio_virtio_blk_common_open(BlockDriverState
*bs
,
666 QDict
*options
, int flags
, Error
**errp
)
668 const char *path
= qdict_get_try_str(options
, "path");
669 BDRVBlkioState
*s
= bs
->opaque
;
673 error_setg(errp
, "missing 'path' option");
677 ret
= blkio_set_str(s
->blkio
, "path", path
);
678 qdict_del(options
, "path");
680 error_setg_errno(errp
, -ret
, "failed to set path: %s",
681 blkio_get_error_msg());
685 if (!(flags
& BDRV_O_NOCACHE
)) {
686 error_setg(errp
, "cache.direct=off is not supported");
692 static int blkio_file_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
695 const char *blkio_driver
= bs
->drv
->protocol_name
;
696 BDRVBlkioState
*s
= bs
->opaque
;
699 ret
= blkio_create(blkio_driver
, &s
->blkio
);
701 error_setg_errno(errp
, -ret
, "blkio_create failed: %s",
702 blkio_get_error_msg());
706 if (strcmp(blkio_driver
, DRIVER_IO_URING
) == 0) {
707 ret
= blkio_io_uring_open(bs
, options
, flags
, errp
);
708 } else if (strcmp(blkio_driver
, DRIVER_NVME_IO_URING
) == 0) {
709 ret
= blkio_nvme_io_uring(bs
, options
, flags
, errp
);
710 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VFIO_PCI
) == 0) {
711 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
712 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VHOST_USER
) == 0) {
713 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
714 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VHOST_VDPA
) == 0) {
715 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
717 g_assert_not_reached();
720 blkio_destroy(&s
->blkio
);
724 if (!(flags
& BDRV_O_RDWR
)) {
725 ret
= blkio_set_bool(s
->blkio
, "read-only", true);
727 error_setg_errno(errp
, -ret
, "failed to set read-only: %s",
728 blkio_get_error_msg());
729 blkio_destroy(&s
->blkio
);
734 ret
= blkio_connect(s
->blkio
);
736 error_setg_errno(errp
, -ret
, "blkio_connect failed: %s",
737 blkio_get_error_msg());
738 blkio_destroy(&s
->blkio
);
742 ret
= blkio_get_bool(s
->blkio
,
744 &s
->needs_mem_regions
);
746 error_setg_errno(errp
, -ret
,
747 "failed to get needs-mem-regions: %s",
748 blkio_get_error_msg());
749 blkio_destroy(&s
->blkio
);
753 ret
= blkio_get_bool(s
->blkio
,
754 "needs-mem-region-fd",
755 &s
->needs_mem_region_fd
);
757 error_setg_errno(errp
, -ret
,
758 "failed to get needs-mem-region-fd: %s",
759 blkio_get_error_msg());
760 blkio_destroy(&s
->blkio
);
764 ret
= blkio_get_uint64(s
->blkio
,
765 "mem-region-alignment",
766 &s
->mem_region_alignment
);
768 error_setg_errno(errp
, -ret
,
769 "failed to get mem-region-alignment: %s",
770 blkio_get_error_msg());
771 blkio_destroy(&s
->blkio
);
775 ret
= blkio_get_bool(s
->blkio
,
776 "may-pin-mem-regions",
777 &s
->may_pin_mem_regions
);
779 /* Be conservative (assume pinning) if the property is not supported */
780 s
->may_pin_mem_regions
= s
->needs_mem_regions
;
784 * Notify if libblkio drivers pin memory and prevent features like
785 * virtio-mem from working.
787 if (s
->may_pin_mem_regions
) {
788 ret
= ram_block_discard_disable(true);
790 error_setg_errno(errp
, -ret
, "ram_block_discard_disable() failed");
791 blkio_destroy(&s
->blkio
);
796 ret
= blkio_start(s
->blkio
);
798 error_setg_errno(errp
, -ret
, "blkio_start failed: %s",
799 blkio_get_error_msg());
800 blkio_destroy(&s
->blkio
);
801 if (s
->may_pin_mem_regions
) {
802 ram_block_discard_disable(false);
807 bs
->supported_write_flags
= BDRV_REQ_FUA
| BDRV_REQ_REGISTERED_BUF
;
808 bs
->supported_zero_flags
= BDRV_REQ_FUA
| BDRV_REQ_MAY_UNMAP
|
809 BDRV_REQ_NO_FALLBACK
;
811 qemu_mutex_init(&s
->blkio_lock
);
812 qemu_co_mutex_init(&s
->bounce_lock
);
813 qemu_co_queue_init(&s
->bounce_available
);
814 QLIST_INIT(&s
->bounce_bufs
);
815 s
->blkioq
= blkio_get_queue(s
->blkio
, 0);
816 s
->completion_fd
= blkioq_get_completion_fd(s
->blkioq
);
818 blkio_attach_aio_context(bs
, bdrv_get_aio_context(bs
));
822 static void blkio_close(BlockDriverState
*bs
)
824 BDRVBlkioState
*s
= bs
->opaque
;
826 /* There is no destroy() API for s->bounce_lock */
828 qemu_mutex_destroy(&s
->blkio_lock
);
829 blkio_detach_aio_context(bs
);
830 blkio_destroy(&s
->blkio
);
832 if (s
->may_pin_mem_regions
) {
833 ram_block_discard_disable(false);
837 static int64_t coroutine_fn
blkio_co_getlength(BlockDriverState
*bs
)
839 BDRVBlkioState
*s
= bs
->opaque
;
843 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
844 ret
= blkio_get_uint64(s
->blkio
, "capacity", &capacity
);
853 static int coroutine_fn
blkio_truncate(BlockDriverState
*bs
, int64_t offset
,
854 bool exact
, PreallocMode prealloc
,
855 BdrvRequestFlags flags
, Error
**errp
)
857 int64_t current_length
;
859 if (prealloc
!= PREALLOC_MODE_OFF
) {
860 error_setg(errp
, "Unsupported preallocation mode '%s'",
861 PreallocMode_str(prealloc
));
865 current_length
= blkio_co_getlength(bs
);
867 if (offset
> current_length
) {
868 error_setg(errp
, "Cannot grow device");
870 } else if (exact
&& offset
!= current_length
) {
871 error_setg(errp
, "Cannot resize device");
878 static int coroutine_fn
879 blkio_co_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
884 static void blkio_refresh_limits(BlockDriverState
*bs
, Error
**errp
)
886 BDRVBlkioState
*s
= bs
->opaque
;
887 QEMU_LOCK_GUARD(&s
->blkio_lock
);
891 ret
= blkio_get_int(s
->blkio
, "request-alignment", &value
);
893 error_setg_errno(errp
, -ret
, "failed to get \"request-alignment\": %s",
894 blkio_get_error_msg());
897 bs
->bl
.request_alignment
= value
;
898 if (bs
->bl
.request_alignment
< 1 ||
899 bs
->bl
.request_alignment
>= INT_MAX
||
900 !is_power_of_2(bs
->bl
.request_alignment
)) {
901 error_setg(errp
, "invalid \"request-alignment\" value %" PRIu32
", "
902 "must be a power of 2 less than INT_MAX",
903 bs
->bl
.request_alignment
);
907 ret
= blkio_get_int(s
->blkio
, "optimal-io-size", &value
);
909 error_setg_errno(errp
, -ret
, "failed to get \"optimal-io-size\": %s",
910 blkio_get_error_msg());
913 bs
->bl
.opt_transfer
= value
;
914 if (bs
->bl
.opt_transfer
> INT_MAX
||
915 (bs
->bl
.opt_transfer
% bs
->bl
.request_alignment
)) {
916 error_setg(errp
, "invalid \"optimal-io-size\" value %" PRIu32
", must "
917 "be a multiple of %" PRIu32
, bs
->bl
.opt_transfer
,
918 bs
->bl
.request_alignment
);
922 ret
= blkio_get_int(s
->blkio
, "max-transfer", &value
);
924 error_setg_errno(errp
, -ret
, "failed to get \"max-transfer\": %s",
925 blkio_get_error_msg());
928 bs
->bl
.max_transfer
= value
;
929 if ((bs
->bl
.max_transfer
% bs
->bl
.request_alignment
) ||
930 (bs
->bl
.opt_transfer
&& (bs
->bl
.max_transfer
% bs
->bl
.opt_transfer
))) {
931 error_setg(errp
, "invalid \"max-transfer\" value %" PRIu32
", must be "
932 "a multiple of %" PRIu32
" and %" PRIu32
" (if non-zero)",
933 bs
->bl
.max_transfer
, bs
->bl
.request_alignment
,
934 bs
->bl
.opt_transfer
);
938 ret
= blkio_get_int(s
->blkio
, "buf-alignment", &value
);
940 error_setg_errno(errp
, -ret
, "failed to get \"buf-alignment\": %s",
941 blkio_get_error_msg());
945 error_setg(errp
, "invalid \"buf-alignment\" value %d, must be "
949 bs
->bl
.min_mem_alignment
= value
;
951 ret
= blkio_get_int(s
->blkio
, "optimal-buf-alignment", &value
);
953 error_setg_errno(errp
, -ret
,
954 "failed to get \"optimal-buf-alignment\": %s",
955 blkio_get_error_msg());
959 error_setg(errp
, "invalid \"optimal-buf-alignment\" value %d, "
960 "must be positive", value
);
963 bs
->bl
.opt_mem_alignment
= value
;
965 ret
= blkio_get_int(s
->blkio
, "max-segments", &value
);
967 error_setg_errno(errp
, -ret
, "failed to get \"max-segments\": %s",
968 blkio_get_error_msg());
972 error_setg(errp
, "invalid \"max-segments\" value %d, must be positive",
976 bs
->bl
.max_iov
= value
;
/*
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */

#define BLKIO_DRIVER(name, ...) \
    { \
        .format_name             = name, \
        .protocol_name           = name, \
        .instance_size           = sizeof(BDRVBlkioState), \
        .bdrv_file_open          = blkio_file_open, \
        .bdrv_close              = blkio_close, \
        .bdrv_co_getlength       = blkio_co_getlength, \
        .bdrv_co_truncate        = blkio_truncate, \
        .bdrv_co_get_info        = blkio_co_get_info, \
        .bdrv_attach_aio_context = blkio_attach_aio_context, \
        .bdrv_detach_aio_context = blkio_detach_aio_context, \
        .bdrv_co_pdiscard        = blkio_co_pdiscard, \
        .bdrv_co_preadv          = blkio_co_preadv, \
        .bdrv_co_pwritev         = blkio_co_pwritev, \
        .bdrv_co_flush_to_disk   = blkio_co_flush, \
        .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
        .bdrv_co_io_unplug       = blkio_co_io_unplug, \
        .bdrv_refresh_limits     = blkio_refresh_limits, \
        .bdrv_register_buf       = blkio_register_buf, \
        .bdrv_unregister_buf     = blkio_unregister_buf, \
        __VA_ARGS__ \
    }
1014 static BlockDriver bdrv_io_uring
= BLKIO_DRIVER(
1016 .bdrv_needs_filename
= true,
1019 static BlockDriver bdrv_nvme_io_uring
= BLKIO_DRIVER(
1020 DRIVER_NVME_IO_URING
,
1023 static BlockDriver bdrv_virtio_blk_vfio_pci
= BLKIO_DRIVER(
1024 DRIVER_VIRTIO_BLK_VFIO_PCI
1027 static BlockDriver bdrv_virtio_blk_vhost_user
= BLKIO_DRIVER(
1028 DRIVER_VIRTIO_BLK_VHOST_USER
1031 static BlockDriver bdrv_virtio_blk_vhost_vdpa
= BLKIO_DRIVER(
1032 DRIVER_VIRTIO_BLK_VHOST_VDPA
1035 static void bdrv_blkio_init(void)
1037 bdrv_register(&bdrv_io_uring
);
1038 bdrv_register(&bdrv_nvme_io_uring
);
1039 bdrv_register(&bdrv_virtio_blk_vfio_pci
);
1040 bdrv_register(&bdrv_virtio_blk_vhost_user
);
1041 bdrv_register(&bdrv_virtio_blk_vhost_vdpa
);
1044 block_init(bdrv_blkio_init
);