]> git.proxmox.com Git - mirror_qemu.git/blob - block/export/vduse-blk.c
vduse-blk: Add vduse-blk resize support
[mirror_qemu.git] / block / export / vduse-blk.c
1 /*
2 * Export QEMU block device via VDUSE
3 *
4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
5 *
6 * Author:
7 * Xie Yongji <xieyongji@bytedance.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13 #include <sys/eventfd.h>
14
15 #include "qemu/osdep.h"
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
22
23 #include "standard-headers/linux/virtio_blk.h"
24
25 #define VDUSE_DEFAULT_NUM_QUEUE 1
26 #define VDUSE_DEFAULT_QUEUE_SIZE 256
27
28 typedef struct VduseBlkExport {
29 BlockExport export;
30 VirtioBlkHandler handler;
31 VduseDev *dev;
32 uint16_t num_queues;
33 unsigned int inflight;
34 } VduseBlkExport;
35
36 typedef struct VduseBlkReq {
37 VduseVirtqElement elem;
38 VduseVirtq *vq;
39 } VduseBlkReq;
40
41 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
42 {
43 vblk_exp->inflight++;
44 }
45
46 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
47 {
48 if (--vblk_exp->inflight == 0) {
49 aio_wait_kick();
50 }
51 }
52
53 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
54 {
55 vduse_queue_push(req->vq, &req->elem, in_len);
56 vduse_queue_notify(req->vq);
57
58 free(req);
59 }
60
61 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
62 {
63 VduseBlkReq *req = opaque;
64 VduseVirtq *vq = req->vq;
65 VduseDev *dev = vduse_queue_get_dev(vq);
66 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
67 VirtioBlkHandler *handler = &vblk_exp->handler;
68 VduseVirtqElement *elem = &req->elem;
69 struct iovec *in_iov = elem->in_sg;
70 struct iovec *out_iov = elem->out_sg;
71 unsigned in_num = elem->in_num;
72 unsigned out_num = elem->out_num;
73 int in_len;
74
75 in_len = virtio_blk_process_req(handler, in_iov,
76 out_iov, in_num, out_num);
77 if (in_len < 0) {
78 free(req);
79 return;
80 }
81
82 vduse_blk_req_complete(req, in_len);
83 vduse_blk_inflight_dec(vblk_exp);
84 }
85
86 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
87 {
88 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
89
90 while (1) {
91 VduseBlkReq *req;
92
93 req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
94 if (!req) {
95 break;
96 }
97 req->vq = vq;
98
99 Coroutine *co =
100 qemu_coroutine_create(vduse_blk_virtio_process_req, req);
101
102 vduse_blk_inflight_inc(vblk_exp);
103 qemu_coroutine_enter(co);
104 }
105 }
106
107 static void on_vduse_vq_kick(void *opaque)
108 {
109 VduseVirtq *vq = opaque;
110 VduseDev *dev = vduse_queue_get_dev(vq);
111 int fd = vduse_queue_get_fd(vq);
112 eventfd_t kick_data;
113
114 if (eventfd_read(fd, &kick_data) == -1) {
115 error_report("failed to read data from eventfd");
116 return;
117 }
118
119 vduse_blk_vq_handler(dev, vq);
120 }
121
122 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
123 {
124 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
125
126 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
127 true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
128 }
129
130 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
131 {
132 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
133
134 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
135 true, NULL, NULL, NULL, NULL, NULL);
136 }
137
138 static const VduseOps vduse_blk_ops = {
139 .enable_queue = vduse_blk_enable_queue,
140 .disable_queue = vduse_blk_disable_queue,
141 };
142
143 static void on_vduse_dev_kick(void *opaque)
144 {
145 VduseDev *dev = opaque;
146
147 vduse_dev_handler(dev);
148 }
149
150 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
151 {
152 int i;
153
154 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
155 true, on_vduse_dev_kick, NULL, NULL, NULL,
156 vblk_exp->dev);
157
158 for (i = 0; i < vblk_exp->num_queues; i++) {
159 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
160 int fd = vduse_queue_get_fd(vq);
161
162 if (fd < 0) {
163 continue;
164 }
165 aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
166 on_vduse_vq_kick, NULL, NULL, NULL, vq);
167 }
168 }
169
170 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
171 {
172 int i;
173
174 for (i = 0; i < vblk_exp->num_queues; i++) {
175 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
176 int fd = vduse_queue_get_fd(vq);
177
178 if (fd < 0) {
179 continue;
180 }
181 aio_set_fd_handler(vblk_exp->export.ctx, fd,
182 true, NULL, NULL, NULL, NULL, NULL);
183 }
184 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
185 true, NULL, NULL, NULL, NULL, NULL);
186
187 AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
188 }
189
190
191 static void blk_aio_attached(AioContext *ctx, void *opaque)
192 {
193 VduseBlkExport *vblk_exp = opaque;
194
195 vblk_exp->export.ctx = ctx;
196 vduse_blk_attach_ctx(vblk_exp, ctx);
197 }
198
199 static void blk_aio_detach(void *opaque)
200 {
201 VduseBlkExport *vblk_exp = opaque;
202
203 vduse_blk_detach_ctx(vblk_exp);
204 vblk_exp->export.ctx = NULL;
205 }
206
207 static void vduse_blk_resize(void *opaque)
208 {
209 BlockExport *exp = opaque;
210 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
211 struct virtio_blk_config config;
212
213 config.capacity =
214 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
215 vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
216 offsetof(struct virtio_blk_config, capacity),
217 (char *)&config.capacity);
218 }
219
220 static const BlockDevOps vduse_block_ops = {
221 .resize_cb = vduse_blk_resize,
222 };
223
224 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
225 Error **errp)
226 {
227 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
228 BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
229 uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
230 uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
231 uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
232 Error *local_err = NULL;
233 struct virtio_blk_config config = { 0 };
234 uint64_t features;
235 int i;
236
237 if (vblk_opts->has_num_queues) {
238 num_queues = vblk_opts->num_queues;
239 if (num_queues == 0) {
240 error_setg(errp, "num-queues must be greater than 0");
241 return -EINVAL;
242 }
243 }
244
245 if (vblk_opts->has_queue_size) {
246 queue_size = vblk_opts->queue_size;
247 if (queue_size <= 2 || !is_power_of_2(queue_size) ||
248 queue_size > VIRTQUEUE_MAX_SIZE) {
249 error_setg(errp, "queue-size is invalid");
250 return -EINVAL;
251 }
252 }
253
254 if (vblk_opts->has_logical_block_size) {
255 logical_block_size = vblk_opts->logical_block_size;
256 check_block_size(exp->id, "logical-block-size", logical_block_size,
257 &local_err);
258 if (local_err) {
259 error_propagate(errp, local_err);
260 return -EINVAL;
261 }
262 }
263 vblk_exp->num_queues = num_queues;
264 vblk_exp->handler.blk = exp->blk;
265 vblk_exp->handler.serial = exp->id;
266 vblk_exp->handler.logical_block_size = logical_block_size;
267 vblk_exp->handler.writable = opts->writable;
268
269 config.capacity =
270 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
271 config.seg_max = cpu_to_le32(queue_size - 2);
272 config.min_io_size = cpu_to_le16(1);
273 config.opt_io_size = cpu_to_le32(1);
274 config.num_queues = cpu_to_le16(num_queues);
275 config.blk_size = cpu_to_le32(logical_block_size);
276 config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
277 config.max_discard_seg = cpu_to_le32(1);
278 config.discard_sector_alignment =
279 cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
280 config.max_write_zeroes_sectors =
281 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
282 config.max_write_zeroes_seg = cpu_to_le32(1);
283
284 features = vduse_get_virtio_features() |
285 (1ULL << VIRTIO_BLK_F_SEG_MAX) |
286 (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
287 (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
288 (1ULL << VIRTIO_BLK_F_FLUSH) |
289 (1ULL << VIRTIO_BLK_F_DISCARD) |
290 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
291
292 if (num_queues > 1) {
293 features |= 1ULL << VIRTIO_BLK_F_MQ;
294 }
295 if (!opts->writable) {
296 features |= 1ULL << VIRTIO_BLK_F_RO;
297 }
298
299 vblk_exp->dev = vduse_dev_create(exp->id, VIRTIO_ID_BLOCK, 0,
300 features, num_queues,
301 sizeof(struct virtio_blk_config),
302 (char *)&config, &vduse_blk_ops,
303 vblk_exp);
304 if (!vblk_exp->dev) {
305 error_setg(errp, "failed to create vduse device");
306 return -ENOMEM;
307 }
308
309 for (i = 0; i < num_queues; i++) {
310 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
311 }
312
313 aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
314 on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
315
316 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
317 vblk_exp);
318
319 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
320
321 return 0;
322 }
323
324 static void vduse_blk_exp_delete(BlockExport *exp)
325 {
326 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
327
328 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
329 vblk_exp);
330 blk_set_dev_ops(exp->blk, NULL, NULL);
331 vduse_dev_destroy(vblk_exp->dev);
332 }
333
334 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
335 {
336 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
337
338 aio_context_acquire(vblk_exp->export.ctx);
339 vduse_blk_detach_ctx(vblk_exp);
340 aio_context_acquire(vblk_exp->export.ctx);
341 }
342
343 const BlockExportDriver blk_exp_vduse_blk = {
344 .type = BLOCK_EXPORT_TYPE_VDUSE_BLK,
345 .instance_size = sizeof(VduseBlkExport),
346 .create = vduse_blk_exp_create,
347 .delete = vduse_blk_exp_delete,
348 .request_shutdown = vduse_blk_exp_request_shutdown,
349 };