]> git.proxmox.com Git - mirror_qemu.git/blob - block/export/vduse-blk.c
f101c24c3f186c973cfb8b8d0aa0a87fd73e7e19
[mirror_qemu.git] / block / export / vduse-blk.c
1 /*
2 * Export QEMU block device via VDUSE
3 *
4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
5 *
6 * Author:
7 * Xie Yongji <xieyongji@bytedance.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13 #include <sys/eventfd.h>
14
15 #include "qemu/osdep.h"
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
22
23 #include "standard-headers/linux/virtio_blk.h"
24
/* Number of virtqueues used when the "num-queues" option is not given. */
#define VDUSE_DEFAULT_NUM_QUEUE 1
/* Virtqueue size used when the "queue-size" option is not given. */
#define VDUSE_DEFAULT_QUEUE_SIZE 256
27
28 typedef struct VduseBlkExport {
29 BlockExport export;
30 VirtioBlkHandler handler;
31 VduseDev *dev;
32 uint16_t num_queues;
33 char *recon_file;
34 unsigned int inflight;
35 } VduseBlkExport;
36
37 typedef struct VduseBlkReq {
38 VduseVirtqElement elem;
39 VduseVirtq *vq;
40 } VduseBlkReq;
41
42 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
43 {
44 vblk_exp->inflight++;
45 }
46
47 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
48 {
49 if (--vblk_exp->inflight == 0) {
50 aio_wait_kick();
51 }
52 }
53
54 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
55 {
56 vduse_queue_push(req->vq, &req->elem, in_len);
57 vduse_queue_notify(req->vq);
58
59 free(req);
60 }
61
62 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
63 {
64 VduseBlkReq *req = opaque;
65 VduseVirtq *vq = req->vq;
66 VduseDev *dev = vduse_queue_get_dev(vq);
67 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
68 VirtioBlkHandler *handler = &vblk_exp->handler;
69 VduseVirtqElement *elem = &req->elem;
70 struct iovec *in_iov = elem->in_sg;
71 struct iovec *out_iov = elem->out_sg;
72 unsigned in_num = elem->in_num;
73 unsigned out_num = elem->out_num;
74 int in_len;
75
76 in_len = virtio_blk_process_req(handler, in_iov,
77 out_iov, in_num, out_num);
78 if (in_len < 0) {
79 free(req);
80 return;
81 }
82
83 vduse_blk_req_complete(req, in_len);
84 vduse_blk_inflight_dec(vblk_exp);
85 }
86
87 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
88 {
89 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
90
91 while (1) {
92 VduseBlkReq *req;
93
94 req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
95 if (!req) {
96 break;
97 }
98 req->vq = vq;
99
100 Coroutine *co =
101 qemu_coroutine_create(vduse_blk_virtio_process_req, req);
102
103 vduse_blk_inflight_inc(vblk_exp);
104 qemu_coroutine_enter(co);
105 }
106 }
107
108 static void on_vduse_vq_kick(void *opaque)
109 {
110 VduseVirtq *vq = opaque;
111 VduseDev *dev = vduse_queue_get_dev(vq);
112 int fd = vduse_queue_get_fd(vq);
113 eventfd_t kick_data;
114
115 if (eventfd_read(fd, &kick_data) == -1) {
116 error_report("failed to read data from eventfd");
117 return;
118 }
119
120 vduse_blk_vq_handler(dev, vq);
121 }
122
123 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
124 {
125 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
126
127 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
128 true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
129 /* Make sure we don't miss any kick afer reconnecting */
130 eventfd_write(vduse_queue_get_fd(vq), 1);
131 }
132
133 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
134 {
135 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
136
137 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
138 true, NULL, NULL, NULL, NULL, NULL);
139 }
140
141 static const VduseOps vduse_blk_ops = {
142 .enable_queue = vduse_blk_enable_queue,
143 .disable_queue = vduse_blk_disable_queue,
144 };
145
146 static void on_vduse_dev_kick(void *opaque)
147 {
148 VduseDev *dev = opaque;
149
150 vduse_dev_handler(dev);
151 }
152
153 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
154 {
155 int i;
156
157 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
158 true, on_vduse_dev_kick, NULL, NULL, NULL,
159 vblk_exp->dev);
160
161 for (i = 0; i < vblk_exp->num_queues; i++) {
162 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
163 int fd = vduse_queue_get_fd(vq);
164
165 if (fd < 0) {
166 continue;
167 }
168 aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
169 on_vduse_vq_kick, NULL, NULL, NULL, vq);
170 }
171 }
172
173 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
174 {
175 int i;
176
177 for (i = 0; i < vblk_exp->num_queues; i++) {
178 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
179 int fd = vduse_queue_get_fd(vq);
180
181 if (fd < 0) {
182 continue;
183 }
184 aio_set_fd_handler(vblk_exp->export.ctx, fd,
185 true, NULL, NULL, NULL, NULL, NULL);
186 }
187 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
188 true, NULL, NULL, NULL, NULL, NULL);
189
190 AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
191 }
192
193
194 static void blk_aio_attached(AioContext *ctx, void *opaque)
195 {
196 VduseBlkExport *vblk_exp = opaque;
197
198 vblk_exp->export.ctx = ctx;
199 vduse_blk_attach_ctx(vblk_exp, ctx);
200 }
201
202 static void blk_aio_detach(void *opaque)
203 {
204 VduseBlkExport *vblk_exp = opaque;
205
206 vduse_blk_detach_ctx(vblk_exp);
207 vblk_exp->export.ctx = NULL;
208 }
209
210 static void vduse_blk_resize(void *opaque)
211 {
212 BlockExport *exp = opaque;
213 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
214 struct virtio_blk_config config;
215
216 config.capacity =
217 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
218 vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
219 offsetof(struct virtio_blk_config, capacity),
220 (char *)&config.capacity);
221 }
222
223 static const BlockDevOps vduse_block_ops = {
224 .resize_cb = vduse_blk_resize,
225 };
226
227 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
228 Error **errp)
229 {
230 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
231 BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
232 uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
233 uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
234 uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
235 Error *local_err = NULL;
236 struct virtio_blk_config config = { 0 };
237 uint64_t features;
238 int i, ret;
239
240 if (vblk_opts->has_num_queues) {
241 num_queues = vblk_opts->num_queues;
242 if (num_queues == 0) {
243 error_setg(errp, "num-queues must be greater than 0");
244 return -EINVAL;
245 }
246 }
247
248 if (vblk_opts->has_queue_size) {
249 queue_size = vblk_opts->queue_size;
250 if (queue_size <= 2 || !is_power_of_2(queue_size) ||
251 queue_size > VIRTQUEUE_MAX_SIZE) {
252 error_setg(errp, "queue-size is invalid");
253 return -EINVAL;
254 }
255 }
256
257 if (vblk_opts->has_logical_block_size) {
258 logical_block_size = vblk_opts->logical_block_size;
259 check_block_size(exp->id, "logical-block-size", logical_block_size,
260 &local_err);
261 if (local_err) {
262 error_propagate(errp, local_err);
263 return -EINVAL;
264 }
265 }
266 vblk_exp->num_queues = num_queues;
267 vblk_exp->handler.blk = exp->blk;
268 vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ?
269 vblk_opts->serial : "");
270 vblk_exp->handler.logical_block_size = logical_block_size;
271 vblk_exp->handler.writable = opts->writable;
272
273 config.capacity =
274 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
275 config.seg_max = cpu_to_le32(queue_size - 2);
276 config.min_io_size = cpu_to_le16(1);
277 config.opt_io_size = cpu_to_le32(1);
278 config.num_queues = cpu_to_le16(num_queues);
279 config.blk_size = cpu_to_le32(logical_block_size);
280 config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
281 config.max_discard_seg = cpu_to_le32(1);
282 config.discard_sector_alignment =
283 cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
284 config.max_write_zeroes_sectors =
285 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
286 config.max_write_zeroes_seg = cpu_to_le32(1);
287
288 features = vduse_get_virtio_features() |
289 (1ULL << VIRTIO_BLK_F_SEG_MAX) |
290 (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
291 (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
292 (1ULL << VIRTIO_BLK_F_FLUSH) |
293 (1ULL << VIRTIO_BLK_F_DISCARD) |
294 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
295
296 if (num_queues > 1) {
297 features |= 1ULL << VIRTIO_BLK_F_MQ;
298 }
299 if (!opts->writable) {
300 features |= 1ULL << VIRTIO_BLK_F_RO;
301 }
302
303 vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
304 features, num_queues,
305 sizeof(struct virtio_blk_config),
306 (char *)&config, &vduse_blk_ops,
307 vblk_exp);
308 if (!vblk_exp->dev) {
309 error_setg(errp, "failed to create vduse device");
310 ret = -ENOMEM;
311 goto err_dev;
312 }
313
314 vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
315 g_get_tmp_dir(), vblk_opts->name);
316 if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
317 error_setg(errp, "failed to set reconnect log file");
318 ret = -EINVAL;
319 goto err;
320 }
321
322 for (i = 0; i < num_queues; i++) {
323 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
324 }
325
326 aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
327 on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
328
329 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
330 vblk_exp);
331
332 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
333
334 return 0;
335 err:
336 vduse_dev_destroy(vblk_exp->dev);
337 g_free(vblk_exp->recon_file);
338 err_dev:
339 g_free(vblk_exp->handler.serial);
340 return ret;
341 }
342
343 static void vduse_blk_exp_delete(BlockExport *exp)
344 {
345 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
346 int ret;
347
348 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
349 vblk_exp);
350 blk_set_dev_ops(exp->blk, NULL, NULL);
351 ret = vduse_dev_destroy(vblk_exp->dev);
352 if (ret != -EBUSY) {
353 unlink(vblk_exp->recon_file);
354 }
355 g_free(vblk_exp->recon_file);
356 g_free(vblk_exp->handler.serial);
357 }
358
359 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
360 {
361 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
362
363 aio_context_acquire(vblk_exp->export.ctx);
364 vduse_blk_detach_ctx(vblk_exp);
365 aio_context_acquire(vblk_exp->export.ctx);
366 }
367
368 const BlockExportDriver blk_exp_vduse_blk = {
369 .type = BLOCK_EXPORT_TYPE_VDUSE_BLK,
370 .instance_size = sizeof(VduseBlkExport),
371 .create = vduse_blk_exp_create,
372 .delete = vduse_blk_exp_delete,
373 .request_shutdown = vduse_blk_exp_request_shutdown,
374 };