]> git.proxmox.com Git - mirror_qemu.git/blame - block/export/vduse-blk.c
aio: remove aio_disable_external() API
[mirror_qemu.git] / block / export / vduse-blk.c
CommitLineData
2a2359b8
XY
1/*
2 * Export QEMU block device via VDUSE
3 *
4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
5 *
6 * Author:
7 * Xie Yongji <xieyongji@bytedance.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
2ca10fae 13#include "qemu/osdep.h"
2a2359b8
XY
14#include <sys/eventfd.h>
15
2a2359b8
XY
16#include "qapi/error.h"
17#include "block/export.h"
18#include "qemu/error-report.h"
19#include "util/block-helpers.h"
20#include "subprojects/libvduse/libvduse.h"
21#include "virtio-blk-handler.h"
22
23#include "standard-headers/linux/virtio_blk.h"
24
25#define VDUSE_DEFAULT_NUM_QUEUE 1
26#define VDUSE_DEFAULT_QUEUE_SIZE 256
27
28typedef struct VduseBlkExport {
29 BlockExport export;
30 VirtioBlkHandler handler;
31 VduseDev *dev;
32 uint16_t num_queues;
d043e2db 33 char *recon_file;
195332c1
SH
34 unsigned int inflight; /* atomic */
35 bool vqs_started;
2a2359b8
XY
36} VduseBlkExport;
37
38typedef struct VduseBlkReq {
39 VduseVirtqElement elem;
40 VduseVirtq *vq;
41} VduseBlkReq;
42
43static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
44{
195332c1
SH
45 if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
46 /* Prevent export from being deleted */
195332c1 47 blk_exp_ref(&vblk_exp->export);
195332c1 48 }
2a2359b8
XY
49}
50
51static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
52{
195332c1
SH
53 if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
54 /* Wake AIO_WAIT_WHILE() */
2a2359b8 55 aio_wait_kick();
195332c1
SH
56
57 /* Now the export can be deleted */
195332c1 58 blk_exp_unref(&vblk_exp->export);
2a2359b8
XY
59 }
60}
61
62static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
63{
64 vduse_queue_push(req->vq, &req->elem, in_len);
65 vduse_queue_notify(req->vq);
66
67 free(req);
68}
69
70static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
71{
72 VduseBlkReq *req = opaque;
73 VduseVirtq *vq = req->vq;
74 VduseDev *dev = vduse_queue_get_dev(vq);
75 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
76 VirtioBlkHandler *handler = &vblk_exp->handler;
77 VduseVirtqElement *elem = &req->elem;
78 struct iovec *in_iov = elem->in_sg;
79 struct iovec *out_iov = elem->out_sg;
80 unsigned in_num = elem->in_num;
81 unsigned out_num = elem->out_num;
82 int in_len;
83
84 in_len = virtio_blk_process_req(handler, in_iov,
85 out_iov, in_num, out_num);
86 if (in_len < 0) {
87 free(req);
88 return;
89 }
90
91 vduse_blk_req_complete(req, in_len);
92 vduse_blk_inflight_dec(vblk_exp);
93}
94
95static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
96{
97 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
98
99 while (1) {
100 VduseBlkReq *req;
101
102 req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
103 if (!req) {
104 break;
105 }
106 req->vq = vq;
107
108 Coroutine *co =
109 qemu_coroutine_create(vduse_blk_virtio_process_req, req);
110
111 vduse_blk_inflight_inc(vblk_exp);
112 qemu_coroutine_enter(co);
113 }
114}
115
116static void on_vduse_vq_kick(void *opaque)
117{
118 VduseVirtq *vq = opaque;
119 VduseDev *dev = vduse_queue_get_dev(vq);
120 int fd = vduse_queue_get_fd(vq);
121 eventfd_t kick_data;
122
123 if (eventfd_read(fd, &kick_data) == -1) {
124 error_report("failed to read data from eventfd");
125 return;
126 }
127
128 vduse_blk_vq_handler(dev, vq);
129}
130
131static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
132{
133 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
134
195332c1
SH
135 if (!vblk_exp->vqs_started) {
136 return; /* vduse_blk_drained_end() will start vqs later */
137 }
138
2a2359b8 139 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
60f782b6 140 on_vduse_vq_kick, NULL, NULL, NULL, vq);
d043e2db
XY
141 /* Make sure we don't miss any kick afer reconnecting */
142 eventfd_write(vduse_queue_get_fd(vq), 1);
2a2359b8
XY
143}
144
145static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
146{
147 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
195332c1 148 int fd = vduse_queue_get_fd(vq);
2a2359b8 149
195332c1
SH
150 if (fd < 0) {
151 return;
152 }
153
60f782b6 154 aio_set_fd_handler(vblk_exp->export.ctx, fd,
195332c1 155 NULL, NULL, NULL, NULL, NULL);
2a2359b8
XY
156}
157
158static const VduseOps vduse_blk_ops = {
159 .enable_queue = vduse_blk_enable_queue,
160 .disable_queue = vduse_blk_disable_queue,
161};
162
163static void on_vduse_dev_kick(void *opaque)
164{
165 VduseDev *dev = opaque;
166
167 vduse_dev_handler(dev);
168}
169
170static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
171{
2a2359b8 172 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
60f782b6 173 on_vduse_dev_kick, NULL, NULL, NULL,
2a2359b8
XY
174 vblk_exp->dev);
175
195332c1 176 /* Virtqueues are handled by vduse_blk_drained_end() */
2a2359b8
XY
177}
178
179static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
180{
2a2359b8 181 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
60f782b6 182 NULL, NULL, NULL, NULL, NULL);
2a2359b8 183
195332c1 184 /* Virtqueues are handled by vduse_blk_drained_begin() */
2a2359b8
XY
185}
186
187
188static void blk_aio_attached(AioContext *ctx, void *opaque)
189{
190 VduseBlkExport *vblk_exp = opaque;
191
192 vblk_exp->export.ctx = ctx;
193 vduse_blk_attach_ctx(vblk_exp, ctx);
194}
195
196static void blk_aio_detach(void *opaque)
197{
198 VduseBlkExport *vblk_exp = opaque;
199
200 vduse_blk_detach_ctx(vblk_exp);
201 vblk_exp->export.ctx = NULL;
202}
203
9e4dea67
XY
204static void vduse_blk_resize(void *opaque)
205{
206 BlockExport *exp = opaque;
207 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
208 struct virtio_blk_config config;
209
210 config.capacity =
211 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
212 vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
213 offsetof(struct virtio_blk_config, capacity),
214 (char *)&config.capacity);
215}
216
195332c1
SH
217static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
218{
219 for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
220 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
221 vduse_blk_disable_queue(vblk_exp->dev, vq);
222 }
223
224 vblk_exp->vqs_started = false;
225}
226
227static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
228{
229 vblk_exp->vqs_started = true;
230
231 for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
232 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
233 vduse_blk_enable_queue(vblk_exp->dev, vq);
234 }
235}
236
237static void vduse_blk_drained_begin(void *opaque)
238{
239 BlockExport *exp = opaque;
240 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
241
242 vduse_blk_stop_virtqueues(vblk_exp);
243}
244
245static void vduse_blk_drained_end(void *opaque)
246{
247 BlockExport *exp = opaque;
248 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
249
250 vduse_blk_start_virtqueues(vblk_exp);
251}
252
253static bool vduse_blk_drained_poll(void *opaque)
254{
255 BlockExport *exp = opaque;
256 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
257
258 return qatomic_read(&vblk_exp->inflight) > 0;
259}
260
9e4dea67 261static const BlockDevOps vduse_block_ops = {
195332c1
SH
262 .resize_cb = vduse_blk_resize,
263 .drained_begin = vduse_blk_drained_begin,
264 .drained_end = vduse_blk_drained_end,
265 .drained_poll = vduse_blk_drained_poll,
9e4dea67
XY
266};
267
2a2359b8
XY
268static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
269 Error **errp)
270{
271 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
272 BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
273 uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
274 uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
275 uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
276 Error *local_err = NULL;
277 struct virtio_blk_config config = { 0 };
278 uint64_t features;
0862a087 279 int i, ret;
2a2359b8
XY
280
281 if (vblk_opts->has_num_queues) {
282 num_queues = vblk_opts->num_queues;
283 if (num_queues == 0) {
284 error_setg(errp, "num-queues must be greater than 0");
285 return -EINVAL;
286 }
287 }
288
289 if (vblk_opts->has_queue_size) {
290 queue_size = vblk_opts->queue_size;
291 if (queue_size <= 2 || !is_power_of_2(queue_size) ||
292 queue_size > VIRTQUEUE_MAX_SIZE) {
293 error_setg(errp, "queue-size is invalid");
294 return -EINVAL;
295 }
296 }
297
298 if (vblk_opts->has_logical_block_size) {
299 logical_block_size = vblk_opts->logical_block_size;
300 check_block_size(exp->id, "logical-block-size", logical_block_size,
301 &local_err);
302 if (local_err) {
303 error_propagate(errp, local_err);
304 return -EINVAL;
305 }
306 }
307 vblk_exp->num_queues = num_queues;
308 vblk_exp->handler.blk = exp->blk;
54fde4ff 309 vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
2a2359b8
XY
310 vblk_exp->handler.logical_block_size = logical_block_size;
311 vblk_exp->handler.writable = opts->writable;
195332c1 312 vblk_exp->vqs_started = true;
2a2359b8
XY
313
314 config.capacity =
315 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
316 config.seg_max = cpu_to_le32(queue_size - 2);
317 config.min_io_size = cpu_to_le16(1);
318 config.opt_io_size = cpu_to_le32(1);
319 config.num_queues = cpu_to_le16(num_queues);
320 config.blk_size = cpu_to_le32(logical_block_size);
321 config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
322 config.max_discard_seg = cpu_to_le32(1);
323 config.discard_sector_alignment =
324 cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
325 config.max_write_zeroes_sectors =
326 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
327 config.max_write_zeroes_seg = cpu_to_le32(1);
328
329 features = vduse_get_virtio_features() |
330 (1ULL << VIRTIO_BLK_F_SEG_MAX) |
331 (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
332 (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
333 (1ULL << VIRTIO_BLK_F_FLUSH) |
334 (1ULL << VIRTIO_BLK_F_DISCARD) |
335 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
336
337 if (num_queues > 1) {
338 features |= 1ULL << VIRTIO_BLK_F_MQ;
339 }
340 if (!opts->writable) {
341 features |= 1ULL << VIRTIO_BLK_F_RO;
342 }
343
779d82e1 344 vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
2a2359b8
XY
345 features, num_queues,
346 sizeof(struct virtio_blk_config),
347 (char *)&config, &vduse_blk_ops,
348 vblk_exp);
349 if (!vblk_exp->dev) {
350 error_setg(errp, "failed to create vduse device");
0862a087
XY
351 ret = -ENOMEM;
352 goto err_dev;
2a2359b8
XY
353 }
354
d043e2db 355 vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
779d82e1 356 g_get_tmp_dir(), vblk_opts->name);
d043e2db
XY
357 if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
358 error_setg(errp, "failed to set reconnect log file");
0862a087
XY
359 ret = -EINVAL;
360 goto err;
d043e2db
XY
361 }
362
2a2359b8
XY
363 for (i = 0; i < num_queues; i++) {
364 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
365 }
366
60f782b6 367 aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
2a2359b8
XY
368 on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
369
370 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
371 vblk_exp);
9e4dea67
XY
372 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
373
195332c1
SH
374 /*
375 * We handle draining ourselves using an in-flight counter and by disabling
376 * virtqueue fd handlers. Do not queue BlockBackend requests, they need to
377 * complete so the in-flight counter reaches zero.
378 */
379 blk_set_disable_request_queuing(exp->blk, true);
380
2a2359b8 381 return 0;
0862a087
XY
382err:
383 vduse_dev_destroy(vblk_exp->dev);
384 g_free(vblk_exp->recon_file);
385err_dev:
386 g_free(vblk_exp->handler.serial);
387 return ret;
2a2359b8
XY
388}
389
390static void vduse_blk_exp_delete(BlockExport *exp)
391{
392 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
d043e2db 393 int ret;
2a2359b8 394
195332c1
SH
395 assert(qatomic_read(&vblk_exp->inflight) == 0);
396
397 vduse_blk_detach_ctx(vblk_exp);
2a2359b8
XY
398 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
399 vblk_exp);
d043e2db
XY
400 ret = vduse_dev_destroy(vblk_exp->dev);
401 if (ret != -EBUSY) {
402 unlink(vblk_exp->recon_file);
403 }
404 g_free(vblk_exp->recon_file);
0862a087 405 g_free(vblk_exp->handler.serial);
2a2359b8
XY
406}
407
195332c1 408/* Called with exp->ctx acquired */
2a2359b8
XY
409static void vduse_blk_exp_request_shutdown(BlockExport *exp)
410{
411 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
412
195332c1 413 vduse_blk_stop_virtqueues(vblk_exp);
2a2359b8
XY
414}
415
416const BlockExportDriver blk_exp_vduse_blk = {
417 .type = BLOCK_EXPORT_TYPE_VDUSE_BLK,
418 .instance_size = sizeof(VduseBlkExport),
419 .create = vduse_blk_exp_create,
420 .delete = vduse_blk_exp_delete,
421 .request_shutdown = vduse_blk_exp_request_shutdown,
422};