/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qapi/error.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "sysemu/blockdev.h"
#include "sysemu/block-ram-registrar.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#include "hw/virtio/virtio-blk.h"
#include "scsi/constants.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
#include "hw/virtio/virtio-bus.h"
#include "migration/qemu-file-types.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk-common.h"
#include "qemu/coroutine.h"

40static void virtio_blk_ioeventfd_attach(VirtIOBlock *s);
41
42static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
43 VirtIOBlockReq *req)
671ec3f0 44{
671ec3f0 45 req->dev = s;
edaffd9f 46 req->vq = vq;
869d66af 47 req->qiov.size = 0;
2a6cdd6d 48 req->in_len = 0;
869d66af 49 req->next = NULL;
95f7142a 50 req->mr_next = NULL;
51}
52
d14dde5e 53static void virtio_blk_free_request(VirtIOBlockReq *req)
671ec3f0 54{
1d29b5b0 55 g_free(req);
56}
57
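/*
 * Complete a request: write the status byte into the guest-visible inhdr,
 * push the element onto the virtqueue, and notify the guest (via irqfd when
 * running in an IOThread).
 */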
03de2f52 58static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
59{
60 VirtIOBlock *s = req->dev;
1cc91b7d 61 VirtIODevice *vdev = VIRTIO_DEVICE(s);
869a5c6d 62
a576ceac 63 trace_virtio_blk_req_complete(vdev, req, status);
6d519a5f 64
92e3c2a3 65 stb_p(&req->in->status, status);
66 iov_discard_undo(&req->inhdr_undo);
67 iov_discard_undo(&req->outhdr_undo);
edaffd9f 68 virtqueue_push(req->vq, &req->elem, req->in_len);
bfa36802 69 if (qemu_in_iothread()) {
3bcc17f0 70 virtio_notify_irqfd(vdev, req->vq);
03de2f52 71 } else {
edaffd9f 72 virtio_notify(vdev, req->vq);
03de2f52 73 }
74}
75
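/*
 * Handle a host I/O error according to the error action configured on the
 * BlockBackend: queue the request for retry when the VM resumes
 * (BLOCK_ERROR_ACTION_STOP), report VIRTIO_BLK_S_IOERR to the guest, or
 * ignore the error.  Returns nonzero when the request has been consumed
 * (re-queued or already completed) and the caller must not complete it.
 */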
f35d68f0 76static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
00f639fb 77 bool is_read, bool acct_failed)
869a5c6d 78{
869a5c6d 79 VirtIOBlock *s = req->dev;
9a6719d5 80 BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);
869a5c6d 81
a589569f 82 if (action == BLOCK_ERROR_ACTION_STOP) {
83 /* Break the link as the next request is going to be parsed from the
84 * ring again. Otherwise we may end up doing a double completion! */
85 req->mr_next = NULL;
86
87 WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
88 req->next = s->rq;
89 s->rq = req;
90 }
a589569f 91 } else if (action == BLOCK_ERROR_ACTION_REPORT) {
869a5c6d 92 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
93 if (acct_failed) {
94 block_acct_failed(blk_get_stats(s->blk), &req->acct);
95 }
671ec3f0 96 virtio_blk_free_request(req);
97 }
98
4be74634 99 blk_error_action(s->blk, action, is_read, error);
a589569f 100 return action != BLOCK_ERROR_ACTION_IGNORE;
101}
102
103static void virtio_blk_rw_complete(void *opaque, int ret)
104{
95f7142a 105 VirtIOBlockReq *next = opaque;
b9e413dd 106 VirtIOBlock *s = next->dev;
a576ceac 107 VirtIODevice *vdev = VIRTIO_DEVICE(s);
108
109 while (next) {
110 VirtIOBlockReq *req = next;
111 next = req->mr_next;
a576ceac 112 trace_virtio_blk_rw_complete(vdev, req, ret);
113
114 if (req->qiov.nalloc != -1) {
e61809ed 115 /* If nalloc is != -1 req->qiov is a local copy of the original
116 * external iovec. It was allocated in submit_requests to be
117 * able to merge requests. */
118 qemu_iovec_destroy(&req->qiov);
119 }
6e02c38d 120
95f7142a 121 if (ret) {
bf4069fb 122 int p = virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type);
95f7142a 123 bool is_read = !(p & VIRTIO_BLK_T_OUT);
124 /* Note that memory may be dirtied on read failure. If the
125 * virtio request is not completed here, as is the case for
126 * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
127 * correctly during live migration. While this is ugly,
128 * it is acceptable because the device is free to write to
129 * the memory until the request is completed (which will
130 * happen on the other side of the migration).
131 */
00f639fb 132 if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) {
133 continue;
134 }
135 }
6d519a5f 136
95f7142a 137 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
bf4069fb 138 block_acct_done(blk_get_stats(s->blk), &req->acct);
95f7142a 139 virtio_blk_free_request(req);
6e02c38d 140 }
869a5c6d 141}
6e02c38d 142
143static void virtio_blk_flush_complete(void *opaque, int ret)
144{
145 VirtIOBlockReq *req = opaque;
b9e413dd 146 VirtIOBlock *s = req->dev;
aa659be3 147
148 if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
149 return;
150 }
151
152 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
9a6719d5 153 block_acct_done(blk_get_stats(s->blk), &req->acct);
671ec3f0 154 virtio_blk_free_request(req);
155}
156
157static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
158{
159 VirtIOBlockReq *req = opaque;
160 VirtIOBlock *s = req->dev;
161 bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
162 ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;
163
164 if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
165 return;
166 }
167
168 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
169 if (is_write_zeroes) {
170 block_acct_done(blk_get_stats(s->blk), &req->acct);
171 }
172 virtio_blk_free_request(req);
173}
174
175#ifdef __linux__
176
177typedef struct {
178 VirtIOBlockReq *req;
179 struct sg_io_hdr hdr;
180} VirtIOBlockIoctlReq;
181
182static void virtio_blk_ioctl_complete(void *opaque, int status)
183{
184 VirtIOBlockIoctlReq *ioctl_req = opaque;
185 VirtIOBlockReq *req = ioctl_req->req;
186 VirtIOBlock *s = req->dev;
187 VirtIODevice *vdev = VIRTIO_DEVICE(s);
188 struct virtio_scsi_inhdr *scsi;
189 struct sg_io_hdr *hdr;
190
191 scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
192
193 if (status) {
194 status = VIRTIO_BLK_S_UNSUPP;
195 virtio_stl_p(vdev, &scsi->errors, 255);
196 goto out;
197 }
198
199 hdr = &ioctl_req->hdr;
200 /*
201 * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
202 * clear the masked_status field [hence status gets cleared too, see
203 * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
204 * status has occurred. However they do set DRIVER_SENSE in driver_status
205 * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
206 */
207 if (hdr->status == 0 && hdr->sb_len_wr > 0) {
208 hdr->status = CHECK_CONDITION;
209 }
210
211 virtio_stl_p(vdev, &scsi->errors,
212 hdr->status | (hdr->msg_status << 8) |
213 (hdr->host_status << 16) | (hdr->driver_status << 24));
214 virtio_stl_p(vdev, &scsi->residual, hdr->resid);
215 virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
216 virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
217
218out:
219 virtio_blk_req_complete(req, status);
220 virtio_blk_free_request(req);
221 g_free(ioctl_req);
222}
223
224#endif
225
edaffd9f 226static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
6e02c38d 227{
edaffd9f 228 VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));
6e02c38d 229
51b19ebe 230 if (req) {
edaffd9f 231 virtio_blk_init_request(s, vq, req);
6e02c38d 232 }
233 return req;
234}
235
75344fa4 236static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
1063b8b1 237{
238 int status = VIRTIO_BLK_S_OK;
239 struct virtio_scsi_inhdr *scsi = NULL;
75344fa4 240 VirtIOBlock *blk = req->dev;
241 VirtIODevice *vdev = VIRTIO_DEVICE(blk);
242 VirtQueueElement *elem = &req->elem;
783d1897 243
47ce9ef7 244#ifdef __linux__
1063b8b1 245 int i;
1dc936aa 246 VirtIOBlockIoctlReq *ioctl_req;
a209f461 247 BlockAIOCB *acb;
47ce9ef7 248#endif
249
250 /*
251 * We require at least one output segment each for the virtio_blk_outhdr
252 * and the SCSI command block.
253 *
254 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
255 * and the sense buffer pointer in the input segments.
256 */
257 if (elem->out_num < 2 || elem->in_num < 3) {
258 status = VIRTIO_BLK_S_IOERR;
259 goto fail;
260 }
261
262 /*
263 * The scsi inhdr is placed in the second-to-last input segment, just
264 * before the regular inhdr.
1063b8b1 265 */
5a05cbee 266 scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
f34e73cd 267
bbe8bd4d 268 if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
269 status = VIRTIO_BLK_S_UNSUPP;
270 goto fail;
271 }
272
273 /*
f34e73cd 274 * No support for bidirectional commands yet.
1063b8b1 275 */
5a05cbee 276 if (elem->out_num > 2 && elem->in_num > 3) {
277 status = VIRTIO_BLK_S_UNSUPP;
278 goto fail;
279 }
1063b8b1 280
f34e73cd 281#ifdef __linux__
282 ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
283 ioctl_req->req = req;
284 ioctl_req->hdr.interface_id = 'S';
285 ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
286 ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
287 ioctl_req->hdr.dxfer_len = 0;
1063b8b1 288
5a05cbee 289 if (elem->out_num > 2) {
290 /*
291 * If there are more than the minimally required 2 output segments
292 * there is write payload starting from the third iovec.
293 */
294 ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
295 ioctl_req->hdr.iovec_count = elem->out_num - 2;
1063b8b1 296
297 for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
298 ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
299 }
1063b8b1 300
1dc936aa 301 ioctl_req->hdr.dxferp = elem->out_sg + 2;
1063b8b1 302
5a05cbee 303 } else if (elem->in_num > 3) {
304 /*
305 * If we have more than 3 input segments the guest wants to actually
306 * read data.
307 */
308 ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
309 ioctl_req->hdr.iovec_count = elem->in_num - 3;
310 for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
311 ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
312 }
1063b8b1 313
1dc936aa 314 ioctl_req->hdr.dxferp = elem->in_sg;
315 } else {
316 /*
317 * Some SCSI commands don't actually transfer any data.
318 */
1dc936aa 319 ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
320 }
321
322 ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
323 ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
1063b8b1 324
325 acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
326 virtio_blk_ioctl_complete, ioctl_req);
327 if (!acb) {
328 g_free(ioctl_req);
329 status = VIRTIO_BLK_S_UNSUPP;
330 goto fail;
331 }
1dc936aa 332 return -EINPROGRESS;
1063b8b1 333#else
334 abort();
335#endif
336
337fail:
338 /* Just put anything nonzero so that the ioctl fails in the guest. */
5a05cbee 339 if (scsi) {
783d1897 340 virtio_stl_p(vdev, &scsi->errors, 255);
341 }
342 return status;
343}
344
345static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
346{
347 int status;
348
75344fa4 349 status = virtio_blk_handle_scsi_req(req);
350 if (status != -EINPROGRESS) {
351 virtio_blk_req_complete(req, status);
352 virtio_blk_free_request(req);
353 }
1063b8b1 354}
1063b8b1 355
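/*
 * Issue mrb->reqs[start .. start + num_reqs - 1] as a single I/O request.
 * When several requests are merged, a new qiov covering all of them is built
 * and the individual requests are chained through mr_next so that
 * virtio_blk_rw_complete() can complete each of them.
 */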
baf42268 356static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
95f7142a 357 int start, int num_reqs, int niov)
869a5c6d 358{
baf42268 359 BlockBackend *blk = s->blk;
360 QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
361 int64_t sector_num = mrb->reqs[start]->sector_num;
95f7142a 362 bool is_write = mrb->is_write;
baf42268 363 BdrvRequestFlags flags = 0;
364
365 if (num_reqs > 1) {
366 int i;
367 struct iovec *tmp_iov = qiov->iov;
368 int tmp_niov = qiov->niov;
369
370 /* mrb->reqs[start]->qiov was initialized from external so we can't
b5772fdd 371 * modify it here. We need to initialize it locally and then add the
372 * external iovecs. */
373 qemu_iovec_init(qiov, niov);
374
375 for (i = 0; i < tmp_niov; i++) {
376 qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
377 }
378
379 for (i = start + 1; i < start + num_reqs; i++) {
380 qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
381 mrb->reqs[i]->qiov.size);
382 mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
95f7142a 383 }
95f7142a 384
385 trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev),
386 mrb, start, num_reqs,
387 sector_num << BDRV_SECTOR_BITS,
388 qiov->size, is_write);
389 block_acct_merge_done(blk_get_stats(blk),
390 is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
391 num_reqs - 1);
392 }
91553dcc 393
394 if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
395 flags |= BDRV_REQ_REGISTERED_BUF;
396 }
397
95f7142a 398 if (is_write) {
399 blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
400 flags, virtio_blk_rw_complete,
401 mrb->reqs[start]);
95f7142a 402 } else {
403 blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
404 flags, virtio_blk_rw_complete,
405 mrb->reqs[start]);
406 }
407}
408
409static int multireq_compare(const void *a, const void *b)
410{
411 const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
412 *req2 = *(VirtIOBlockReq **)b;
413
414 /*
415 * Note that we can't simply subtract sector_num1 from sector_num2
416 * here as that could overflow the return value.
417 */
418 if (req1->sector_num > req2->sector_num) {
419 return 1;
420 } else if (req1->sector_num < req2->sector_num) {
421 return -1;
422 } else {
423 return 0;
424 }
425}
426
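/*
 * Sort the queued requests by sector number and submit them in batches,
 * starting a new batch whenever the next request is not sequential or would
 * exceed the backend's maximum I/O vector count or transfer size.
 */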
baf42268 427static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
428{
429 int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
5def6b80 430 uint32_t max_transfer;
431 int64_t sector_num = 0;
432
433 if (mrb->num_reqs == 1) {
baf42268 434 submit_requests(s, mrb, 0, 1, -1);
95f7142a 435 mrb->num_reqs = 0;
436 return;
437 }
438
5def6b80 439 max_transfer = blk_get_max_transfer(mrb->reqs[0]->dev->blk);
440
441 qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
442 &multireq_compare);
443
444 for (i = 0; i < mrb->num_reqs; i++) {
445 VirtIOBlockReq *req = mrb->reqs[i];
446 if (num_reqs > 0) {
447 /*
448 * NOTE: We cannot merge the requests in below situations:
449 * 1. requests are not sequential
450 * 2. merge would exceed maximum number of IOVs
451 * 3. merge would exceed maximum transfer length of backend device
452 */
453 if (sector_num + nb_sectors != req->sector_num ||
baf42268 454 niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
455 req->qiov.size > max_transfer ||
456 nb_sectors > (max_transfer -
457 req->qiov.size) / BDRV_SECTOR_SIZE) {
baf42268 458 submit_requests(s, mrb, start, num_reqs, niov);
95f7142a 459 num_reqs = 0;
460 }
461 }
462
463 if (num_reqs == 0) {
464 sector_num = req->sector_num;
465 nb_sectors = niov = 0;
466 start = i;
467 }
468
469 nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
470 niov += req->qiov.niov;
471 num_reqs++;
91553dcc 472 }
c20fd872 473
baf42268 474 submit_requests(s, mrb, start, num_reqs, niov);
95f7142a 475 mrb->num_reqs = 0;
91553dcc 476}
87b245db 477
c20fd872 478static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
aa659be3 479{
480 VirtIOBlock *s = req->dev;
481
482 block_acct_start(blk_get_stats(s->blk), &req->acct, 0,
5366d0c8 483 BLOCK_ACCT_FLUSH);
a597e79c 484
485 /*
486 * Make sure all outstanding writes are posted to the backing device.
487 */
95f7142a 488 if (mrb->is_write && mrb->num_reqs > 0) {
baf42268 489 virtio_blk_submit_multireq(s, mrb);
95f7142a 490 }
bf4069fb 491 blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
492}
493
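/*
 * Check that a guest-supplied sector/size pair is properly aligned and does
 * not exceed the maximum request size or the end of the backend.
 */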
494static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
495 uint64_t sector, size_t size)
496{
497 uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
498 uint64_t total_sectors;
499
75af1f34 500 if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
501 return false;
502 }
503 if (sector & dev->sector_mask) {
504 return false;
505 }
2a30307f 506 if (size % dev->conf.conf.logical_block_size) {
507 return false;
508 }
4be74634 509 blk_get_geometry(dev->blk, &total_sectors);
510 if (sector > total_sectors || nb_sectors > total_sectors - sector) {
511 return false;
512 }
513 return true;
514}
515
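/*
 * Parse and submit a discard or write-zeroes request.  Returns a
 * VIRTIO_BLK_S_* status; VIRTIO_BLK_S_OK means the request was issued and
 * will be completed asynchronously by
 * virtio_blk_discard_write_zeroes_complete().
 */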
516static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
517 struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
518{
519 VirtIOBlock *s = req->dev;
520 VirtIODevice *vdev = VIRTIO_DEVICE(s);
521 uint64_t sector;
522 uint32_t num_sectors, flags, max_sectors;
523 uint8_t err_status;
524 int bytes;
525
526 sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
527 num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
528 flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
529 max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
530 s->conf.max_discard_sectors;
531
532 /*
 533 * max_sectors is at most BDRV_REQUEST_MAX_SECTORS; this check
 534 * makes sure that "num_sectors << BDRV_SECTOR_BITS" fits in
 535 * the integer variable.
536 */
537 if (unlikely(num_sectors > max_sectors)) {
538 err_status = VIRTIO_BLK_S_IOERR;
539 goto err;
540 }
541
542 bytes = num_sectors << BDRV_SECTOR_BITS;
543
544 if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
545 err_status = VIRTIO_BLK_S_IOERR;
546 goto err;
547 }
548
549 /*
550 * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
551 * and write zeroes commands if any unknown flag is set.
552 */
553 if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
554 err_status = VIRTIO_BLK_S_UNSUPP;
555 goto err;
556 }
557
558 if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
559 int blk_aio_flags = 0;
560
561 if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
562 blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
563 }
564
565 block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
566 BLOCK_ACCT_WRITE);
567
568 blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
569 bytes, blk_aio_flags,
570 virtio_blk_discard_write_zeroes_complete, req);
571 } else { /* VIRTIO_BLK_T_DISCARD */
572 /*
573 * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
574 * discard commands if the unmap flag is set.
575 */
576 if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
577 err_status = VIRTIO_BLK_S_UNSUPP;
578 goto err;
579 }
580
581 blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
582 virtio_blk_discard_write_zeroes_complete, req);
583 }
584
585 return VIRTIO_BLK_S_OK;
586
587err:
588 if (is_write_zeroes) {
589 block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
590 }
591 return err_status;
592}
593
594typedef struct ZoneCmdData {
595 VirtIOBlockReq *req;
596 struct iovec *in_iov;
597 unsigned in_num;
598 union {
599 struct {
600 unsigned int nr_zones;
601 BlockZoneDescriptor *zones;
602 } zone_report_data;
603 struct {
604 int64_t offset;
605 } zone_append_data;
606 };
607} ZoneCmdData;
608
609/*
610 * check zoned_request: error checking before issuing requests. If all checks
611 * passed, return true.
612 * append: true if only zone append requests issued.
613 */
614static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
615 bool append, uint8_t *status) {
616 BlockDriverState *bs = blk_bs(s->blk);
617 int index;
618
619 if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
620 *status = VIRTIO_BLK_S_UNSUPP;
621 return false;
622 }
623
624 if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS)
625 || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) {
626 *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
627 return false;
628 }
629
630 if (append) {
631 if (bs->bl.write_granularity) {
632 if ((offset % bs->bl.write_granularity) != 0) {
633 *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
634 return false;
635 }
636 }
637
638 index = offset / bs->bl.zone_size;
639 if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) {
640 *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
641 return false;
642 }
643
644 if (len / 512 > bs->bl.max_append_sectors) {
645 if (bs->bl.max_append_sectors == 0) {
646 *status = VIRTIO_BLK_S_UNSUPP;
647 } else {
648 *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
649 }
650 return false;
651 }
652 }
653 return true;
654}
655
656static void virtio_blk_zone_report_complete(void *opaque, int ret)
657{
658 ZoneCmdData *data = opaque;
659 VirtIOBlockReq *req = data->req;
660 VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
661 struct iovec *in_iov = data->in_iov;
662 unsigned in_num = data->in_num;
663 int64_t zrp_size, n, j = 0;
664 int64_t nz = data->zone_report_data.nr_zones;
665 int8_t err_status = VIRTIO_BLK_S_OK;
666 struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
667 .nr_zones = cpu_to_le64(nz),
668 };
4f736650 669
4e92acf7 670 trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
671 if (ret) {
672 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
673 goto out;
674 }
675
676 zrp_size = sizeof(struct virtio_blk_zone_report)
677 + sizeof(struct virtio_blk_zone_descriptor) * nz;
678 n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
679 if (n != sizeof(zrp_hdr)) {
680 virtio_error(vdev, "Driver provided input buffer that is too small!");
681 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
682 goto out;
683 }
684
685 for (size_t i = sizeof(zrp_hdr); i < zrp_size;
686 i += sizeof(struct virtio_blk_zone_descriptor), ++j) {
687 struct virtio_blk_zone_descriptor desc =
688 (struct virtio_blk_zone_descriptor) {
689 .z_start = cpu_to_le64(data->zone_report_data.zones[j].start
690 >> BDRV_SECTOR_BITS),
691 .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap
692 >> BDRV_SECTOR_BITS),
693 .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp
694 >> BDRV_SECTOR_BITS),
695 };
696
697 switch (data->zone_report_data.zones[j].type) {
698 case BLK_ZT_CONV:
699 desc.z_type = VIRTIO_BLK_ZT_CONV;
700 break;
701 case BLK_ZT_SWR:
702 desc.z_type = VIRTIO_BLK_ZT_SWR;
703 break;
704 case BLK_ZT_SWP:
705 desc.z_type = VIRTIO_BLK_ZT_SWP;
706 break;
707 default:
708 g_assert_not_reached();
709 }
710
711 switch (data->zone_report_data.zones[j].state) {
712 case BLK_ZS_RDONLY:
713 desc.z_state = VIRTIO_BLK_ZS_RDONLY;
714 break;
715 case BLK_ZS_OFFLINE:
716 desc.z_state = VIRTIO_BLK_ZS_OFFLINE;
717 break;
718 case BLK_ZS_EMPTY:
719 desc.z_state = VIRTIO_BLK_ZS_EMPTY;
720 break;
721 case BLK_ZS_CLOSED:
722 desc.z_state = VIRTIO_BLK_ZS_CLOSED;
723 break;
724 case BLK_ZS_FULL:
725 desc.z_state = VIRTIO_BLK_ZS_FULL;
726 break;
727 case BLK_ZS_EOPEN:
728 desc.z_state = VIRTIO_BLK_ZS_EOPEN;
729 break;
730 case BLK_ZS_IOPEN:
731 desc.z_state = VIRTIO_BLK_ZS_IOPEN;
732 break;
733 case BLK_ZS_NOT_WP:
734 desc.z_state = VIRTIO_BLK_ZS_NOT_WP;
735 break;
736 default:
737 g_assert_not_reached();
738 }
739
740 /* TODO: it takes O(n^2) time complexity. Optimizations required. */
741 n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
742 if (n != sizeof(desc)) {
743 virtio_error(vdev, "Driver provided input buffer "
744 "for descriptors that is too small!");
745 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
746 }
747 }
748
749out:
750 virtio_blk_req_complete(req, err_status);
751 virtio_blk_free_request(req);
752 g_free(data->zone_report_data.zones);
753 g_free(data);
754}
755
756static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
757 struct iovec *in_iov,
758 unsigned in_num)
759{
760 VirtIOBlock *s = req->dev;
761 VirtIODevice *vdev = VIRTIO_DEVICE(s);
762 unsigned int nr_zones;
763 ZoneCmdData *data;
764 int64_t zone_size, offset;
765 uint8_t err_status;
766
767 if (req->in_len < sizeof(struct virtio_blk_inhdr) +
768 sizeof(struct virtio_blk_zone_report) +
769 sizeof(struct virtio_blk_zone_descriptor)) {
770 virtio_error(vdev, "in buffer too small for zone report");
771 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
772 goto out;
773 }
774
775 /* start byte offset of the zone report */
776 offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
777 if (!check_zoned_request(s, offset, 0, false, &err_status)) {
778 goto out;
779 }
780 nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
781 sizeof(struct virtio_blk_zone_report)) /
782 sizeof(struct virtio_blk_zone_descriptor);
783 trace_virtio_blk_handle_zone_report(vdev, req,
784 offset >> BDRV_SECTOR_BITS, nr_zones);
785
786 zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
787 data = g_malloc(sizeof(ZoneCmdData));
788 data->req = req;
789 data->in_iov = in_iov;
790 data->in_num = in_num;
791 data->zone_report_data.nr_zones = nr_zones;
 792 data->zone_report_data.zones = g_malloc(zone_size);
793
794 blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
795 data->zone_report_data.zones,
796 virtio_blk_zone_report_complete, data);
797 return;
798out:
799 virtio_blk_req_complete(req, err_status);
800 virtio_blk_free_request(req);
801}
802
803static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
804{
805 VirtIOBlockReq *req = opaque;
806 VirtIOBlock *s = req->dev;
4e92acf7 807 VirtIODevice *vdev = VIRTIO_DEVICE(s);
4f736650 808 int8_t err_status = VIRTIO_BLK_S_OK;
4e92acf7 809 trace_virtio_blk_zone_mgmt_complete(vdev, req, ret);
810
811 if (ret) {
812 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
813 }
814
815 virtio_blk_req_complete(req, err_status);
816 virtio_blk_free_request(req);
817}
818
819static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
820{
821 VirtIOBlock *s = req->dev;
822 VirtIODevice *vdev = VIRTIO_DEVICE(s);
823 BlockDriverState *bs = blk_bs(s->blk);
824 int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
825 uint64_t len;
826 uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
827 uint8_t err_status = VIRTIO_BLK_S_OK;
828
829 uint32_t type = virtio_ldl_p(vdev, &req->out.type);
830 if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
831 /* Entire drive capacity */
832 offset = 0;
833 len = capacity;
834 trace_virtio_blk_handle_zone_reset_all(vdev, req, 0,
835 bs->total_sectors);
836 } else {
837 if (bs->bl.zone_size > capacity - offset) {
838 /* The zoned device allows the last smaller zone. */
839 len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
840 } else {
841 len = bs->bl.zone_size;
842 }
843 trace_virtio_blk_handle_zone_mgmt(vdev, req, op,
844 offset >> BDRV_SECTOR_BITS,
845 len >> BDRV_SECTOR_BITS);
846 }
847
848 if (!check_zoned_request(s, offset, len, false, &err_status)) {
849 goto out;
850 }
851
852 blk_aio_zone_mgmt(s->blk, op, offset, len,
853 virtio_blk_zone_mgmt_complete, req);
854
855 return 0;
856out:
857 virtio_blk_req_complete(req, err_status);
858 virtio_blk_free_request(req);
859 return err_status;
860}
861
862static void virtio_blk_zone_append_complete(void *opaque, int ret)
863{
864 ZoneCmdData *data = opaque;
865 VirtIOBlockReq *req = data->req;
866 VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
867 int64_t append_sector, n;
868 uint8_t err_status = VIRTIO_BLK_S_OK;
869
870 if (ret) {
871 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
872 goto out;
873 }
874
875 virtio_stq_p(vdev, &append_sector,
876 data->zone_append_data.offset >> BDRV_SECTOR_BITS);
877 n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
878 sizeof(append_sector));
879 if (n != sizeof(append_sector)) {
880 virtio_error(vdev, "Driver provided input buffer less than size of "
881 "append_sector");
882 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
883 goto out;
884 }
4e92acf7 885 trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);
886
887out:
888 virtio_blk_req_complete(req, err_status);
889 virtio_blk_free_request(req);
890 g_free(data);
891}
892
893static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
894 struct iovec *out_iov,
895 struct iovec *in_iov,
896 uint64_t out_num,
897 unsigned in_num) {
898 VirtIOBlock *s = req->dev;
899 VirtIODevice *vdev = VIRTIO_DEVICE(s);
900 uint8_t err_status = VIRTIO_BLK_S_OK;
901
902 int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
903 int64_t len = iov_size(out_iov, out_num);
b3d9bb9a 904 ZoneCmdData *data;
4f736650 905
4e92acf7 906 trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
907 if (!check_zoned_request(s, offset, len, true, &err_status)) {
908 goto out;
909 }
910
b3d9bb9a 911 data = g_malloc(sizeof(ZoneCmdData));
912 data->req = req;
913 data->in_iov = in_iov;
914 data->in_num = in_num;
915 data->zone_append_data.offset = offset;
916 qemu_iovec_init_external(&req->qiov, out_iov, out_num);
917
918 block_acct_start(blk_get_stats(s->blk), &req->acct, len,
919 BLOCK_ACCT_ZONE_APPEND);
920
921 blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
922 virtio_blk_zone_append_complete, data);
923 return 0;
924
925out:
926 virtio_blk_req_complete(req, err_status);
927 virtio_blk_free_request(req);
928 return err_status;
929}
930
20ea686a 931static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
bc6694d4 932{
92e3c2a3 933 uint32_t type;
f897bf75 934 struct iovec *in_iov = req->elem.in_sg;
5636da76 935 struct iovec *out_iov = req->elem.out_sg;
936 unsigned in_num = req->elem.in_num;
937 unsigned out_num = req->elem.out_num;
938 VirtIOBlock *s = req->dev;
939 VirtIODevice *vdev = VIRTIO_DEVICE(s);
92e3c2a3 940
f897bf75 941 if (req->elem.out_num < 1 || req->elem.in_num < 1) {
942 virtio_error(vdev, "virtio-blk missing headers");
943 return -1;
944 }
945
5636da76 946 if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
827805a2 947 sizeof(req->out)) != sizeof(req->out))) {
948 virtio_error(vdev, "virtio-blk request outhdr too short");
949 return -1;
827805a2 950 }
ee17e848 951
952 iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
953 &req->outhdr_undo);
ee17e848 954
12048545 955 if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
20ea686a 956 virtio_error(vdev, "virtio-blk request inhdr too short");
7bd04a04 957 iov_discard_undo(&req->outhdr_undo);
20ea686a 958 return -1;
959 }
960
961 /* We always touch the last byte, so just see how big in_iov is. */
962 req->in_len = iov_size(in_iov, in_num);
963 req->in = (void *)in_iov[in_num - 1].iov_base
964 + in_iov[in_num - 1].iov_len
965 - sizeof(struct virtio_blk_inhdr);
966 iov_discard_back_undoable(in_iov, &in_num, sizeof(struct virtio_blk_inhdr),
967 &req->inhdr_undo);
bc6694d4 968
9a6719d5 969 type = virtio_ldl_p(vdev, &req->out.type);
92e3c2a3 970
95f7142a 971 /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
631b22ea 972 * is an optional flag. Although a guest should not send this flag if
973 * not negotiated we ignored it in the past. So keep ignoring it. */
974 switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
975 case VIRTIO_BLK_T_IN:
976 {
977 bool is_write = type & VIRTIO_BLK_T_OUT;
9a6719d5 978 req->sector_num = virtio_ldq_p(vdev, &req->out.sector);
979
980 if (is_write) {
5636da76 981 qemu_iovec_init_external(&req->qiov, out_iov, out_num);
a576ceac 982 trace_virtio_blk_handle_write(vdev, req, req->sector_num,
983 req->qiov.size / BDRV_SECTOR_SIZE);
984 } else {
985 qemu_iovec_init_external(&req->qiov, in_iov, in_num);
a576ceac 986 trace_virtio_blk_handle_read(vdev, req, req->sector_num,
987 req->qiov.size / BDRV_SECTOR_SIZE);
988 }
989
9a6719d5 990 if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
95f7142a 991 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
9a6719d5 992 block_acct_invalid(blk_get_stats(s->blk),
01762e03 993 is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
95f7142a 994 virtio_blk_free_request(req);
20ea686a 995 return 0;
996 }
997
9a6719d5 998 block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size,
999 is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
1000
1001 /* merge would exceed maximum number of requests or IO direction
1002 * changes */
1003 if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
c99495ac 1004 is_write != mrb->is_write ||
9a6719d5 1005 !s->conf.request_merging)) {
baf42268 1006 virtio_blk_submit_multireq(s, mrb);
1007 }
1008
1009 assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
1010 mrb->reqs[mrb->num_reqs++] = req;
1011 mrb->is_write = is_write;
1012 break;
1013 }
1014 case VIRTIO_BLK_T_FLUSH:
c20fd872 1015 virtio_blk_handle_flush(req, mrb);
95f7142a 1016 break;
1017 case VIRTIO_BLK_T_ZONE_REPORT:
1018 virtio_blk_handle_zone_report(req, in_iov, in_num);
1019 break;
1020 case VIRTIO_BLK_T_ZONE_OPEN:
1021 virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
1022 break;
1023 case VIRTIO_BLK_T_ZONE_CLOSE:
1024 virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
1025 break;
1026 case VIRTIO_BLK_T_ZONE_FINISH:
1027 virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
1028 break;
1029 case VIRTIO_BLK_T_ZONE_RESET:
1030 virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
1031 break;
1032 case VIRTIO_BLK_T_ZONE_RESET_ALL:
1033 virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
1034 break;
95f7142a 1035 case VIRTIO_BLK_T_SCSI_CMD:
bc6694d4 1036 virtio_blk_handle_scsi(req);
1037 break;
1038 case VIRTIO_BLK_T_GET_ID:
1039 {
1040 /*
1041 * NB: per existing s/n string convention the string is
1042 * terminated by '\0' only when shorter than buffer.
1043 */
2a30307f 1044 const char *serial = s->conf.serial ? s->conf.serial : "";
1045 size_t size = MIN(strlen(serial) + 1,
1046 MIN(iov_size(in_iov, in_num),
1047 VIRTIO_BLK_ID_BYTES));
1048 iov_from_buf(in_iov, in_num, 0, serial, size);
2930b313 1049 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
671ec3f0 1050 virtio_blk_free_request(req);
1051 break;
1052 }
1053 case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
1054 /*
1055 * Passing out_iov/out_num and in_iov/in_num is not safe
1056 * to access req->elem.out_sg directly because it may be
1057 * modified by virtio_blk_handle_request().
1058 */
1059 virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num);
1060 break;
1061 /*
1062 * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
1063 * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
1064 * so we must mask it for these requests, then we will check if it is set.
1065 */
1066 case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
1067 case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
1068 {
1069 struct virtio_blk_discard_write_zeroes dwz_hdr;
1070 size_t out_len = iov_size(out_iov, out_num);
1071 bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
1072 VIRTIO_BLK_T_WRITE_ZEROES;
1073 uint8_t err_status;
1074
1075 /*
1076 * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
1077 * more than one segment.
1078 */
1079 if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
1080 out_len > sizeof(dwz_hdr))) {
1081 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
1082 virtio_blk_free_request(req);
1083 return 0;
1084 }
1085
1086 if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
1087 sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
1088 iov_discard_undo(&req->inhdr_undo);
1089 iov_discard_undo(&req->outhdr_undo);
1090 virtio_error(vdev, "virtio-blk discard/write_zeroes header"
1091 " too short");
1092 return -1;
1093 }
1094
1095 err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
1096 is_write_zeroes);
1097 if (err_status != VIRTIO_BLK_S_OK) {
1098 virtio_blk_req_complete(req, err_status);
1099 virtio_blk_free_request(req);
1100 }
1101
1102 break;
1103 }
95f7142a 1104 default:
9e72c450 1105 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
671ec3f0 1106 virtio_blk_free_request(req);
bc6694d4 1107 }
20ea686a 1108 return 0;
1109}
1110
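/*
 * Pop and process all requests currently available on @vq.  Sequential
 * reads/writes are collected into a MultiReqBuffer and submitted together;
 * the loop is bracketed by defer_call_begin()/defer_call_end() so the block
 * layer can batch up the submitted requests.
 */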
186b9691 1111void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
6e02c38d 1112{
6e02c38d 1113 VirtIOBlockReq *req;
95f7142a 1114 MultiReqBuffer mrb = {};
d0435bc5 1115 bool suppress_notifications = virtio_queue_get_notification(vq);
6e02c38d 1116
ccee48aa 1117 defer_call_begin();
fc73548e 1118
9ef9d402 1119 do {
1120 if (suppress_notifications) {
1121 virtio_queue_set_notification(vq, 0);
1122 }
1123
1124 while ((req = virtio_blk_get_request(s, vq))) {
1125 if (virtio_blk_handle_request(req, &mrb)) {
1126 virtqueue_detach_element(req->vq, &req->elem, 0);
1127 virtio_blk_free_request(req);
1128 break;
1129 }
20ea686a 1130 }
9ef9d402 1131
1132 if (suppress_notifications) {
1133 virtio_queue_set_notification(vq, 1);
1134 }
9ef9d402 1135 } while (!virtio_queue_empty(vq));
91553dcc 1136
95f7142a 1137 if (mrb.num_reqs) {
baf42268 1138 virtio_blk_submit_multireq(s, &mrb);
95f7142a 1139 }
fc73548e 1140
ccee48aa 1141 defer_call_end();
1142}
1143
1144static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
1145{
1146 VirtIOBlock *s = (VirtIOBlock *)vdev;
1147
3cdaf3dd 1148 if (!s->ioeventfd_disabled && !s->ioeventfd_started) {
8a2fad57 1149 /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
3cdaf3dd 1150 * ioeventfd here instead of waiting for .set_status().
8a2fad57 1151 */
9ffe337c 1152 virtio_device_start_ioeventfd(vdev);
3cdaf3dd 1153 if (!s->ioeventfd_disabled) {
1154 return;
1155 }
1156 }
b6948ab0 1157
186b9691 1158 virtio_blk_handle_vq(s, vq);
1159}
1160
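/*
 * BH that runs in a virtqueue's AioContext to resubmit the requests that
 * were queued while the VM was stopped; scheduled by
 * virtio_blk_dma_restart_cb() with the head of that queue's request list.
 */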
a937f8e8 1161static void virtio_blk_dma_restart_bh(void *opaque)
869a5c6d 1162{
1163 VirtIOBlockReq *req = opaque;
1164 VirtIOBlock *s = req->dev; /* we're called with at least one request */
a937f8e8 1165
95f7142a 1166 MultiReqBuffer mrb = {};
869a5c6d 1167
869a5c6d 1168 while (req) {
1bdb176a 1169 VirtIOBlockReq *next = req->next;
1170 if (virtio_blk_handle_request(req, &mrb)) {
1171 /* Device is now broken and won't do any processing until it gets
1172 * reset. Already queued requests will be lost: let's purge them.
1173 */
1174 while (req) {
1175 next = req->next;
1176 virtqueue_detach_element(req->vq, &req->elem, 0);
1177 virtio_blk_free_request(req);
1178 req = next;
1179 }
1180 break;
1181 }
1bdb176a 1182 req = next;
869a5c6d 1183 }
f1b52868 1184
95f7142a 1185 if (mrb.num_reqs) {
baf42268 1186 virtio_blk_submit_multireq(s, &mrb);
95f7142a 1187 }
7aa1c247 1188
1189 /* Paired with inc in virtio_blk_dma_restart_cb() */
1190 blk_dec_in_flight(s->conf.conf.blk);
1191}
1192
538f0497 1193static void virtio_blk_dma_restart_cb(void *opaque, bool running,
1dfb4dd9 1194 RunState state)
1195{
1196 VirtIOBlock *s = opaque;
71ee0cdd 1197 uint16_t num_queues = s->conf.num_queues;
1198 g_autofree VirtIOBlockReq **vq_rq = NULL;
1199 VirtIOBlockReq *rq;
213189ab 1200
392808b4 1201 if (!running) {
213189ab 1202 return;
392808b4 1203 }
213189ab 1204
71ee0cdd 1205 /* Split the device-wide s->rq request list into per-vq request lists */
b3d9bb9a 1206 vq_rq = g_new0(VirtIOBlockReq *, num_queues);
1207
1208 WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
1209 rq = s->rq;
1210 s->rq = NULL;
1211 }
1212
1213 while (rq) {
1214 VirtIOBlockReq *next = rq->next;
1215 uint16_t idx = virtio_get_queue_index(rq->vq);
1216
1217 /* Only num_queues vqs were created so vq_rq[idx] is within bounds */
1218 assert(idx < num_queues);
1219 rq->next = vq_rq[idx];
1220 vq_rq[idx] = rq;
1221 rq = next;
1222 }
a937f8e8 1223
1224 /* Schedule a BH to submit the requests in each vq's AioContext */
1225 for (uint16_t i = 0; i < num_queues; i++) {
1226 if (!vq_rq[i]) {
1227 continue;
1228 }
1229
1230 /* Paired with dec in virtio_blk_dma_restart_bh() */
1231 blk_inc_in_flight(s->conf.conf.blk);
1232
1233 aio_bh_schedule_oneshot(s->vq_aio_context[i],
1234 virtio_blk_dma_restart_bh,
1235 vq_rq[i]);
1236 }
1237}
1238
1239static void virtio_blk_reset(VirtIODevice *vdev)
1240{
1cc91b7d 1241 VirtIOBlock *s = VIRTIO_BLK(vdev);
26307f6a 1242 VirtIOBlockReq *req;
392808b4 1243
9c67f33f 1244 /* Dataplane has stopped... */
3cdaf3dd 1245 assert(!s->ioeventfd_started);
1246
1247 /* ...but requests may still be in flight. */
1248 blk_drain(s->blk);
1249
1250 /* We drop queued requests after blk_drain() because blk_drain() itself can
1251 * produce them. */
1252 WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
1253 while (s->rq) {
1254 req = s->rq;
1255 s->rq = req->next;
26307f6a 1256
1257 /* No other threads can access req->vq here */
1258 virtqueue_detach_element(req->vq, &req->elem, 0);
1259
1260 virtio_blk_free_request(req);
1261 }
1262 }
6e40b3bf 1263
4be74634 1264 blk_set_enable_write_cache(s->blk, s->original_wce);
1265}
1266
bf011293 1267/* coalesce internal state, copy to pci i/o region 0
1268 */
1269static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
1270{
1cc91b7d 1271 VirtIOBlock *s = VIRTIO_BLK(vdev);
2a30307f 1272 BlockConf *conf = &s->conf.conf;
4f736650 1273 BlockDriverState *bs = blk_bs(s->blk);
1274 struct virtio_blk_config blkcfg;
1275 uint64_t capacity;
17d0bc01 1276 int64_t length;
f7516731 1277 int blk_size = conf->logical_block_size;
6e02c38d 1278
4be74634 1279 blk_get_geometry(s->blk, &capacity);
5c5dafdc 1280 memset(&blkcfg, 0, sizeof(blkcfg));
783d1897 1281 virtio_stq_p(vdev, &blkcfg.capacity, capacity);
1282 virtio_stl_p(vdev, &blkcfg.seg_max,
1283 s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
907eb3e5 1284 virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
783d1897 1285 virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
f7516731 1286 virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
6abee260 1287 virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
907eb3e5 1288 blkcfg.geometry.heads = conf->heads;
1289 /*
1290 * We must ensure that the block device capacity is a multiple of
e03ba136 1291 * the logical block size. If that is not the case, let's use
 1292 * sector_mask to adapt the geometry to have a correct picture.
1293 * For those devices where the capacity is ok for the given geometry
e03ba136 1294 * we don't touch the sector value of the geometry, since some devices
1295 * (like s390 dasd) need a specific value. Here the capacity is already
1296 * cyls*heads*secs*blk_size and the sector value is not block size
1297 * divided by 512 - instead it is the amount of blk_size blocks
1298 * per track (cylinder).
1299 */
1300 length = blk_getlength(s->blk);
1301 if (length > 0 && length / conf->heads / conf->secs % blk_size) {
907eb3e5 1302 blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
136be99e 1303 } else {
907eb3e5 1304 blkcfg.geometry.sectors = conf->secs;
136be99e 1305 }
c7085da7 1306 blkcfg.size_max = 0;
f7516731 1307 blkcfg.physical_block_exp = get_physical_block_exp(conf);
9752c371 1308 blkcfg.alignment_offset = 0;
4be74634 1309 blkcfg.wce = blk_enable_write_cache(s->blk);
2f270590 1310 virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
37b06f8d 1311 if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
1312 uint32_t discard_granularity = conf->discard_granularity;
1313 if (discard_granularity == -1 || !s->conf.report_discard_granularity) {
1314 discard_granularity = blk_size;
1315 }
1316 virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
1317 s->conf.max_discard_sectors);
1318 virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
fb0b154c 1319 discard_granularity >> BDRV_SECTOR_BITS);
1320 /*
1321 * We support only one segment per request since multiple segments
1322 * are not widely used and there are no userspace APIs that allow
1323 * applications to submit multiple segments in a single call.
1324 */
1325 virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
1326 }
1327 if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
1328 virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
1329 s->conf.max_write_zeroes_sectors);
1330 blkcfg.write_zeroes_may_unmap = 1;
1331 virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
1332 }
1333 if (bs->bl.zoned != BLK_Z_NONE) {
1334 switch (bs->bl.zoned) {
1335 case BLK_Z_HM:
1336 blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
1337 break;
1338 case BLK_Z_HA:
1339 blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
1340 break;
1341 default:
1342 g_assert_not_reached();
1343 }
1344
1345 virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
1346 bs->bl.zone_size / 512);
1347 virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
1348 bs->bl.max_active_zones);
1349 virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
1350 bs->bl.max_open_zones);
1351 virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
1352 virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
1353 bs->bl.max_append_sectors);
1354 } else {
1355 blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
1356 }
20764be0 1357 memcpy(config, &blkcfg, s->config_size);
1358}
1359
1360static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
1361{
1cc91b7d 1362 VirtIOBlock *s = VIRTIO_BLK(vdev);
1363 struct virtio_blk_config blkcfg;
1364
20764be0 1365 memcpy(&blkcfg, config, s->config_size);
6d7e73d6 1366
4be74634 1367 blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
1368}
1369
1370static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
1371 Error **errp)
6e02c38d 1372{
1cc91b7d 1373 VirtIOBlock *s = VIRTIO_BLK(vdev);
1063b8b1 1374
 1375 /* First, sync all of the features that virtio-blk could possibly support */
1376 features |= s->host_features;
1377
1378 virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
1379 virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
1380 virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
1381 virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
95129d6f 1382 if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
bbe8bd4d 1383 if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) {
1384 error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
1385 return 0;
1386 }
efb8206c 1387 } else {
c9b11f97 1388 virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
1389 virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
1390 }
aa659be3 1391
1392 if (blk_enable_write_cache(s->blk) ||
1393 (s->conf.x_enable_wce_if_config_wce &&
1394 virtio_has_feature(features, VIRTIO_BLK_F_CONFIG_WCE))) {
0cd09c3a 1395 virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
4be74634 1396 }
86b1cf32 1397 if (!blk_is_writable(s->blk)) {
0cd09c3a 1398 virtio_add_feature(&features, VIRTIO_BLK_F_RO);
4be74634 1399 }
1400 if (s->conf.num_queues > 1) {
1401 virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
1402 }
1403
1404 return features;
1405}
1406
1407static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
1408{
1cc91b7d 1409 VirtIOBlock *s = VIRTIO_BLK(vdev);
9315cbfd 1410
9ffe337c 1411 if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) {
3cdaf3dd 1412 assert(!s->ioeventfd_started);
392808b4 1413 }
392808b4 1414
1415 if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1416 return;
1417 }
1418
1419 /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
1420 * cache flushes. Thus, the "auto writethrough" behavior is never
1421 * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
1422 * Leaving it enabled would break the following sequence:
1423 *
1424 * Guest started with "-drive cache=writethrough"
1425 * Guest sets status to 0
1426 * Guest sets DRIVER bit in status field
1427 * Guest reads host features (WCE=0, CONFIG_WCE=1)
1428 * Guest writes guest features (WCE=0, CONFIG_WCE=1)
1429 * Guest writes 1 to the WCE configuration field (writeback mode)
1430 * Guest sets DRIVER_OK bit in status field
1431 *
4be74634 1432 * s->blk would erroneously be placed in writethrough mode.
ef5bc962 1433 */
95129d6f 1434 if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) {
4be74634 1435 blk_set_enable_write_cache(s->blk,
1436 virtio_vdev_has_feature(vdev,
1437 VIRTIO_BLK_F_WCE));
ef5bc962 1438 }
1439}
1440
1441static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
1442{
1443 VirtIOBlock *s = VIRTIO_BLK(vdev);
b2b295a7 1444
1445 WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
1446 VirtIOBlockReq *req = s->rq;
30d8bf6d 1447
1448 while (req) {
1449 qemu_put_sbyte(f, 1);
30d8bf6d 1450
1451 if (s->conf.num_queues > 1) {
1452 qemu_put_be32(f, virtio_get_queue_index(req->vq));
1453 }
1454
1455 qemu_put_virtqueue_element(vdev, f, &req->elem);
1456 req = req->next;
1457 }
869a5c6d 1458 }
9c67f33f 1459
869a5c6d 1460 qemu_put_sbyte(f, 0);
1461}
1462
1463static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
1464 int version_id)
1465{
1466 VirtIOBlock *s = VIRTIO_BLK(vdev);
2a633c46 1467
869a5c6d 1468 while (qemu_get_sbyte(f)) {
1469 unsigned nvqs = s->conf.num_queues;
1470 unsigned vq_idx = 0;
ab281c17 1471 VirtIOBlockReq *req;
1472
1473 if (nvqs > 1) {
1474 vq_idx = qemu_get_be32(f);
1475
1476 if (vq_idx >= nvqs) {
1477 error_report("Invalid virtqueue index in request list: %#x",
1478 vq_idx);
1479 return -EINVAL;
1480 }
1481 }
1482
8607f5c3 1483 req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
30d8bf6d 1484 virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
1485
1486 WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
1487 req->next = s->rq;
1488 s->rq = req;
1489 }
869a5c6d 1490 }
1491
1492 return 0;
1493}
1494
1495static void virtio_resize_cb(void *opaque)
1496{
1497 VirtIODevice *vdev = opaque;
1498
1499 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1500 virtio_notify_config(vdev);
1501}
1502
145feb17 1503static void virtio_blk_resize(void *opaque)
e5051fc7 1504{
1cc91b7d 1505 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
e5051fc7 1506
9b92fbcf 1507 /*
0b2675c4 1508 * virtio_notify_config() needs to acquire the BQL,
1509 * so it can't be called from an iothread. Instead, schedule
1510 * it to be run in the main context BH.
1511 */
1512 aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
1513}
1514
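/* Detach all virtqueue host notifiers from their AioContexts. */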
3cdaf3dd 1515static void virtio_blk_ioeventfd_detach(VirtIOBlock *s)
1516{
1517 VirtIODevice *vdev = VIRTIO_DEVICE(s);
1518
1519 for (uint16_t i = 0; i < s->conf.num_queues; i++) {
1520 VirtQueue *vq = virtio_get_queue(vdev, i);
1521 virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
1522 }
1523}
1524
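/* Re-attach all virtqueue host notifiers to their AioContexts. */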
3cdaf3dd 1525static void virtio_blk_ioeventfd_attach(VirtIOBlock *s)
1526{
1527 VirtIODevice *vdev = VIRTIO_DEVICE(s);
1528
1529 for (uint16_t i = 0; i < s->conf.num_queues; i++) {
1530 VirtQueue *vq = virtio_get_queue(vdev, i);
1531 virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]);
1532 }
1533}
1534
1535/* Suspend virtqueue ioeventfd processing during drain */
1536static void virtio_blk_drained_begin(void *opaque)
1537{
1538 VirtIOBlock *s = opaque;
1665d932 1539
1540 if (s->ioeventfd_started) {
1541 virtio_blk_ioeventfd_detach(s);
1665d932 1542 }
1543}
1544
1545/* Resume virtqueue ioeventfd processing after drain */
1546static void virtio_blk_drained_end(void *opaque)
1547{
1548 VirtIOBlock *s = opaque;
1665d932 1549
1550 if (s->ioeventfd_started) {
1551 virtio_blk_ioeventfd_attach(s);
1665d932 1552 }
1553}
1554
0e49de52 1555static const BlockDevOps virtio_block_ops = {
1556 .resize_cb = virtio_blk_resize,
1557 .drained_begin = virtio_blk_drained_begin,
1558 .drained_end = virtio_blk_drained_end,
1559};
1560
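/*
 * Check that an iothread-vq-mapping list is valid: every named IOThread
 * exists and appears only once, vq indexes are below num_queues and not
 * assigned twice, and when explicit vq lists are given every vq is covered.
 */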
1561static bool
1562validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
1563 uint16_t num_queues, Error **errp)
1564{
1565 g_autofree unsigned long *vqs = bitmap_new(num_queues);
1566 g_autoptr(GHashTable) iothreads =
1567 g_hash_table_new(g_str_hash, g_str_equal);
1568
1569 for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
1570 const char *name = node->value->iothread;
1571 uint16List *vq;
1572
1573 if (!iothread_by_id(name)) {
1574 error_setg(errp, "IOThread \"%s\" object does not exist", name);
1575 return false;
1576 }
1577
1578 if (!g_hash_table_add(iothreads, (gpointer)name)) {
1579 error_setg(errp,
1580 "duplicate IOThread name \"%s\" in iothread-vq-mapping",
1581 name);
1582 return false;
1583 }
1584
1585 if (node != list) {
1586 if (!!node->value->vqs != !!list->value->vqs) {
1587 error_setg(errp, "either all items in iothread-vq-mapping "
1588 "must have vqs or none of them must have it");
1589 return false;
1590 }
1591 }
1592
1593 for (vq = node->value->vqs; vq; vq = vq->next) {
1594 if (vq->value >= num_queues) {
1595 error_setg(errp, "vq index %u for IOThread \"%s\" must be "
1596 "less than num_queues %u in iothread-vq-mapping",
1597 vq->value, name, num_queues);
1598 return false;
1599 }
1600
1601 if (test_and_set_bit(vq->value, vqs)) {
1602 error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
1603 "because it is already assigned", vq->value, name);
1604 return false;
1605 }
1606 }
1607 }
1608
1609 if (list->value->vqs) {
1610 for (uint16_t i = 0; i < num_queues; i++) {
1611 if (!test_bit(i, vqs)) {
1612 error_setg(errp,
1613 "missing vq %u IOThread assignment in iothread-vq-mapping",
1614 i);
1615 return false;
1616 }
1617 }
1618 }
1619
1620 return true;
1621}
1622
1623/**
1624 * apply_iothread_vq_mapping:
1625 * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
1626 * @vq_aio_context: The array of AioContext pointers to fill in.
1627 * @num_queues: The length of @vq_aio_context.
1628 * @errp: If an error occurs, a pointer to the area to store the error.
1629 *
1630 * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
1631 * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
1632 *
1633 * Returns: %true on success, %false on failure.
1634 **/
1635static bool apply_iothread_vq_mapping(
1636 IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
1637 AioContext **vq_aio_context,
1638 uint16_t num_queues,
1639 Error **errp)
1640{
1641 IOThreadVirtQueueMappingList *node;
1642 size_t num_iothreads = 0;
1643 size_t cur_iothread = 0;
1644
1645 if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
1646 num_queues, errp)) {
1647 return false;
1648 }
1649
1650 for (node = iothread_vq_mapping_list; node; node = node->next) {
1651 num_iothreads++;
1652 }
1653
1654 for (node = iothread_vq_mapping_list; node; node = node->next) {
1655 IOThread *iothread = iothread_by_id(node->value->iothread);
1656 AioContext *ctx = iothread_get_aio_context(iothread);
1657
57bc2658 1658 /* Released in virtio_blk_vq_aio_context_cleanup() */
1659 object_ref(OBJECT(iothread));
1660
1661 if (node->value->vqs) {
1662 uint16List *vq;
1663
1664 /* Explicit vq:IOThread assignment */
1665 for (vq = node->value->vqs; vq; vq = vq->next) {
1f995a47 1666 assert(vq->value < num_queues);
1667 vq_aio_context[vq->value] = ctx;
1668 }
1669 } else {
1670 /* Round-robin vq:IOThread assignment */
1671 for (unsigned i = cur_iothread; i < num_queues;
1672 i += num_iothreads) {
1673 vq_aio_context[i] = ctx;
1674 }
1675 }
1676
1677 cur_iothread++;
1678 }
1f995a47
SH
1679
1680 return true;
3bcc17f0
SH
1681}
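
/*
 * Round-robin sketch of the loop above: with num_queues = 5 and two list
 * entries that omit "vqs", cur_iothread 0 is assigned vqs 0, 2 and 4 and
 * cur_iothread 1 is assigned vqs 1 and 3, i.e. entry n takes every
 * num_iothreads-th virtqueue starting at index n.
 */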
1682
1683/* Context: BQL held */
57bc2658 1684static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
3bcc17f0 1685{
0ea5f594 1686 ERRP_GUARD();
3bcc17f0
SH
1687 VirtIODevice *vdev = VIRTIO_DEVICE(s);
1688 VirtIOBlkConf *conf = &s->conf;
1689 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
1690 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1691
1f995a47
SH
1692 if (conf->iothread && conf->iothread_vq_mapping_list) {
1693 error_setg(errp,
1694 "iothread and iothread-vq-mapping properties cannot be set "
1695 "at the same time");
1696 return false;
1697 }
1698
3bcc17f0
SH
1699 if (conf->iothread || conf->iothread_vq_mapping_list) {
1700 if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
1701 error_setg(errp,
1702 "device is incompatible with iothread "
1703 "(transport does not support notifiers)");
1704 return false;
1705 }
1706 if (!virtio_device_ioeventfd_enabled(vdev)) {
1707 error_setg(errp, "ioeventfd is required for iothread");
1708 return false;
1709 }
1710
1711 /*
3cdaf3dd 1712 * If ioeventfd is (re-)enabled while the guest is running, there could
3bcc17f0
SH
1713 * be block jobs that can conflict.
1714 */
1715 if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
3cdaf3dd 1716 error_prepend(errp, "cannot start virtio-blk ioeventfd: ");
3bcc17f0
SH
1717 return false;
1718 }
1719 }
3bcc17f0
SH
1720
1721 s->vq_aio_context = g_new(AioContext *, conf->num_queues);
1722
1723 if (conf->iothread_vq_mapping_list) {
1f995a47
SH
1724 if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
1725 s->vq_aio_context,
1726 conf->num_queues,
1727 errp)) {
1728 g_free(s->vq_aio_context);
1729 s->vq_aio_context = NULL;
1730 return false;
1731 }
3bcc17f0
SH
1732 } else if (conf->iothread) {
1733 AioContext *ctx = iothread_get_aio_context(conf->iothread);
1734 for (unsigned i = 0; i < conf->num_queues; i++) {
1735 s->vq_aio_context[i] = ctx;
1736 }
1737
57bc2658 1738 /* Released in virtio_blk_vq_aio_context_cleanup() */
3bcc17f0
SH
1739 object_ref(OBJECT(conf->iothread));
1740 } else {
1741 AioContext *ctx = qemu_get_aio_context();
1742 for (unsigned i = 0; i < conf->num_queues; i++) {
1743 s->vq_aio_context[i] = ctx;
1744 }
1745 }
1746
1747 return true;
1748}
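
/*
 * A rough sketch of how the two mutually exclusive properties handled above
 * are typically set on the command line (ids are placeholders): a single
 * IOThread for all virtqueues,
 *
 *   -object iothread,id=iothread0
 *   -device virtio-blk-pci,drive=drive0,iothread=iothread0
 *
 * or an explicit per-virtqueue mapping, using the JSON form of -device since
 * iothread-vq-mapping is a list of objects:
 *
 *   -device '{"driver": "virtio-blk-pci", "drive": "drive0",
 *             "iothread-vq-mapping": [{"iothread": "iothread0"},
 *                                     {"iothread": "iothread1"}]}'
 */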
1749
1750/* Context: BQL held */
57bc2658 1751static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
3bcc17f0
SH
1752{
1753 VirtIOBlkConf *conf = &s->conf;
1754
3cdaf3dd 1755 assert(!s->ioeventfd_started);
3bcc17f0
SH
1756
1757 if (conf->iothread_vq_mapping_list) {
1758 IOThreadVirtQueueMappingList *node;
1759
1760 for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
1761 IOThread *iothread = iothread_by_id(node->value->iothread);
1762 object_unref(OBJECT(iothread));
1763 }
1764 }
1765
1766 if (conf->iothread) {
1767 object_unref(OBJECT(conf->iothread));
1768 }
1769
1770 g_free(s->vq_aio_context);
1771 s->vq_aio_context = NULL;
1772}
1773
1774/* Context: BQL held */
3cdaf3dd 1775static int virtio_blk_start_ioeventfd(VirtIODevice *vdev)
3bcc17f0
SH
1776{
1777 VirtIOBlock *s = VIRTIO_BLK(vdev);
1778 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
1779 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1780 unsigned i;
1781 unsigned nvqs = s->conf.num_queues;
1782 Error *local_err = NULL;
1783 int r;
1784
3cdaf3dd 1785 if (s->ioeventfd_started || s->ioeventfd_starting) {
3bcc17f0
SH
1786 return 0;
1787 }
1788
3cdaf3dd 1789 s->ioeventfd_starting = true;
3bcc17f0
SH
1790
1791 /* Set up guest notifier (irq) */
1792 r = k->set_guest_notifiers(qbus->parent, nvqs, true);
1793 if (r != 0) {
1794 error_report("virtio-blk failed to set guest notifier (%d), "
1795 "ensure -accel kvm is set.", r);
1796 goto fail_guest_notifiers;
1797 }
1798
1799 /*
1800 * Batch all the host notifiers in a single transaction to avoid
1801 * quadratic time complexity in address_space_update_ioeventfds().
1802 */
1803 memory_region_transaction_begin();
1804
1805 /* Set up virtqueue notify */
1806 for (i = 0; i < nvqs; i++) {
1807 r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true);
1808 if (r != 0) {
1809 int j = i;
1810
1811 fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r);
1812 while (i--) {
1813 virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
1814 }
1815
1816 /*
1817 * The transaction expects the ioeventfds to be open when it
1818 * commits. Do it now, before the cleanup loop.
1819 */
1820 memory_region_transaction_commit();
1821
1822 while (j--) {
1823 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j);
1824 }
1825 goto fail_host_notifiers;
1826 }
1827 }
1828
1829 memory_region_transaction_commit();
1830
ea0736d7
SH
1831 /*
1832 * Try to change the AioContext so that block jobs and other operations can
1833 * co-locate their activity in the same AioContext. If it fails, never mind.
1834 */
5fbcbd50 1835 assert(nvqs > 0); /* enforced during ->realize() */
3bcc17f0
SH
1836 r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0],
1837 &local_err);
1838 if (r < 0) {
ea0736d7 1839 warn_report_err(local_err);
3bcc17f0
SH
1840 }
1841
1842 /*
1843 * These fields must be visible to the IOThread when it processes the
3cdaf3dd 1844 * virtqueue, otherwise it will think ioeventfd has not started yet.
3bcc17f0 1845 *
3cdaf3dd 1846 * Make sure ->ioeventfd_started is false when blk_set_aio_context() is
3bcc17f0
SH
1847 * called above so that draining does not cause the host notifier to be
1848 * detached/attached prematurely.
1849 */
3cdaf3dd
SH
1850 s->ioeventfd_starting = false;
1851 s->ioeventfd_started = true;
3bcc17f0
SH
1852 smp_wmb(); /* paired with aio_notify_accept() on the read side */
1853
52bff01f
HC
1854 /*
1855 * Get this show started by hooking up our callbacks. If the device is
1856 * currently drained, virtio_blk_drained_end() will do this later instead.
1857 * Attaching the notifier also kicks the virtqueues, processing any requests
1858 * they may already have.
1859 */
1860 if (!blk_in_drain(s->conf.conf.blk)) {
1861 virtio_blk_ioeventfd_attach(s);
3bcc17f0
SH
1862 }
1863 return 0;
1864
3bcc17f0
SH
1865 fail_host_notifiers:
1866 k->set_guest_notifiers(qbus->parent, nvqs, false);
1867 fail_guest_notifiers:
3cdaf3dd
SH
1868 s->ioeventfd_disabled = true;
1869 s->ioeventfd_starting = false;
3bcc17f0
SH
1870 return -ENOSYS;
1871}
1872
1873/* Stop notifications for new requests from guest.
1874 *
1875 * Context: BH in IOThread
1876 */
3cdaf3dd 1877static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque)
3bcc17f0
SH
1878{
1879 VirtQueue *vq = opaque;
1880 EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq);
1881
1882 virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context());
1883
1884 /*
1885 * Test and clear notifier after disabling event, in case poll callback
1886 * didn't have time to run.
1887 */
1888 virtio_queue_host_notifier_read(host_notifier);
1889}
1890
1891/* Context: BQL held */
3cdaf3dd 1892static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev)
3bcc17f0
SH
1893{
1894 VirtIOBlock *s = VIRTIO_BLK(vdev);
1895 BusState *qbus = qdev_get_parent_bus(DEVICE(s));
1896 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1897 unsigned i;
1898 unsigned nvqs = s->conf.num_queues;
1899
3cdaf3dd 1900 if (!s->ioeventfd_started || s->ioeventfd_stopping) {
3bcc17f0
SH
1901 return;
1902 }
1903
1904 /* Better luck next time. */
3cdaf3dd
SH
1905 if (s->ioeventfd_disabled) {
1906 s->ioeventfd_disabled = false;
1907 s->ioeventfd_started = false;
3bcc17f0
SH
1908 return;
1909 }
3cdaf3dd 1910 s->ioeventfd_stopping = true;
3bcc17f0
SH
1911
1912 if (!blk_in_drain(s->conf.conf.blk)) {
1913 for (i = 0; i < nvqs; i++) {
1914 VirtQueue *vq = virtio_get_queue(vdev, i);
1915 AioContext *ctx = s->vq_aio_context[i];
1916
3cdaf3dd 1917 aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq);
3bcc17f0
SH
1918 }
1919 }
1920
1921 /*
1922 * Batch all the host notifiers in a single transaction to avoid
1923 * quadratic time complexity in address_space_update_ioeventfds().
1924 */
1925 memory_region_transaction_begin();
1926
1927 for (i = 0; i < nvqs; i++) {
1928 virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
1929 }
1930
1931 /*
1932 * The transaction expects the ioeventfds to be open when it
1933 * commits. Do it now, before the cleanup loop.
1934 */
1935 memory_region_transaction_commit();
1936
1937 for (i = 0; i < nvqs; i++) {
1938 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
1939 }
1940
1941 /*
3cdaf3dd 1942 * Set ->ioeventfd_started to false before draining so that host notifiers
3bcc17f0
SH
1943 * are not detached/attached anymore.
1944 */
3cdaf3dd 1945 s->ioeventfd_started = false;
3bcc17f0
SH
1946
1947 /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
1948 blk_drain(s->conf.conf.blk);
1949
1950 /*
1951 * Try to switch bs back to the QEMU main loop. If other users keep the
1952 * BlockBackend in the iothread, that's OK.
1953 */
1954 blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL);
1955
1956 /* Clean up guest notifier (irq) */
1957 k->set_guest_notifiers(qbus->parent, nvqs, false);
1958
3cdaf3dd 1959 s->ioeventfd_stopping = false;
3bcc17f0
SH
1960}
1961
75884afd 1962static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
1c028ddf 1963{
75884afd 1964 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
179b417e 1965 VirtIOBlock *s = VIRTIO_BLK(dev);
2a30307f 1966 VirtIOBlkConf *conf = &s->conf;
b3d9bb9a 1967 BlockDriverState *bs;
3ffeeef7 1968 Error *err = NULL;
2f270590 1969 unsigned i;
cf21e106 1970
4be74634 1971 if (!conf->conf.blk) {
75884afd
AF
1972 error_setg(errp, "drive property not set");
1973 return;
d75d25e3 1974 }
4be74634 1975 if (!blk_is_inserted(conf->conf.blk)) {
75884afd
AF
1976 error_setg(errp, "Device needs media, but drive is empty");
1977 return;
98f28ad7 1978 }
9445e1e1
SH
1979 if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
1980 conf->num_queues = 1;
1981 }
2f270590
SH
1982 if (!conf->num_queues) {
1983 error_setg(errp, "num-queues property must be larger than 0");
1984 return;
1985 }
1bf8a989
DP
1986 if (conf->queue_size <= 2) {
1987 error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
1988 "must be > 2", conf->queue_size);
1989 return;
1990 }
6040aedd
MK
1991 if (!is_power_of_2(conf->queue_size) ||
1992 conf->queue_size > VIRTQUEUE_MAX_SIZE) {
1993 error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
1994 "must be a power of 2 (max %d)",
1995 conf->queue_size, VIRTQUEUE_MAX_SIZE);
1996 return;
1997 }
d75d25e3 1998
ceff3e1f 1999 if (!blkconf_apply_backend_options(&conf->conf,
86b1cf32
KW
2000 !blk_supports_write_perm(conf->conf.blk),
2001 true, errp)) {
a17c17a2
KW
2002 return;
2003 }
4be74634 2004 s->original_wce = blk_enable_write_cache(conf->conf.blk);
ceff3e1f 2005 if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
75884afd 2006 return;
b7eb0c9f 2007 }
ceff3e1f 2008
c56ee92f 2009 if (!blkconf_blocksizes(&conf->conf, errp)) {
0a75b60c
MK
2010 return;
2011 }
2012
b3d9bb9a 2013 bs = blk_bs(conf->conf.blk);
4f736650
SL
2014 if (bs->bl.zoned != BLK_Z_NONE) {
2015 virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
2016 if (bs->bl.zoned == BLK_Z_HM) {
2017 virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
2018 }
2019 }
2020
37b06f8d
SG
2021 if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
2022 (!conf->max_discard_sectors ||
2023 conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
2024 error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
2025 ", must be between 1 and %d",
2026 conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
2027 return;
2028 }
2029
2030 if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
2031 (!conf->max_write_zeroes_sectors ||
2032 conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
2033 error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
2034 "), must be between 1 and %d",
2035 conf->max_write_zeroes_sectors,
2036 (int)BDRV_REQUEST_MAX_SECTORS);
2037 return;
2038 }
2039
d9cf55a8 2040 s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
d74c30c8 2041 s->host_features);
3857cd5c 2042 virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
6e02c38d 2043
9c67f33f
SH
2044 qemu_mutex_init(&s->rq_lock);
2045
4be74634 2046 s->blk = conf->conf.blk;
869a5c6d 2047 s->rq = NULL;
2a30307f 2048 s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
e63e7fde 2049
2f270590 2050 for (i = 0; i < conf->num_queues; i++) {
6040aedd 2051 virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
2f270590 2052 }
98e3ab35 2053 qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
57bc2658 2054
3cdaf3dd 2055 /* Don't start ioeventfd if transport does not support notifiers. */
57bc2658 2056 if (!virtio_device_ioeventfd_enabled(vdev)) {
3cdaf3dd 2057 s->ioeventfd_disabled = true;
57bc2658
SH
2058 }
2059
2060 virtio_blk_vq_aio_context_init(s, &err);
3ffeeef7 2061 if (err != NULL) {
75884afd 2062 error_propagate(errp, err);
cfaf757e
PN
2063 for (i = 0; i < conf->num_queues; i++) {
2064 virtio_del_queue(vdev, i);
2065 }
6a1a8cc7 2066 virtio_cleanup(vdev);
75884afd 2067 return;
392808b4 2068 }
6e02c38d 2069
a937f8e8
SH
2070 /*
2071 * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
2072 * called after ->start_ioeventfd() has already set blk's AioContext.
2073 */
2074 s->change =
2075 qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);
2076
baf42268 2077 blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
4be74634 2078 blk_set_dev_ops(s->blk, &virtio_block_ops, s);
6e02c38d 2079
4be74634 2080 blk_iostatus_enable(s->blk);
71f571a2
SE
2081
2082 add_boot_device_lchs(dev, "/disk@0,0",
2083 conf->conf.lcyls,
2084 conf->conf.lheads,
2085 conf->conf.lsecs);
1c028ddf
FK
2086}
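
/*
 * For reference, a property combination that passes the checks in
 * virtio_blk_device_realize() above (ids and file name are placeholders):
 *
 *   -drive if=none,id=drive0,file=disk.img,format=raw
 *   -device virtio-blk-pci,drive=drive0,num-queues=4,queue-size=256
 *
 * queue-size must be a power of two greater than 2 and at most
 * VIRTQUEUE_MAX_SIZE; num-queues defaults to VIRTIO_BLK_AUTO_NUM_QUEUES and
 * falls back to 1 above if the transport has not chosen a value.
 */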
2087
b69c3c21 2088static void virtio_blk_device_unrealize(DeviceState *dev)
1c028ddf 2089{
306ec6c3
AF
2090 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2091 VirtIOBlock *s = VIRTIO_BLK(dev);
4a0117cf
EP
2092 VirtIOBlkConf *conf = &s->conf;
2093 unsigned i;
306ec6c3 2094
7bfde688 2095 blk_drain(s->blk);
71f571a2 2096 del_boot_device_lchs(dev, "/disk@0,0");
57bc2658 2097 virtio_blk_vq_aio_context_cleanup(s);
4a0117cf
EP
2098 for (i = 0; i < conf->num_queues; i++) {
2099 virtio_del_queue(vdev, i);
2100 }
98e3ab35 2101 qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
9c67f33f 2102 qemu_mutex_destroy(&s->rq_lock);
baf42268 2103 blk_ram_registrar_destroy(&s->blk_ram_registrar);
1c028ddf 2104 qemu_del_vm_change_state_handler(s->change);
4be74634 2105 blockdev_mark_auto_del(s->blk);
6a1a8cc7 2106 virtio_cleanup(vdev);
1c028ddf
FK
2107}
2108
467b3f33
SH
2109static void virtio_blk_instance_init(Object *obj)
2110{
2111 VirtIOBlock *s = VIRTIO_BLK(obj);
2112
2a30307f 2113 device_add_bootindex_property(obj, &s->conf.conf.bootindex,
3342ec32 2114 "bootindex", "/disk@0,0",
40c2281c 2115 DEVICE(obj));
467b3f33
SH
2116}
2117
977a117f
HP
2118static const VMStateDescription vmstate_virtio_blk = {
2119 .name = "virtio-blk",
2120 .minimum_version_id = 2,
2121 .version_id = 2,
7d5dc0a3 2122 .fields = (const VMStateField[]) {
977a117f
HP
2123 VMSTATE_VIRTIO_DEVICE,
2124 VMSTATE_END_OF_LIST()
2125 },
2126};
bbded32c 2127
1c028ddf 2128static Property virtio_blk_properties[] = {
2a30307f 2129 DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
8c398252 2130 DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
2a30307f
MA
2131 DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf),
2132 DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
bbe8bd4d
SG
2133 DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features,
2134 VIRTIO_BLK_F_CONFIG_WCE, true),
32a877e4 2135#ifdef __linux__
bbe8bd4d
SG
2136 DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features,
2137 VIRTIO_BLK_F_SCSI, false),
32a877e4 2138#endif
c99495ac
PL
2139 DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
2140 true),
9445e1e1
SH
2141 DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues,
2142 VIRTIO_BLK_AUTO_NUM_QUEUES),
c9b7d9ec 2143 DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256),
1bf8a989 2144 DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true),
d679ac09
FZ
2145 DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
2146 IOThread *),
b6948ab0
SH
2147 DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock,
2148 conf.iothread_vq_mapping_list),
5c81161f
SG
2149 DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
2150 VIRTIO_BLK_F_DISCARD, true),
fb0b154c
AO
2151 DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock,
2152 conf.report_discard_granularity, true),
5c81161f
SG
2153 DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features,
2154 VIRTIO_BLK_F_WRITE_ZEROES, true),
37b06f8d
SG
2155 DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock,
2156 conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS),
2157 DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock,
2158 conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS),
5f258577
EY
2159 DEFINE_PROP_BOOL("x-enable-wce-if-config-wce", VirtIOBlock,
2160 conf.x_enable_wce_if_config_wce, true),
1c028ddf
FK
2161 DEFINE_PROP_END_OF_LIST(),
2162};
2163
2164static void virtio_blk_class_init(ObjectClass *klass, void *data)
2165{
2166 DeviceClass *dc = DEVICE_CLASS(klass);
2167 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
75884afd 2168
4f67d30b 2169 device_class_set_props(dc, virtio_blk_properties);
bbded32c 2170 dc->vmsd = &vmstate_virtio_blk;
125ee0ed 2171 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
75884afd 2172 vdc->realize = virtio_blk_device_realize;
306ec6c3 2173 vdc->unrealize = virtio_blk_device_unrealize;
1c028ddf
FK
2174 vdc->get_config = virtio_blk_update_config;
2175 vdc->set_config = virtio_blk_set_config;
2176 vdc->get_features = virtio_blk_get_features;
2177 vdc->set_status = virtio_blk_set_status;
2178 vdc->reset = virtio_blk_reset;
b2b295a7
GK
2179 vdc->save = virtio_blk_save_device;
2180 vdc->load = virtio_blk_load_device;
3cdaf3dd
SH
2181 vdc->start_ioeventfd = virtio_blk_start_ioeventfd;
2182 vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd;
1c028ddf
FK
2183}
2184
b5c7ceaf 2185static const TypeInfo virtio_blk_info = {
1c028ddf
FK
2186 .name = TYPE_VIRTIO_BLK,
2187 .parent = TYPE_VIRTIO_DEVICE,
2188 .instance_size = sizeof(VirtIOBlock),
467b3f33 2189 .instance_init = virtio_blk_instance_init,
1c028ddf
FK
2190 .class_init = virtio_blk_class_init,
2191};
2192
2193static void virtio_register_types(void)
2194{
b5c7ceaf 2195 type_register_static(&virtio_blk_info);
1c028ddf
FK
2196}
2197
2198type_init(virtio_register_types)