/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qapi/error.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "sysemu/blockdev.h"
#include "sysemu/block-ram-registrar.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#include "hw/virtio/virtio-blk.h"
#include "scsi/constants.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
#include "hw/virtio/virtio-bus.h"
#include "migration/qemu-file-types.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk-common.h"
#include "qemu/coroutine.h"

static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
                                    VirtIOBlockReq *req)
{
    req->dev = s;
    req->vq = vq;
    req->qiov.size = 0;
    req->in_len = 0;
    req->next = NULL;
    req->mr_next = NULL;
}

static void virtio_blk_free_request(VirtIOBlockReq *req)
{
    g_free(req);
}

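/*
 * Complete a request: store the status byte, undo any header iovec
 * trimming, push the element onto the used ring and notify the guest
 * (via irqfd when the dataplane is active, otherwise via the normal
 * virtio notification path).
 */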
static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    trace_virtio_blk_req_complete(vdev, req, status);

    stb_p(&req->in->status, status);
    iov_discard_undo(&req->inhdr_undo);
    iov_discard_undo(&req->outhdr_undo);
    virtqueue_push(req->vq, &req->elem, req->in_len);
    if (s->dataplane_started && !s->dataplane_disabled) {
        virtio_notify_irqfd(vdev, req->vq);
    } else {
        virtio_notify(vdev, req->vq);
    }
}

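/*
 * Handle an I/O error according to the configured error action.  On
 * BLOCK_ERROR_ACTION_STOP the request is queued on s->rq to be retried
 * after the VM resumes; on BLOCK_ERROR_ACTION_REPORT it is completed with
 * VIRTIO_BLK_S_IOERR.  Returns non-zero unless the error is ignored, in
 * which case the caller completes the request as usual.
 */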
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
                                      bool is_read, bool acct_failed)
{
    VirtIOBlock *s = req->dev;
    BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* Break the link as the next request is going to be parsed from the
         * ring again. Otherwise we may end up doing a double completion! */
        req->mr_next = NULL;

        WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
            req->next = s->rq;
            s->rq = req;
        }
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        if (acct_failed) {
            block_acct_failed(blk_get_stats(s->blk), &req->acct);
        }
        virtio_blk_free_request(req);
    }

    blk_error_action(s->blk, action, is_read, error);
    return action != BLOCK_ERROR_ACTION_IGNORE;
}

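/*
 * Completion callback for reads and writes.  A single callback may cover a
 * chain of merged requests linked through mr_next, so walk the chain and
 * complete each request individually.
 */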
static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *next = opaque;
    VirtIOBlock *s = next->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    while (next) {
        VirtIOBlockReq *req = next;
        next = req->mr_next;
        trace_virtio_blk_rw_complete(vdev, req, ret);

        if (req->qiov.nalloc != -1) {
            /* If nalloc is != -1 req->qiov is a local copy of the original
             * external iovec. It was allocated in submit_requests to be
             * able to merge requests. */
            qemu_iovec_destroy(&req->qiov);
        }

        if (ret) {
            int p = virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type);
            bool is_read = !(p & VIRTIO_BLK_T_OUT);
            /* Note that memory may be dirtied on read failure.  If the
             * virtio request is not completed here, as is the case for
             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
             * correctly during live migration.  While this is ugly,
             * it is acceptable because the device is free to write to
             * the memory until the request is completed (which will
             * happen on the other side of the migration).
             */
            if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) {
                continue;
            }
        }

        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        block_acct_done(blk_get_stats(s->blk), &req->acct);
        virtio_blk_free_request(req);
    }
}

static void virtio_blk_flush_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;

    if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    block_acct_done(blk_get_stats(s->blk), &req->acct);
    virtio_blk_free_request(req);
}

static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
                            ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;

    if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    if (is_write_zeroes) {
        block_acct_done(blk_get_stats(s->blk), &req->acct);
    }
    virtio_blk_free_request(req);
}

#ifdef __linux__

typedef struct {
    VirtIOBlockReq *req;
    struct sg_io_hdr hdr;
} VirtIOBlockIoctlReq;

static void virtio_blk_ioctl_complete(void *opaque, int status)
{
    VirtIOBlockIoctlReq *ioctl_req = opaque;
    VirtIOBlockReq *req = ioctl_req->req;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    struct virtio_scsi_inhdr *scsi;
    struct sg_io_hdr *hdr;

    scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;

    if (status) {
        status = VIRTIO_BLK_S_UNSUPP;
        virtio_stl_p(vdev, &scsi->errors, 255);
        goto out;
    }

    hdr = &ioctl_req->hdr;
    /*
     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
     * clear the masked_status field [hence status gets cleared too, see
     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
     * status has occurred.  However they do set DRIVER_SENSE in driver_status
     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
     */
    if (hdr->status == 0 && hdr->sb_len_wr > 0) {
        hdr->status = CHECK_CONDITION;
    }

    virtio_stl_p(vdev, &scsi->errors,
                 hdr->status | (hdr->msg_status << 8) |
                 (hdr->host_status << 16) | (hdr->driver_status << 24));
    virtio_stl_p(vdev, &scsi->residual, hdr->resid);
    virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
    virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);

out:
    virtio_blk_req_complete(req, status);
    virtio_blk_free_request(req);
    g_free(ioctl_req);
}

#endif

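/*
 * Pop the next available element from the virtqueue and initialize it as a
 * VirtIOBlockReq, or return NULL if the queue is empty.
 */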
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));

    if (req) {
        virtio_blk_init_request(s, vq, req);
    }
    return req;
}

static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
{
    int status = VIRTIO_BLK_S_OK;
    struct virtio_scsi_inhdr *scsi = NULL;
    VirtIOBlock *blk = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(blk);
    VirtQueueElement *elem = &req->elem;

#ifdef __linux__
    int i;
    VirtIOBlockIoctlReq *ioctl_req;
    BlockAIOCB *acb;
#endif

    /*
     * We require at least one output segment each for the virtio_blk_outhdr
     * and the SCSI command block.
     *
     * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
     * and the sense buffer pointer in the input segments.
     */
    if (elem->out_num < 2 || elem->in_num < 3) {
        status = VIRTIO_BLK_S_IOERR;
        goto fail;
    }

    /*
     * The scsi inhdr is placed in the second-to-last input segment, just
     * before the regular inhdr.
     */
    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;

    if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

    /*
     * No support for bidirectional commands yet.
     */
    if (elem->out_num > 2 && elem->in_num > 3) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

#ifdef __linux__
    ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
    ioctl_req->req = req;
    ioctl_req->hdr.interface_id = 'S';
    ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
    ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
    ioctl_req->hdr.dxfer_len = 0;

    if (elem->out_num > 2) {
        /*
         * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
        ioctl_req->hdr.iovec_count = elem->out_num - 2;

        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->out_sg + 2;

    } else if (elem->in_num > 3) {
        /*
         * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        ioctl_req->hdr.iovec_count = elem->in_num - 3;
        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->in_sg;
    } else {
        /*
         * Some SCSI commands don't actually transfer any data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
    }

    ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
    ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;

    acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
                        virtio_blk_ioctl_complete, ioctl_req);
    if (!acb) {
        g_free(ioctl_req);
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }
    return -EINPROGRESS;
#else
    abort();
#endif

fail:
    /* Just put anything nonzero so that the ioctl fails in the guest. */
    if (scsi) {
        virtio_stl_p(vdev, &scsi->errors, 255);
    }
    return status;
}

static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    int status;

    status = virtio_blk_handle_scsi_req(req);
    if (status != -EINPROGRESS) {
        virtio_blk_req_complete(req, status);
        virtio_blk_free_request(req);
    }
}

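/*
 * Issue a single read or write covering mrb->reqs[start .. start + num_reqs - 1].
 * When more than one request is covered, their iovecs are collected into a
 * locally allocated QEMUIOVector and the requests are linked through mr_next
 * so that virtio_blk_rw_complete() can complete each of them.
 */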
static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
                                   int start, int num_reqs, int niov)
{
    BlockBackend *blk = s->blk;
    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
    int64_t sector_num = mrb->reqs[start]->sector_num;
    bool is_write = mrb->is_write;
    BdrvRequestFlags flags = 0;

    if (num_reqs > 1) {
        int i;
        struct iovec *tmp_iov = qiov->iov;
        int tmp_niov = qiov->niov;

        /* mrb->reqs[start]->qiov was initialized from external so we can't
         * modify it here. We need to initialize it locally and then add the
         * external iovecs. */
        qemu_iovec_init(qiov, niov);

        for (i = 0; i < tmp_niov; i++) {
            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
        }

        for (i = start + 1; i < start + num_reqs; i++) {
            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
                              mrb->reqs[i]->qiov.size);
            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
        }

        trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev),
                                         mrb, start, num_reqs,
                                         sector_num << BDRV_SECTOR_BITS,
                                         qiov->size, is_write);
        block_acct_merge_done(blk_get_stats(blk),
                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
                              num_reqs - 1);
    }

    if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
        flags |= BDRV_REQ_REGISTERED_BUF;
    }

    if (is_write) {
        blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                        flags, virtio_blk_rw_complete,
                        mrb->reqs[start]);
    } else {
        blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                       flags, virtio_blk_rw_complete,
                       mrb->reqs[start]);
    }
}

static int multireq_compare(const void *a, const void *b)
{
    const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
                         *req2 = *(VirtIOBlockReq **)b;

    /*
     * Note that we can't simply subtract sector_num1 from sector_num2
     * here as that could overflow the return value.
     */
    if (req1->sector_num > req2->sector_num) {
        return 1;
    } else if (req1->sector_num < req2->sector_num) {
        return -1;
    } else {
        return 0;
    }
}

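/*
 * Flush the MultiReqBuffer: sort the queued requests by sector number and
 * submit sequential runs as merged requests, splitting whenever a merge
 * would exceed the backend's iovec or transfer-size limits.
 */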
static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
{
    int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
    uint32_t max_transfer;
    int64_t sector_num = 0;

    if (mrb->num_reqs == 1) {
        submit_requests(s, mrb, 0, 1, -1);
        mrb->num_reqs = 0;
        return;
    }

    max_transfer = blk_get_max_transfer(mrb->reqs[0]->dev->blk);

    qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
          &multireq_compare);

    for (i = 0; i < mrb->num_reqs; i++) {
        VirtIOBlockReq *req = mrb->reqs[i];
        if (num_reqs > 0) {
            /*
             * NOTE: We cannot merge the requests in below situations:
             * 1. requests are not sequential
             * 2. merge would exceed maximum number of IOVs
             * 3. merge would exceed maximum transfer length of backend device
             */
            if (sector_num + nb_sectors != req->sector_num ||
                niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
                req->qiov.size > max_transfer ||
                nb_sectors > (max_transfer -
                              req->qiov.size) / BDRV_SECTOR_SIZE) {
                submit_requests(s, mrb, start, num_reqs, niov);
                num_reqs = 0;
            }
        }

        if (num_reqs == 0) {
            sector_num = req->sector_num;
            nb_sectors = niov = 0;
            start = i;
        }

        nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
        niov += req->qiov.niov;
        num_reqs++;
    }

    submit_requests(s, mrb, start, num_reqs, niov);
    mrb->num_reqs = 0;
}

static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    VirtIOBlock *s = req->dev;

    block_acct_start(blk_get_stats(s->blk), &req->acct, 0,
                     BLOCK_ACCT_FLUSH);

    /*
     * Make sure all outstanding writes are posted to the backing device.
     */
    if (mrb->is_write && mrb->num_reqs > 0) {
        virtio_blk_submit_multireq(s, mrb);
    }
    blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
}

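/*
 * Check that a request fits within the device: the length is bounded, the
 * start sector and size are properly aligned, and the range does not extend
 * past the end of the backend.
 */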
static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
{
    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
    uint64_t total_sectors;

    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return false;
    }
    if (sector & dev->sector_mask) {
        return false;
    }
    if (size % dev->conf.conf.logical_block_size) {
        return false;
    }
    blk_get_geometry(dev->blk, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
}

static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
    struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint64_t sector;
    uint32_t num_sectors, flags, max_sectors;
    uint8_t err_status;
    int bytes;

    sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
    num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
    flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
    max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
                  s->conf.max_discard_sectors;

    /*
     * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, so this check
     * ensures that "num_sectors << BDRV_SECTOR_BITS" fits in the
     * integer variable.
     */
    if (unlikely(num_sectors > max_sectors)) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    bytes = num_sectors << BDRV_SECTOR_BITS;

    if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    /*
     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
     * and write zeroes commands if any unknown flag is set.
     */
    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
        err_status = VIRTIO_BLK_S_UNSUPP;
        goto err;
    }

    if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
        int blk_aio_flags = 0;

        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
            blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
                         BLOCK_ACCT_WRITE);

        blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
                              bytes, blk_aio_flags,
                              virtio_blk_discard_write_zeroes_complete, req);
    } else { /* VIRTIO_BLK_T_DISCARD */
        /*
         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
         * discard commands if the unmap flag is set.
         */
        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
            err_status = VIRTIO_BLK_S_UNSUPP;
            goto err;
        }

        blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
                         virtio_blk_discard_write_zeroes_complete, req);
    }

    return VIRTIO_BLK_S_OK;

err:
    if (is_write_zeroes) {
        block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
    }
    return err_status;
}

typedef struct ZoneCmdData {
    VirtIOBlockReq *req;
    struct iovec *in_iov;
    unsigned in_num;
    union {
        struct {
            unsigned int nr_zones;
            BlockZoneDescriptor *zones;
        } zone_report_data;
        struct {
            int64_t offset;
        } zone_append_data;
    };
} ZoneCmdData;

/*
 * check zoned_request: error checking before issuing requests. If all checks
 * passed, return true.
 * append: true if only zone append requests issued.
 */
static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
                                bool append, uint8_t *status) {
    BlockDriverState *bs = blk_bs(s->blk);
    int index;

    if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
        *status = VIRTIO_BLK_S_UNSUPP;
        return false;
    }

    if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS)
        || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) {
        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    if (append) {
        if (bs->bl.write_granularity) {
            if ((offset % bs->bl.write_granularity) != 0) {
                *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
                return false;
            }
        }

        index = offset / bs->bl.zone_size;
        if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) {
            *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            return false;
        }

        if (len / 512 > bs->bl.max_append_sectors) {
            if (bs->bl.max_append_sectors == 0) {
                *status = VIRTIO_BLK_S_UNSUPP;
            } else {
                *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            }
            return false;
        }
    }
    return true;
}

static void virtio_blk_zone_report_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    struct iovec *in_iov = data->in_iov;
    unsigned in_num = data->in_num;
    int64_t zrp_size, n, j = 0;
    int64_t nz = data->zone_report_data.nr_zones;
    int8_t err_status = VIRTIO_BLK_S_OK;

    trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
        .nr_zones = cpu_to_le64(nz),
    };
    zrp_size = sizeof(struct virtio_blk_zone_report)
               + sizeof(struct virtio_blk_zone_descriptor) * nz;
    n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
    if (n != sizeof(zrp_hdr)) {
        virtio_error(vdev, "Driver provided input buffer that is too small!");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    for (size_t i = sizeof(zrp_hdr); i < zrp_size;
         i += sizeof(struct virtio_blk_zone_descriptor), ++j) {
        struct virtio_blk_zone_descriptor desc =
            (struct virtio_blk_zone_descriptor) {
                .z_start = cpu_to_le64(data->zone_report_data.zones[j].start
                                       >> BDRV_SECTOR_BITS),
                .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap
                                     >> BDRV_SECTOR_BITS),
                .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp
                                    >> BDRV_SECTOR_BITS),
        };

        switch (data->zone_report_data.zones[j].type) {
        case BLK_ZT_CONV:
            desc.z_type = VIRTIO_BLK_ZT_CONV;
            break;
        case BLK_ZT_SWR:
            desc.z_type = VIRTIO_BLK_ZT_SWR;
            break;
        case BLK_ZT_SWP:
            desc.z_type = VIRTIO_BLK_ZT_SWP;
            break;
        default:
            g_assert_not_reached();
        }

        switch (data->zone_report_data.zones[j].state) {
        case BLK_ZS_RDONLY:
            desc.z_state = VIRTIO_BLK_ZS_RDONLY;
            break;
        case BLK_ZS_OFFLINE:
            desc.z_state = VIRTIO_BLK_ZS_OFFLINE;
            break;
        case BLK_ZS_EMPTY:
            desc.z_state = VIRTIO_BLK_ZS_EMPTY;
            break;
        case BLK_ZS_CLOSED:
            desc.z_state = VIRTIO_BLK_ZS_CLOSED;
            break;
        case BLK_ZS_FULL:
            desc.z_state = VIRTIO_BLK_ZS_FULL;
            break;
        case BLK_ZS_EOPEN:
            desc.z_state = VIRTIO_BLK_ZS_EOPEN;
            break;
        case BLK_ZS_IOPEN:
            desc.z_state = VIRTIO_BLK_ZS_IOPEN;
            break;
        case BLK_ZS_NOT_WP:
            desc.z_state = VIRTIO_BLK_ZS_NOT_WP;
            break;
        default:
            g_assert_not_reached();
        }

        /* TODO: it takes O(n^2) time complexity. Optimizations required. */
        n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
        if (n != sizeof(desc)) {
            virtio_error(vdev, "Driver provided input buffer "
                               "for descriptors that is too small!");
            err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        }
    }

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data->zone_report_data.zones);
    g_free(data);
}

static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
                                          struct iovec *in_iov,
                                          unsigned in_num)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    unsigned int nr_zones;
    ZoneCmdData *data;
    int64_t zone_size, offset;
    uint8_t err_status;

    if (req->in_len < sizeof(struct virtio_blk_inhdr) +
            sizeof(struct virtio_blk_zone_report) +
            sizeof(struct virtio_blk_zone_descriptor)) {
        virtio_error(vdev, "in buffer too small for zone report");
        return;
    }

    /* start byte offset of the zone report */
    offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
        goto out;
    }
    nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
                sizeof(struct virtio_blk_zone_report)) /
               sizeof(struct virtio_blk_zone_descriptor);
    trace_virtio_blk_handle_zone_report(vdev, req,
                                        offset >> BDRV_SECTOR_BITS, nr_zones);

    zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
    data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_report_data.nr_zones = nr_zones;
    data->zone_report_data.zones = g_malloc(zone_size),

    blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
                        data->zone_report_data.zones,
                        virtio_blk_zone_report_complete, data);
    return;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    int8_t err_status = VIRTIO_BLK_S_OK;
    trace_virtio_blk_zone_mgmt_complete(vdev, req, ret);

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
    }

    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    BlockDriverState *bs = blk_bs(s->blk);
    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    uint64_t len;
    uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    uint32_t type = virtio_ldl_p(vdev, &req->out.type);
    if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
        /* Entire drive capacity */
        offset = 0;
        len = capacity;
        trace_virtio_blk_handle_zone_reset_all(vdev, req, 0,
                                               bs->total_sectors);
    } else {
        if (bs->bl.zone_size > capacity - offset) {
            /* The zoned device allows the last smaller zone. */
            len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
        } else {
            len = bs->bl.zone_size;
        }
        trace_virtio_blk_handle_zone_mgmt(vdev, req, op,
                                          offset >> BDRV_SECTOR_BITS,
                                          len >> BDRV_SECTOR_BITS);
    }

    if (!check_zoned_request(s, offset, len, false, &err_status)) {
        goto out;
    }

    blk_aio_zone_mgmt(s->blk, op, offset, len,
                      virtio_blk_zone_mgmt_complete, req);

    return 0;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

static void virtio_blk_zone_append_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    int64_t append_sector, n;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    virtio_stq_p(vdev, &append_sector,
                 data->zone_append_data.offset >> BDRV_SECTOR_BITS);
    n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
                     sizeof(append_sector));
    if (n != sizeof(append_sector)) {
        virtio_error(vdev, "Driver provided input buffer less than size of "
                           "append_sector");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }
    trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data);
}

static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
                                         struct iovec *out_iov,
                                         struct iovec *in_iov,
                                         uint64_t out_num,
                                         unsigned in_num) {
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint8_t err_status = VIRTIO_BLK_S_OK;

    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    int64_t len = iov_size(out_iov, out_num);

    trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
    if (!check_zoned_request(s, offset, len, true, &err_status)) {
        goto out;
    }

    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_append_data.offset = offset;
    qemu_iovec_init_external(&req->qiov, out_iov, out_num);

    block_acct_start(blk_get_stats(s->blk), &req->acct, len,
                     BLOCK_ACCT_ZONE_APPEND);

    blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
                        virtio_blk_zone_append_complete, data);
    return 0;

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

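/*
 * Parse one request: copy the outhdr, locate the inhdr status byte at the
 * end of the input segments, and dispatch on the request type.  Returns -1
 * if the request is malformed in a way that marks the device broken;
 * otherwise the request is completed (possibly asynchronously) and 0 is
 * returned.
 */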
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    uint32_t type;
    struct iovec *in_iov = req->elem.in_sg;
    struct iovec *out_iov = req->elem.out_sg;
    unsigned in_num = req->elem.in_num;
    unsigned out_num = req->elem.out_num;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        virtio_error(vdev, "virtio-blk missing headers");
        return -1;
    }

    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        virtio_error(vdev, "virtio-blk request outhdr too short");
        return -1;
    }

    iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
                               &req->outhdr_undo);

    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        virtio_error(vdev, "virtio-blk request inhdr too short");
        iov_discard_undo(&req->outhdr_undo);
        return -1;
    }

    /* We always touch the last byte, so just see how big in_iov is. */
    req->in_len = iov_size(in_iov, in_num);
    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);
    iov_discard_back_undoable(in_iov, &in_num, sizeof(struct virtio_blk_inhdr),
                              &req->inhdr_undo);

    type = virtio_ldl_p(vdev, &req->out.type);

    /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
     * is an optional flag. Although a guest should not send this flag if
     * not negotiated we ignored it in the past. So keep ignoring it. */
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN:
    {
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = virtio_ldq_p(vdev, &req->out.sector);

        if (is_write) {
            qemu_iovec_init_external(&req->qiov, out_iov, out_num);
            trace_virtio_blk_handle_write(vdev, req, req->sector_num,
                                          req->qiov.size / BDRV_SECTOR_SIZE);
        } else {
            qemu_iovec_init_external(&req->qiov, in_iov, in_num);
            trace_virtio_blk_handle_read(vdev, req, req->sector_num,
                                         req->qiov.size / BDRV_SECTOR_SIZE);
        }

        if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
            block_acct_invalid(blk_get_stats(s->blk),
                               is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
            virtio_blk_free_request(req);
            return 0;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size,
                         is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);

        /* merge would exceed maximum number of requests or IO direction
         * changes */
        if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
                                  is_write != mrb->is_write ||
                                  !s->conf.request_merging)) {
            virtio_blk_submit_multireq(s, mrb);
        }

        assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
        mrb->reqs[mrb->num_reqs++] = req;
        mrb->is_write = is_write;
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        virtio_blk_handle_flush(req, mrb);
        break;
    case VIRTIO_BLK_T_ZONE_REPORT:
        virtio_blk_handle_zone_report(req, in_iov, in_num);
        break;
    case VIRTIO_BLK_T_ZONE_OPEN:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
        break;
    case VIRTIO_BLK_T_ZONE_CLOSE:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
        break;
    case VIRTIO_BLK_T_ZONE_FINISH:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
        break;
    case VIRTIO_BLK_T_ZONE_RESET:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_ZONE_RESET_ALL:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_SCSI_CMD:
        virtio_blk_handle_scsi(req);
        break;
    case VIRTIO_BLK_T_GET_ID:
    {
        /*
         * NB: per existing s/n string convention the string is
         * terminated by '\0' only when shorter than buffer.
         */
        const char *serial = s->conf.serial ? s->conf.serial : "";
        size_t size = MIN(strlen(serial) + 1,
                          MIN(iov_size(in_iov, in_num),
                              VIRTIO_BLK_ID_BYTES));
        iov_from_buf(in_iov, in_num, 0, serial, size);
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        virtio_blk_free_request(req);
        break;
    }
    case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
        /*
         * Pass out_iov/out_num and in_iov/in_num because it is not safe to
         * access req->elem.out_sg directly: it may have been modified by
         * virtio_blk_handle_request().
         */
        virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num);
        break;
    /*
     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
     * the VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch
     * statement, so we have to mask it for these requests, then we will
     * check if it is set.
     */
    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
    {
        struct virtio_blk_discard_write_zeroes dwz_hdr;
        size_t out_len = iov_size(out_iov, out_num);
        bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
                               VIRTIO_BLK_T_WRITE_ZEROES;
        uint8_t err_status;

        /*
         * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
         * more than one segment.
         */
        if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
                     out_len > sizeof(dwz_hdr))) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
            virtio_blk_free_request(req);
            return 0;
        }

        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
            iov_discard_undo(&req->inhdr_undo);
            iov_discard_undo(&req->outhdr_undo);
            virtio_error(vdev, "virtio-blk discard/write_zeroes header"
                         " too short");
            return -1;
        }

        err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
                                                            is_write_zeroes);
        if (err_status != VIRTIO_BLK_S_OK) {
            virtio_blk_req_complete(req, err_status);
            virtio_blk_free_request(req);
        }

        break;
    }
    default:
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        virtio_blk_free_request(req);
    }
    return 0;
}

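/*
 * Process a virtqueue: repeatedly pop and handle requests with guest
 * notifications suppressed, then submit any reads/writes still batched in
 * the MultiReqBuffer.  The defer_call_begin()/defer_call_end() pair batches
 * deferred work (such as I/O submission) across the whole loop.
 */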
void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {};
    bool suppress_notifications = virtio_queue_get_notification(vq);

    defer_call_begin();

    do {
        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 0);
        }

        while ((req = virtio_blk_get_request(s, vq))) {
            if (virtio_blk_handle_request(req, &mrb)) {
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                break;
            }
        }

        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 1);
        }
    } while (!virtio_queue_empty(vq));

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    defer_call_end();
}

static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = (VirtIOBlock *)vdev;

    if (!s->dataplane_disabled && !s->dataplane_started) {
        /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
         * dataplane here instead of waiting for .set_status().
         */
        virtio_device_start_ioeventfd(vdev);
        if (!s->dataplane_disabled) {
            return;
        }
    }

    virtio_blk_handle_vq(s, vq);
}

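/*
 * Requests parked on s->rq (stopped by rerror/werror=stop or restored by
 * migration) are re-submitted from this bottom half once the VM resumes
 * execution; if the device has become broken in the meantime, the remaining
 * queued requests are purged instead.
 */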
static void virtio_blk_dma_restart_bh(void *opaque)
{
    VirtIOBlock *s = opaque;

    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {};

    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        req = s->rq;
        s->rq = NULL;
    }

    while (req) {
        VirtIOBlockReq *next = req->next;
        if (virtio_blk_handle_request(req, &mrb)) {
            /* Device is now broken and won't do any processing until it gets
             * reset. Already queued requests will be lost: let's purge them.
             */
            while (req) {
                next = req->next;
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                req = next;
            }
            break;
        }
        req = next;
    }

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    /* Paired with inc in virtio_blk_dma_restart_cb() */
    blk_dec_in_flight(s->conf.conf.blk);
}

static void virtio_blk_dma_restart_cb(void *opaque, bool running,
                                      RunState state)
{
    VirtIOBlock *s = opaque;

    if (!running) {
        return;
    }

    /* Paired with dec in virtio_blk_dma_restart_bh() */
    blk_inc_in_flight(s->conf.conf.blk);

    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk),
                            virtio_blk_dma_restart_bh, s);
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    VirtIOBlockReq *req;

    /* Dataplane has stopped... */
    assert(!s->dataplane_started);

    /* ...but requests may still be in flight. */
    blk_drain(s->blk);

    /* We drop queued requests after blk_drain() because blk_drain() itself can
     * produce them. */
    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        while (s->rq) {
            req = s->rq;
            s->rq = req->next;

            /* No other threads can access req->vq here */
            virtqueue_detach_element(req->vq, &req->elem, 0);

            virtio_blk_free_request(req);
        }
    }

    blk_set_enable_write_cache(s->blk, s->original_wce);
}

/* coalesce internal state, copy to pci i/o region 0 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    BlockConf *conf = &s->conf.conf;
    BlockDriverState *bs = blk_bs(s->blk);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int64_t length;
    int blk_size = conf->logical_block_size;

    blk_get_geometry(s->blk, &capacity);
    memset(&blkcfg, 0, sizeof(blkcfg));
    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
    virtio_stl_p(vdev, &blkcfg.seg_max,
                 s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
    virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
    virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
    virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
    blkcfg.geometry.heads = conf->heads;
    /*
     * We must ensure that the block device capacity is a multiple of
     * the logical block size. If that is not the case, let's use
     * sector_mask to adapt the geometry to have a correct picture.
     * For those devices where the capacity is ok for the given geometry
     * we don't touch the sector value of the geometry, since some devices
     * (like s390 dasd) need a specific value. Here the capacity is already
     * cyls*heads*secs*blk_size and the sector value is not block size
     * divided by 512 - instead it is the amount of blk_size blocks
     * per track (cylinder).
     */
    length = blk_getlength(s->blk);
    if (length > 0 && length / conf->heads / conf->secs % blk_size) {
        blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
    } else {
        blkcfg.geometry.sectors = conf->secs;
    }
    blkcfg.size_max = 0;
    blkcfg.physical_block_exp = get_physical_block_exp(conf);
    blkcfg.alignment_offset = 0;
    blkcfg.wce = blk_enable_write_cache(s->blk);
    virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
        uint32_t discard_granularity = conf->discard_granularity;
        if (discard_granularity == -1 || !s->conf.report_discard_granularity) {
            discard_granularity = blk_size;
        }
        virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
                     s->conf.max_discard_sectors);
        virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
                     discard_granularity >> BDRV_SECTOR_BITS);
        /*
         * We support only one segment per request since multiple segments
         * are not widely used and there are no userspace APIs that allow
         * applications to submit multiple segments in a single call.
         */
        virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
    }
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
                     s->conf.max_write_zeroes_sectors);
        blkcfg.write_zeroes_may_unmap = 1;
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
    }
    if (bs->bl.zoned != BLK_Z_NONE) {
        switch (bs->bl.zoned) {
        case BLK_Z_HM:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
            break;
        case BLK_Z_HA:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
            break;
        default:
            g_assert_not_reached();
        }

        virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
                     bs->bl.zone_size / 512);
        virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
                     bs->bl.max_active_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
                     bs->bl.max_open_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
        virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
                     bs->bl.max_append_sectors);
    } else {
        blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
    }
    memcpy(config, &blkcfg, s->config_size);
}

static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    struct virtio_blk_config blkcfg;

    memcpy(&blkcfg, config, s->config_size);

    blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
}

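/*
 * Feature negotiation: advertise the configured host features plus the
 * features that are always supported.  SCSI passthrough is only available
 * for legacy (pre-virtio 1.0) devices, and the write-cache bits depend on
 * the backend's current cache mode.
 */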
static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    /* First, sync all possible supported virtio-blk features */
    features |= s->host_features;

    virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
    virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
    virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
    virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
    if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
        if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) {
            error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
            return 0;
        }
    } else {
        virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
        virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
    }

    if (blk_enable_write_cache(s->blk) ||
        (s->conf.x_enable_wce_if_config_wce &&
         virtio_has_feature(features, VIRTIO_BLK_F_CONFIG_WCE))) {
        virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
    }
    if (!blk_is_writable(s->blk)) {
        virtio_add_feature(&features, VIRTIO_BLK_F_RO);
    }
    if (s->conf.num_queues > 1) {
        virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
    }

    return features;
}

9315cbfd PB |
1378 | static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) |
1379 | { | |
1cc91b7d | 1380 | VirtIOBlock *s = VIRTIO_BLK(vdev); |
9315cbfd | 1381 | |
9ffe337c PB |
1382 | if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { |
1383 | assert(!s->dataplane_started); | |
392808b4 | 1384 | } |
392808b4 | 1385 | |
9315cbfd PB |
1386 | if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { |
1387 | return; | |
1388 | } | |
1389 | ||
ef5bc962 PB |
1390 | /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send |
1391 | * cache flushes. Thus, the "auto writethrough" behavior is never | |
1392 | * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. | |
1393 | * Leaving it enabled would break the following sequence: | |
1394 | * | |
1395 | * Guest started with "-drive cache=writethrough" | |
1396 | * Guest sets status to 0 | |
1397 | * Guest sets DRIVER bit in status field | |
1398 | * Guest reads host features (WCE=0, CONFIG_WCE=1) | |
1399 | * Guest writes guest features (WCE=0, CONFIG_WCE=1) | |
1400 | * Guest writes 1 to the WCE configuration field (writeback mode) | |
1401 | * Guest sets DRIVER_OK bit in status field | |
1402 | * | |
4be74634 | 1403 | * s->blk would erroneously be placed in writethrough mode. |
ef5bc962 | 1404 | */ |
95129d6f | 1405 | if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { |
4be74634 | 1406 | blk_set_enable_write_cache(s->blk, |
95129d6f CH |
1407 | virtio_vdev_has_feature(vdev, |
1408 | VIRTIO_BLK_F_WCE)); | |
ef5bc962 | 1409 | } |
9315cbfd PB |
1410 | } |
1411 | ||
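Summarised as a truth table (a restatement of the branch above, not new behaviour), the cache mode in effect after DRIVER_OK is:

    /*
     * CONFIG_WCE negotiated | WCE negotiated | resulting cache mode
     * ----------------------+----------------+------------------------------
     * yes                   | (any)          | whatever the guest last wrote
     *                       |                | to the WCE config field
     * no                    | yes            | writeback
     * no                    | no             | writethrough
     */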
b2b295a7 GK |
1412 | static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) |
1413 | { | |
1414 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
b2b295a7 | 1415 | |
9c67f33f SH |
1416 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { |
1417 | VirtIOBlockReq *req = s->rq; | |
30d8bf6d | 1418 | |
9c67f33f SH |
1419 | while (req) { |
1420 | qemu_put_sbyte(f, 1); | |
30d8bf6d | 1421 | |
9c67f33f SH |
1422 | if (s->conf.num_queues > 1) { |
1423 | qemu_put_be32(f, virtio_get_queue_index(req->vq)); | |
1424 | } | |
1425 | ||
1426 | qemu_put_virtqueue_element(vdev, f, &req->elem); | |
1427 | req = req->next; | |
1428 | } | |
869a5c6d | 1429 | } |
9c67f33f | 1430 | |
869a5c6d | 1431 | qemu_put_sbyte(f, 0); |
6e02c38d AL |
1432 | } |
1433 | ||
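The byte stream written above (and parsed back by virtio_blk_load_device() below) can be sketched as follows; this is derived from the code itself rather than from separate migration documentation:

    /*
     * Per in-flight request:
     *     sbyte 1                      another request follows
     *     be32  virtqueue index        only when num_queues > 1
     *     ...                          qemu_put_virtqueue_element() payload
     * Terminator:
     *     sbyte 0                      end of the request list
     */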
b2b295a7 GK |
1434 | static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, |
1435 | int version_id) | |
1436 | { | |
1437 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
2a633c46 | 1438 | |
869a5c6d | 1439 | while (qemu_get_sbyte(f)) { |
30d8bf6d SH |
1440 | unsigned nvqs = s->conf.num_queues; |
1441 | unsigned vq_idx = 0; | |
ab281c17 | 1442 | VirtIOBlockReq *req; |
30d8bf6d SH |
1443 | |
1444 | if (nvqs > 1) { | |
1445 | vq_idx = qemu_get_be32(f); | |
1446 | ||
1447 | if (vq_idx >= nvqs) { | |
1448 | error_report("Invalid virtqueue index in request list: %#x", | |
1449 | vq_idx); | |
1450 | return -EINVAL; | |
1451 | } | |
1452 | } | |
1453 | ||
8607f5c3 | 1454 | req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); |
30d8bf6d | 1455 | virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); |
9c67f33f SH |
1456 | |
1457 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { | |
1458 | req->next = s->rq; | |
1459 | s->rq = req; | |
1460 | } | |
869a5c6d | 1461 | } |
6e02c38d AL |
1462 | |
1463 | return 0; | |
1464 | } | |
1465 | ||
b6948ab0 SH |
1466 | static bool |
1467 | validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, | |
1468 | uint16_t num_queues, Error **errp) | |
1469 | { | |
1470 | g_autofree unsigned long *vqs = bitmap_new(num_queues); | |
1471 | g_autoptr(GHashTable) iothreads = | |
1472 | g_hash_table_new(g_str_hash, g_str_equal); | |
1473 | ||
1474 | for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { | |
1475 | const char *name = node->value->iothread; | |
1476 | uint16List *vq; | |
1477 | ||
1478 | if (!iothread_by_id(name)) { | |
1479 | error_setg(errp, "IOThread \"%s\" object does not exist", name); | |
1480 | return false; | |
1481 | } | |
1482 | ||
1483 | if (!g_hash_table_add(iothreads, (gpointer)name)) { | |
1484 | error_setg(errp, | |
1485 | "duplicate IOThread name \"%s\" in iothread-vq-mapping", | |
1486 | name); | |
1487 | return false; | |
1488 | } | |
1489 | ||
1490 | if (node != list) { | |
1491 | if (!!node->value->vqs != !!list->value->vqs) { | |
1492 | error_setg(errp, "either all items in iothread-vq-mapping " | |
1493 | "must have vqs or none of them must have it"); | |
1494 | return false; | |
1495 | } | |
1496 | } | |
1497 | ||
1498 | for (vq = node->value->vqs; vq; vq = vq->next) { | |
1499 | if (vq->value >= num_queues) { | |
1500 | error_setg(errp, "vq index %u for IOThread \"%s\" must be " | |
1501 | "less than num_queues %u in iothread-vq-mapping", | |
1502 | vq->value, name, num_queues); | |
1503 | return false; | |
1504 | } | |
1505 | ||
1506 | if (test_and_set_bit(vq->value, vqs)) { | |
1507 | error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " | |
1508 | "because it is already assigned", vq->value, name); | |
1509 | return false; | |
1510 | } | |
1511 | } | |
1512 | } | |
1513 | ||
1514 | if (list->value->vqs) { | |
1515 | for (uint16_t i = 0; i < num_queues; i++) { | |
1516 | if (!test_bit(i, vqs)) { | |
1517 | error_setg(errp, | |
1518 | "missing vq %u IOThread assignment in iothread-vq-mapping", | |
1519 | i); | |
1520 | return false; | |
1521 | } | |
1522 | } | |
1523 | } | |
1524 | ||
1525 | return true; | |
1526 | } | |
1527 | ||
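For illustration, a mapping that satisfies every check above might look like the following; the iothread IDs are hypothetical and the JSON form of -device is assumed to be available:

    /*
     *   -object iothread,id=iot0 -object iothread,id=iot1
     *   -device '{"driver": "virtio-blk-pci", "drive": "drive0",
     *             "num-queues": 4,
     *             "iothread-vq-mapping": [
     *                 {"iothread": "iot0", "vqs": [0, 1]},
     *                 {"iothread": "iot1", "vqs": [2, 3]}]}'
     *
     * Both IOThreads exist, no name repeats, and every vq index 0..3 is below
     * num-queues and assigned exactly once, so validation succeeds.
     */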
9b92fbcf SL |
1528 | static void virtio_resize_cb(void *opaque) |
1529 | { | |
1530 | VirtIODevice *vdev = opaque; | |
1531 | ||
1532 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | |
1533 | virtio_notify_config(vdev); | |
1534 | } | |
1535 | ||
145feb17 | 1536 | static void virtio_blk_resize(void *opaque) |
e5051fc7 | 1537 | { |
1cc91b7d | 1538 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); |
e5051fc7 | 1539 | |
9b92fbcf | 1540 | /* |
0b2675c4 | 1541 | * virtio_notify_config() needs to acquire the BQL, |
9b92fbcf SL |
1542 | * so it can't be called from an iothread. Instead, schedule |
1543 | * it to be run in the main context BH. | |
1544 | */ | |
1545 | aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); | |
e5051fc7 CH |
1546 | } |
1547 | ||
3bcc17f0 SH |
1548 | static void virtio_blk_data_plane_detach(VirtIOBlock *s) |
1549 | { | |
1550 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
1551 | ||
1552 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1553 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1554 | virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); | |
1555 | } | |
1556 | } | |
1557 | ||
1558 | static void virtio_blk_data_plane_attach(VirtIOBlock *s) | |
1559 | { | |
1560 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
1561 | ||
1562 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1563 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1564 | virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); | |
1565 | } | |
1566 | } | |
1567 | ||
1665d932 SH |
1568 | /* Suspend virtqueue ioeventfd processing during drain */ |
1569 | static void virtio_blk_drained_begin(void *opaque) | |
1570 | { | |
1571 | VirtIOBlock *s = opaque; | |
1665d932 | 1572 | |
3bcc17f0 SH |
1573 | if (s->dataplane_started) { |
1574 | virtio_blk_data_plane_detach(s); | |
1665d932 | 1575 | } |
1665d932 SH |
1576 | } |
1577 | ||
1578 | /* Resume virtqueue ioeventfd processing after drain */ | |
1579 | static void virtio_blk_drained_end(void *opaque) | |
1580 | { | |
1581 | VirtIOBlock *s = opaque; | |
1665d932 | 1582 | |
3bcc17f0 SH |
1583 | if (s->dataplane_started) { |
1584 | virtio_blk_data_plane_attach(s); | |
1665d932 | 1585 | } |
1665d932 SH |
1586 | } |
1587 | ||
0e49de52 | 1588 | static const BlockDevOps virtio_block_ops = { |
1665d932 SH |
1589 | .resize_cb = virtio_blk_resize, |
1590 | .drained_begin = virtio_blk_drained_begin, | |
1591 | .drained_end = virtio_blk_drained_end, | |
0e49de52 MA |
1592 | }; |
1593 | ||
3bcc17f0 SH |
1594 | /* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ |
1595 | static void | |
1596 | apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, | |
1597 | AioContext **vq_aio_context, uint16_t num_queues) | |
1598 | { | |
1599 | IOThreadVirtQueueMappingList *node; | |
1600 | size_t num_iothreads = 0; | |
1601 | size_t cur_iothread = 0; | |
1602 | ||
1603 | for (node = iothread_vq_mapping_list; node; node = node->next) { | |
1604 | num_iothreads++; | |
1605 | } | |
1606 | ||
1607 | for (node = iothread_vq_mapping_list; node; node = node->next) { | |
1608 | IOThread *iothread = iothread_by_id(node->value->iothread); | |
1609 | AioContext *ctx = iothread_get_aio_context(iothread); | |
1610 | ||
1611 | /* Released in virtio_blk_data_plane_destroy() */ | |
1612 | object_ref(OBJECT(iothread)); | |
1613 | ||
1614 | if (node->value->vqs) { | |
1615 | uint16List *vq; | |
1616 | ||
1617 | /* Explicit vq:IOThread assignment */ | |
1618 | for (vq = node->value->vqs; vq; vq = vq->next) { | |
1619 | vq_aio_context[vq->value] = ctx; | |
1620 | } | |
1621 | } else { | |
1622 | /* Round-robin vq:IOThread assignment */ | |
1623 | for (unsigned i = cur_iothread; i < num_queues; | |
1624 | i += num_iothreads) { | |
1625 | vq_aio_context[i] = ctx; | |
1626 | } | |
1627 | } | |
1628 | ||
1629 | cur_iothread++; | |
1630 | } | |
1631 | } | |
1632 | ||
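A worked example of the round-robin branch: with num_queues = 4 and a two-entry mapping list that omits "vqs", the loop above yields the following assignment.

    /*
     *   vq 0 -> IOThread #0        vq 2 -> IOThread #0
     *   vq 1 -> IOThread #1        vq 3 -> IOThread #1
     *
     * i.e. queue i is handled by IOThread (i % num_iothreads).
     */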
1633 | /* Context: BQL held */ | |
1634 | static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) | |
1635 | { | |
1636 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
1637 | VirtIOBlkConf *conf = &s->conf; | |
1638 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); | |
1639 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1640 | ||
1641 | if (conf->iothread || conf->iothread_vq_mapping_list) { | |
1642 | if (!k->set_guest_notifiers || !k->ioeventfd_assign) { | |
1643 | error_setg(errp, | |
1644 | "device is incompatible with iothread " | |
1645 | "(transport does not support notifiers)"); | |
1646 | return false; | |
1647 | } | |
1648 | if (!virtio_device_ioeventfd_enabled(vdev)) { | |
1649 | error_setg(errp, "ioeventfd is required for iothread"); | |
1650 | return false; | |
1651 | } | |
1652 | ||
1653 | /* | |
1654 | * If dataplane is (re-)enabled while the guest is running there could | |
1655 | * be block jobs that can conflict. | |
1656 | */ | |
1657 | if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { | |
1658 | error_prepend(errp, "cannot start virtio-blk dataplane: "); | |
1659 | return false; | |
1660 | } | |
1661 | } | |
1662 | /* Don't try if transport does not support notifiers. */ | |
1663 | if (!virtio_device_ioeventfd_enabled(vdev)) { | |
1664 | s->dataplane_disabled = true; | |
1665 | return false; | |
1666 | } | |
1667 | ||
1668 | s->vq_aio_context = g_new(AioContext *, conf->num_queues); | |
1669 | ||
1670 | if (conf->iothread_vq_mapping_list) { | |
1671 | apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, | |
1672 | conf->num_queues); | |
1673 | } else if (conf->iothread) { | |
1674 | AioContext *ctx = iothread_get_aio_context(conf->iothread); | |
1675 | for (unsigned i = 0; i < conf->num_queues; i++) { | |
1676 | s->vq_aio_context[i] = ctx; | |
1677 | } | |
1678 | ||
1679 | /* Released in virtio_blk_data_plane_destroy() */ | |
1680 | object_ref(OBJECT(conf->iothread)); | |
1681 | } else { | |
1682 | AioContext *ctx = qemu_get_aio_context(); | |
1683 | for (unsigned i = 0; i < conf->num_queues; i++) { | |
1684 | s->vq_aio_context[i] = ctx; | |
1685 | } | |
1686 | } | |
1687 | ||
1688 | return true; | |
1689 | } | |
1690 | ||
1691 | /* Context: BQL held */ | |
1692 | static void virtio_blk_data_plane_destroy(VirtIOBlock *s) | |
1693 | { | |
1694 | VirtIOBlkConf *conf = &s->conf; | |
1695 | ||
1696 | assert(!s->dataplane_started); | |
1697 | ||
1698 | if (conf->iothread_vq_mapping_list) { | |
1699 | IOThreadVirtQueueMappingList *node; | |
1700 | ||
1701 | for (node = conf->iothread_vq_mapping_list; node; node = node->next) { | |
1702 | IOThread *iothread = iothread_by_id(node->value->iothread); | |
1703 | object_unref(OBJECT(iothread)); | |
1704 | } | |
1705 | } | |
1706 | ||
1707 | if (conf->iothread) { | |
1708 | object_unref(OBJECT(conf->iothread)); | |
1709 | } | |
1710 | ||
1711 | g_free(s->vq_aio_context); | |
1712 | s->vq_aio_context = NULL; | |
1713 | } | |
1714 | ||
1715 | /* Context: BQL held */ | |
1716 | static int virtio_blk_data_plane_start(VirtIODevice *vdev) | |
1717 | { | |
1718 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1719 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); | |
1720 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1721 | unsigned i; | |
1722 | unsigned nvqs = s->conf.num_queues; | |
1723 | Error *local_err = NULL; | |
1724 | int r; | |
1725 | ||
1726 | if (s->dataplane_started || s->dataplane_starting) { | |
1727 | return 0; | |
1728 | } | |
1729 | ||
1730 | s->dataplane_starting = true; | |
1731 | ||
1732 | /* Set up guest notifier (irq) */ | |
1733 | r = k->set_guest_notifiers(qbus->parent, nvqs, true); | |
1734 | if (r != 0) { | |
1735 | error_report("virtio-blk failed to set guest notifier (%d), " | |
1736 | "ensure -accel kvm is set.", r); | |
1737 | goto fail_guest_notifiers; | |
1738 | } | |
1739 | ||
1740 | /* | |
1741 | * Batch all the host notifiers in a single transaction to avoid | |
1742 | * quadratic time complexity in address_space_update_ioeventfds(). | |
1743 | */ | |
1744 | memory_region_transaction_begin(); | |
1745 | ||
1746 | /* Set up virtqueue notify */ | |
1747 | for (i = 0; i < nvqs; i++) { | |
1748 | r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); | |
1749 | if (r != 0) { | |
1750 | int j = i; | |
1751 | ||
1752 | fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); | |
1753 | while (i--) { | |
1754 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | |
1755 | } | |
1756 | ||
1757 | /* | |
1758 | * The transaction expects the ioeventfds to be open when it | |
1759 | * commits. Do it now, before the cleanup loop. | |
1760 | */ | |
1761 | memory_region_transaction_commit(); | |
1762 | ||
1763 | while (j--) { | |
1764 | virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); | |
1765 | } | |
1766 | goto fail_host_notifiers; | |
1767 | } | |
1768 | } | |
1769 | ||
1770 | memory_region_transaction_commit(); | |
1771 | ||
1772 | r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], | |
1773 | &local_err); | |
1774 | if (r < 0) { | |
1775 | error_report_err(local_err); | |
1776 | goto fail_aio_context; | |
1777 | } | |
1778 | ||
1779 | /* | |
1780 | * These fields must be visible to the IOThread when it processes the | |
1781 | * virtqueue, otherwise it will think dataplane has not started yet. | |
1782 | * | |
1783 | * Make sure ->dataplane_started is false when blk_set_aio_context() is | |
1784 | * called above so that draining does not cause the host notifier to be | |
1785 | * detached/attached prematurely. | |
1786 | */ | |
1787 | s->dataplane_starting = false; | |
1788 | s->dataplane_started = true; | |
1789 | smp_wmb(); /* paired with aio_notify_accept() on the read side */ | |
1790 | ||
1791 | /* Get this show started by hooking up our callbacks */ | |
1792 | if (!blk_in_drain(s->conf.conf.blk)) { | |
1793 | for (i = 0; i < nvqs; i++) { | |
1794 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1795 | AioContext *ctx = s->vq_aio_context[i]; | |
1796 | ||
1797 | /* Kick right away to begin processing requests already in vring */ | |
1798 | event_notifier_set(virtio_queue_get_host_notifier(vq)); | |
1799 | ||
1800 | virtio_queue_aio_attach_host_notifier(vq, ctx); | |
1801 | } | |
1802 | } | |
1803 | return 0; | |
1804 | ||
1805 | fail_aio_context: | |
1806 | memory_region_transaction_begin(); | |
1807 | ||
1808 | for (i = 0; i < nvqs; i++) { | |
1809 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | |
1810 | } | |
1811 | ||
1812 | memory_region_transaction_commit(); | |
1813 | ||
1814 | for (i = 0; i < nvqs; i++) { | |
1815 | virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | |
1816 | } | |
1817 | fail_host_notifiers: | |
1818 | k->set_guest_notifiers(qbus->parent, nvqs, false); | |
1819 | fail_guest_notifiers: | |
1820 | s->dataplane_disabled = true; | |
1821 | s->dataplane_starting = false; | |
1822 | return -ENOSYS; | |
1823 | } | |
1824 | ||
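The ordering implemented by virtio_blk_data_plane_start() can be summarised as follows; this restates the code above rather than adding behaviour:

    /*
     *   1. set up guest notifiers (irqfds)       k->set_guest_notifiers(true)
     *   2. set up host notifiers (ioeventfds),   virtio_bus_set_host_notifier()
     *      batched in one memory transaction
     *   3. move the BlockBackend to vq 0's       blk_set_aio_context()
     *      AioContext
     *   4. publish dataplane_started, then       virtio_queue_aio_attach_host_notifier()
     *      attach the notifiers (skipped while
     *      a drain is in progress)
     *
     * The error paths unwind these steps in reverse order.
     */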
1825 | /* Stop notifications for new requests from guest. | |
1826 | * | |
1827 | * Context: BH in IOThread | |
1828 | */ | |
1829 | static void virtio_blk_data_plane_stop_vq_bh(void *opaque) | |
1830 | { | |
1831 | VirtQueue *vq = opaque; | |
1832 | EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); | |
1833 | ||
1834 | virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); | |
1835 | ||
1836 | /* | |
1837 | * Test and clear notifier after disabling event, in case poll callback | |
1838 | * didn't have time to run. | |
1839 | */ | |
1840 | virtio_queue_host_notifier_read(host_notifier); | |
1841 | } | |
1842 | ||
1843 | /* Context: BQL held */ | |
1844 | static void virtio_blk_data_plane_stop(VirtIODevice *vdev) | |
1845 | { | |
1846 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1847 | BusState *qbus = qdev_get_parent_bus(DEVICE(s)); | |
1848 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1849 | unsigned i; | |
1850 | unsigned nvqs = s->conf.num_queues; | |
1851 | ||
1852 | if (!s->dataplane_started || s->dataplane_stopping) { | |
1853 | return; | |
1854 | } | |
1855 | ||
1856 | /* Better luck next time. */ | |
1857 | if (s->dataplane_disabled) { | |
1858 | s->dataplane_disabled = false; | |
1859 | s->dataplane_started = false; | |
1860 | return; | |
1861 | } | |
1862 | s->dataplane_stopping = true; | |
1863 | ||
1864 | if (!blk_in_drain(s->conf.conf.blk)) { | |
1865 | for (i = 0; i < nvqs; i++) { | |
1866 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1867 | AioContext *ctx = s->vq_aio_context[i]; | |
1868 | ||
1869 | aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); | |
1870 | } | |
1871 | } | |
1872 | ||
1873 | /* | |
1874 | * Batch all the host notifiers in a single transaction to avoid | |
1875 | * quadratic time complexity in address_space_update_ioeventfds(). | |
1876 | */ | |
1877 | memory_region_transaction_begin(); | |
1878 | ||
1879 | for (i = 0; i < nvqs; i++) { | |
1880 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | |
1881 | } | |
1882 | ||
1883 | /* | |
1884 | * The transaction expects the ioeventfds to be open when it | |
1885 | * commits. Do it now, before the cleanup loop. | |
1886 | */ | |
1887 | memory_region_transaction_commit(); | |
1888 | ||
1889 | for (i = 0; i < nvqs; i++) { | |
1890 | virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | |
1891 | } | |
1892 | ||
1893 | /* | |
1894 | * Set ->dataplane_started to false before draining so that host notifiers | |
1895 | * are not detached/attached anymore. | |
1896 | */ | |
1897 | s->dataplane_started = false; | |
1898 | ||
1899 | /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ | |
1900 | blk_drain(s->conf.conf.blk); | |
1901 | ||
1902 | /* | |
1903 | * Try to switch bs back to the QEMU main loop. If other users keep the | |
1904 | * BlockBackend in the iothread, that's ok | |
1905 | */ | |
1906 | blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); | |
1907 | ||
1908 | /* Clean up guest notifier (irq) */ | |
1909 | k->set_guest_notifiers(qbus->parent, nvqs, false); | |
1910 | ||
1911 | s->dataplane_stopping = false; | |
1912 | } | |
1913 | ||
75884afd | 1914 | static void virtio_blk_device_realize(DeviceState *dev, Error **errp) |
1c028ddf | 1915 | { |
75884afd | 1916 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
179b417e | 1917 | VirtIOBlock *s = VIRTIO_BLK(dev); |
2a30307f | 1918 | VirtIOBlkConf *conf = &s->conf; |
3ffeeef7 | 1919 | Error *err = NULL; |
2f270590 | 1920 | unsigned i; |
cf21e106 | 1921 | |
4be74634 | 1922 | if (!conf->conf.blk) { |
75884afd AF |
1923 | error_setg(errp, "drive property not set"); |
1924 | return; | |
d75d25e3 | 1925 | } |
4be74634 | 1926 | if (!blk_is_inserted(conf->conf.blk)) { |
75884afd AF |
1927 | error_setg(errp, "Device needs media, but drive is empty"); |
1928 | return; | |
98f28ad7 | 1929 | } |
9445e1e1 SH |
1930 | if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { |
1931 | conf->num_queues = 1; | |
1932 | } | |
2f270590 SH |
1933 | if (!conf->num_queues) { |
1934 | error_setg(errp, "num-queues property must be larger than 0"); | |
1935 | return; | |
1936 | } | |
1bf8a989 DP |
1937 | if (conf->queue_size <= 2) { |
1938 | error_setg(errp, "invalid queue-size property (%" PRIu16 "), " | |
1939 | "must be > 2", conf->queue_size); | |
1940 | return; | |
1941 | } | |
6040aedd MK |
1942 | if (!is_power_of_2(conf->queue_size) || |
1943 | conf->queue_size > VIRTQUEUE_MAX_SIZE) { | |
1944 | error_setg(errp, "invalid queue-size property (%" PRIu16 "), " | |
1945 | "must be a power of 2 (max %d)", | |
1946 | conf->queue_size, VIRTQUEUE_MAX_SIZE); | |
1947 | return; | |
1948 | } | |
d75d25e3 | 1949 | |
ceff3e1f | 1950 | if (!blkconf_apply_backend_options(&conf->conf, |
86b1cf32 KW |
1951 | !blk_supports_write_perm(conf->conf.blk), |
1952 | true, errp)) { | |
a17c17a2 KW |
1953 | return; |
1954 | } | |
4be74634 | 1955 | s->original_wce = blk_enable_write_cache(conf->conf.blk); |
ceff3e1f | 1956 | if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) { |
75884afd | 1957 | return; |
b7eb0c9f | 1958 | } |
ceff3e1f | 1959 | |
c56ee92f | 1960 | if (!blkconf_blocksizes(&conf->conf, errp)) { |
0a75b60c MK |
1961 | return; |
1962 | } | |
1963 | ||
4f736650 SL |
1964 | BlockDriverState *bs = blk_bs(conf->conf.blk); |
1965 | if (bs->bl.zoned != BLK_Z_NONE) { | |
1966 | virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED); | |
1967 | if (bs->bl.zoned == BLK_Z_HM) { | |
1968 | virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD); | |
1969 | } | |
1970 | } | |
1971 | ||
37b06f8d SG |
1972 | if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) && |
1973 | (!conf->max_discard_sectors || | |
1974 | conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) { | |
1975 | error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")" | |
1976 | ", must be between 1 and %d", | |
1977 | conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS); | |
1978 | return; | |
1979 | } | |
1980 | ||
1981 | if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) && | |
1982 | (!conf->max_write_zeroes_sectors || | |
1983 | conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) { | |
1984 | error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32 | |
1985 | "), must be between 1 and %d", | |
1986 | conf->max_write_zeroes_sectors, | |
1987 | (int)BDRV_REQUEST_MAX_SECTORS); | |
1988 | return; | |
1989 | } | |
1990 | ||
b6948ab0 SH |
1991 | if (conf->iothread_vq_mapping_list) { |
1992 | if (conf->iothread) { | |
1993 | error_setg(errp, "iothread and iothread-vq-mapping properties " | |
1994 | "cannot be set at the same time"); | |
1995 | return; | |
1996 | } | |
1997 | ||
1998 | if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, | |
1999 | conf->num_queues, errp)) { | |
2000 | return; | |
2001 | } | |
2002 | } | |
2003 | ||
d9cf55a8 | 2004 | s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, |
d74c30c8 | 2005 | s->host_features); |
3857cd5c | 2006 | virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); |
6e02c38d | 2007 | |
9c67f33f SH |
2008 | qemu_mutex_init(&s->rq_lock); |
2009 | ||
4be74634 | 2010 | s->blk = conf->conf.blk; |
869a5c6d | 2011 | s->rq = NULL; |
2a30307f | 2012 | s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; |
e63e7fde | 2013 | |
2f270590 | 2014 | for (i = 0; i < conf->num_queues; i++) { |
6040aedd | 2015 | virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); |
2f270590 | 2016 | } |
98e3ab35 | 2017 | qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); |
3bcc17f0 | 2018 | virtio_blk_data_plane_create(s, &err); |
3ffeeef7 | 2019 | if (err != NULL) { |
75884afd | 2020 | error_propagate(errp, err); |
cfaf757e PN |
2021 | for (i = 0; i < conf->num_queues; i++) { |
2022 | virtio_del_queue(vdev, i); | |
2023 | } | |
6a1a8cc7 | 2024 | virtio_cleanup(vdev); |
75884afd | 2025 | return; |
392808b4 | 2026 | } |
6e02c38d | 2027 | |
a937f8e8 SH |
2028 | /* |
2029 | * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets | |
2030 | * called after ->start_ioeventfd() has already set blk's AioContext. | |
2031 | */ | |
2032 | s->change = | |
2033 | qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); | |
2034 | ||
baf42268 | 2035 | blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); |
4be74634 | 2036 | blk_set_dev_ops(s->blk, &virtio_block_ops, s); |
6e02c38d | 2037 | |
4be74634 | 2038 | blk_iostatus_enable(s->blk); |
71f571a2 SE |
2039 | |
2040 | add_boot_device_lchs(dev, "/disk@0,0", | |
2041 | conf->conf.lcyls, | |
2042 | conf->conf.lheads, | |
2043 | conf->conf.lsecs); | |
1c028ddf FK |
2044 | } |
2045 | ||
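A minimal command line that passes the checks in realize could look like this; the drive ID and image file name are hypothetical, and VIRTQUEUE_MAX_SIZE is 1024 in current trees, so queue-size=256 is within range:

    /*
     *   -drive if=none,id=drive0,file=disk.qcow2
     *   -device virtio-blk-pci,drive=drive0,num-queues=2,queue-size=256
     *
     * queue-size is > 2, a power of two and no larger than VIRTQUEUE_MAX_SIZE,
     * the drive has media, and num-queues is non-zero, so realize proceeds to
     * virtio_init() and virtio_blk_data_plane_create().
     */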
b69c3c21 | 2046 | static void virtio_blk_device_unrealize(DeviceState *dev) |
1c028ddf | 2047 | { |
306ec6c3 AF |
2048 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
2049 | VirtIOBlock *s = VIRTIO_BLK(dev); | |
4a0117cf EP |
2050 | VirtIOBlkConf *conf = &s->conf; |
2051 | unsigned i; | |
306ec6c3 | 2052 | |
7bfde688 | 2053 | blk_drain(s->blk); |
71f571a2 | 2054 | del_boot_device_lchs(dev, "/disk@0,0"); |
3bcc17f0 | 2055 | virtio_blk_data_plane_destroy(s); |
4a0117cf EP |
2056 | for (i = 0; i < conf->num_queues; i++) { |
2057 | virtio_del_queue(vdev, i); | |
2058 | } | |
98e3ab35 | 2059 | qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); |
9c67f33f | 2060 | qemu_mutex_destroy(&s->rq_lock); |
baf42268 | 2061 | blk_ram_registrar_destroy(&s->blk_ram_registrar); |
1c028ddf | 2062 | qemu_del_vm_change_state_handler(s->change); |
4be74634 | 2063 | blockdev_mark_auto_del(s->blk); |
6a1a8cc7 | 2064 | virtio_cleanup(vdev); |
1c028ddf FK |
2065 | } |
2066 | ||
467b3f33 SH |
2067 | static void virtio_blk_instance_init(Object *obj) |
2068 | { | |
2069 | VirtIOBlock *s = VIRTIO_BLK(obj); | |
2070 | ||
2a30307f | 2071 | device_add_bootindex_property(obj, &s->conf.conf.bootindex, |
3342ec32 | 2072 | "bootindex", "/disk@0,0", |
40c2281c | 2073 | DEVICE(obj)); |
467b3f33 SH |
2074 | } |
2075 | ||
977a117f HP |
2076 | static const VMStateDescription vmstate_virtio_blk = { |
2077 | .name = "virtio-blk", | |
2078 | .minimum_version_id = 2, | |
2079 | .version_id = 2, | |
7d5dc0a3 | 2080 | .fields = (const VMStateField[]) { |
977a117f HP |
2081 | VMSTATE_VIRTIO_DEVICE, |
2082 | VMSTATE_END_OF_LIST() | |
2083 | }, | |
2084 | }; | |
bbded32c | 2085 | |
1c028ddf | 2086 | static Property virtio_blk_properties[] = { |
2a30307f | 2087 | DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), |
8c398252 | 2088 | DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), |
2a30307f MA |
2089 | DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), |
2090 | DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), | |
bbe8bd4d SG |
2091 | DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, |
2092 | VIRTIO_BLK_F_CONFIG_WCE, true), | |
32a877e4 | 2093 | #ifdef __linux__ |
bbe8bd4d SG |
2094 | DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, |
2095 | VIRTIO_BLK_F_SCSI, false), | |
32a877e4 | 2096 | #endif |
c99495ac PL |
2097 | DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, |
2098 | true), | |
9445e1e1 SH |
2099 | DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, |
2100 | VIRTIO_BLK_AUTO_NUM_QUEUES), | |
c9b7d9ec | 2101 | DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), |
1bf8a989 | 2102 | DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), |
d679ac09 FZ |
2103 | DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, |
2104 | IOThread *), | |
b6948ab0 SH |
2105 | DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, |
2106 | conf.iothread_vq_mapping_list), | |
5c81161f SG |
2107 | DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, |
2108 | VIRTIO_BLK_F_DISCARD, true), | |
fb0b154c AO |
2109 | DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, |
2110 | conf.report_discard_granularity, true), | |
5c81161f SG |
2111 | DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, |
2112 | VIRTIO_BLK_F_WRITE_ZEROES, true), | |
37b06f8d SG |
2113 | DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock, |
2114 | conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS), | |
2115 | DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock, | |
2116 | conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS), | |
5f258577 EY |
2117 | DEFINE_PROP_BOOL("x-enable-wce-if-config-wce", VirtIOBlock, |
2118 | conf.x_enable_wce_if_config_wce, true), | |
1c028ddf FK |
2119 | DEFINE_PROP_END_OF_LIST(), |
2120 | }; | |
2121 | ||
2122 | static void virtio_blk_class_init(ObjectClass *klass, void *data) | |
2123 | { | |
2124 | DeviceClass *dc = DEVICE_CLASS(klass); | |
2125 | VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); | |
75884afd | 2126 | |
4f67d30b | 2127 | device_class_set_props(dc, virtio_blk_properties); |
bbded32c | 2128 | dc->vmsd = &vmstate_virtio_blk; |
125ee0ed | 2129 | set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); |
75884afd | 2130 | vdc->realize = virtio_blk_device_realize; |
306ec6c3 | 2131 | vdc->unrealize = virtio_blk_device_unrealize; |
1c028ddf FK |
2132 | vdc->get_config = virtio_blk_update_config; |
2133 | vdc->set_config = virtio_blk_set_config; | |
2134 | vdc->get_features = virtio_blk_get_features; | |
2135 | vdc->set_status = virtio_blk_set_status; | |
2136 | vdc->reset = virtio_blk_reset; | |
b2b295a7 GK |
2137 | vdc->save = virtio_blk_save_device; |
2138 | vdc->load = virtio_blk_load_device; | |
9ffe337c PB |
2139 | vdc->start_ioeventfd = virtio_blk_data_plane_start; |
2140 | vdc->stop_ioeventfd = virtio_blk_data_plane_stop; | |
1c028ddf FK |
2141 | } |
2142 | ||
b5c7ceaf | 2143 | static const TypeInfo virtio_blk_info = { |
1c028ddf FK |
2144 | .name = TYPE_VIRTIO_BLK, |
2145 | .parent = TYPE_VIRTIO_DEVICE, | |
2146 | .instance_size = sizeof(VirtIOBlock), | |
467b3f33 | 2147 | .instance_init = virtio_blk_instance_init, |
1c028ddf FK |
2148 | .class_init = virtio_blk_class_init, |
2149 | }; | |
2150 | ||
2151 | static void virtio_register_types(void) | |
2152 | { | |
b5c7ceaf | 2153 | type_register_static(&virtio_blk_info); |
1c028ddf FK |
2154 | } |
2155 | ||
2156 | type_init(virtio_register_types) |