/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qapi/error.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "sysemu/blockdev.h"
#include "sysemu/block-ram-registrar.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#include "hw/virtio/virtio-blk.h"
#include "scsi/constants.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
#include "hw/virtio/virtio-bus.h"
#include "migration/qemu-file-types.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk-common.h"
#include "qemu/coroutine.h"

static void virtio_blk_ioeventfd_attach(VirtIOBlock *s);

static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
                                    VirtIOBlockReq *req)
{
    req->dev = s;
    req->vq = vq;
    req->qiov.size = 0;
    req->in_len = 0;
    req->next = NULL;
    req->mr_next = NULL;
}

static void virtio_blk_free_request(VirtIOBlockReq *req)
{
    g_free(req);
}

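/*
 * Complete a request back to the guest: fill in the status byte, push the
 * descriptor chain onto the used ring, and signal the guest - via the irqfd
 * when running in an IOThread, via the regular notifier otherwise.
 */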
static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    trace_virtio_blk_req_complete(vdev, req, status);

    stb_p(&req->in->status, status);
    iov_discard_undo(&req->inhdr_undo);
    iov_discard_undo(&req->outhdr_undo);
    virtqueue_push(req->vq, &req->elem, req->in_len);
    if (qemu_in_iothread()) {
        virtio_notify_irqfd(vdev, req->vq);
    } else {
        virtio_notify(vdev, req->vq);
    }
}

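/*
 * Apply the configured rerror/werror policy to a failed request. With
 * BLOCK_ERROR_ACTION_STOP the request is parked on s->rq so it can be
 * resubmitted when the VM resumes; with BLOCK_ERROR_ACTION_REPORT the error
 * is returned to the guest. Returns nonzero unless the error is ignored, in
 * which case the caller completes the request as successful.
 */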
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
                                      bool is_read, bool acct_failed)
{
    VirtIOBlock *s = req->dev;
    BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* Break the link as the next request is going to be parsed from the
         * ring again. Otherwise we may end up doing a double completion! */
        req->mr_next = NULL;

        WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
            req->next = s->rq;
            s->rq = req;
        }
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        if (acct_failed) {
            block_acct_failed(blk_get_stats(s->blk), &req->acct);
        }
        virtio_blk_free_request(req);
    }

    blk_error_action(s->blk, action, is_read, error);
    return action != BLOCK_ERROR_ACTION_IGNORE;
}

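/*
 * Completion callback for reads and writes. A single AIO request may carry a
 * chain of merged virtio requests linked through mr_next, so completion walks
 * the whole chain and completes each element individually.
 */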
static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *next = opaque;
    VirtIOBlock *s = next->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    while (next) {
        VirtIOBlockReq *req = next;
        next = req->mr_next;
        trace_virtio_blk_rw_complete(vdev, req, ret);

        if (req->qiov.nalloc != -1) {
            /* If nalloc is != -1 req->qiov is a local copy of the original
             * external iovec. It was allocated in submit_requests to be
             * able to merge requests. */
            qemu_iovec_destroy(&req->qiov);
        }

        if (ret) {
            int p = virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type);
            bool is_read = !(p & VIRTIO_BLK_T_OUT);
            /* Note that memory may be dirtied on read failure. If the
             * virtio request is not completed here, as is the case for
             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
             * correctly during live migration. While this is ugly,
             * it is acceptable because the device is free to write to
             * the memory until the request is completed (which will
             * happen on the other side of the migration).
             */
            if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) {
                continue;
            }
        }

        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        block_acct_done(blk_get_stats(s->blk), &req->acct);
        virtio_blk_free_request(req);
    }
}

static void virtio_blk_flush_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;

    if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    block_acct_done(blk_get_stats(s->blk), &req->acct);
    virtio_blk_free_request(req);
}

static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
                            ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;

    if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    if (is_write_zeroes) {
        block_acct_done(blk_get_stats(s->blk), &req->acct);
    }
    virtio_blk_free_request(req);
}

#ifdef __linux__

typedef struct {
    VirtIOBlockReq *req;
    struct sg_io_hdr hdr;
} VirtIOBlockIoctlReq;

static void virtio_blk_ioctl_complete(void *opaque, int status)
{
    VirtIOBlockIoctlReq *ioctl_req = opaque;
    VirtIOBlockReq *req = ioctl_req->req;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    struct virtio_scsi_inhdr *scsi;
    struct sg_io_hdr *hdr;

    scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;

    if (status) {
        status = VIRTIO_BLK_S_UNSUPP;
        virtio_stl_p(vdev, &scsi->errors, 255);
        goto out;
    }

    hdr = &ioctl_req->hdr;
    /*
     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
     * clear the masked_status field [hence status gets cleared too, see
     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
     * status has occurred. However they do set DRIVER_SENSE in driver_status
     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer."
     */
    if (hdr->status == 0 && hdr->sb_len_wr > 0) {
        hdr->status = CHECK_CONDITION;
    }

    virtio_stl_p(vdev, &scsi->errors,
                 hdr->status | (hdr->msg_status << 8) |
                 (hdr->host_status << 16) | (hdr->driver_status << 24));
    virtio_stl_p(vdev, &scsi->residual, hdr->resid);
    virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
    virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);

out:
    virtio_blk_req_complete(req, status);
    virtio_blk_free_request(req);
    g_free(ioctl_req);
}

#endif

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));

    if (req) {
        virtio_blk_init_request(s, vq, req);
    }
    return req;
}

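/*
 * SG_IO passthrough (VIRTIO_BLK_F_SCSI). The request layout is: outhdr and
 * CDB in the output segments, optional data payload, then the sense buffer,
 * virtio_scsi_inhdr and virtio_blk_inhdr at the tail of the input segments.
 * Returns -EINPROGRESS if the ioctl was submitted asynchronously, or a
 * VIRTIO_BLK_S_* status on immediate failure.
 */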
static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
{
    int status = VIRTIO_BLK_S_OK;
    struct virtio_scsi_inhdr *scsi = NULL;
    VirtIOBlock *blk = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(blk);
    VirtQueueElement *elem = &req->elem;

#ifdef __linux__
    int i;
    VirtIOBlockIoctlReq *ioctl_req;
    BlockAIOCB *acb;
#endif

    /*
     * We require at least one output segment each for the virtio_blk_outhdr
     * and the SCSI command block.
     *
     * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
     * and the sense buffer pointer in the input segments.
     */
    if (elem->out_num < 2 || elem->in_num < 3) {
        status = VIRTIO_BLK_S_IOERR;
        goto fail;
    }

    /*
     * The scsi inhdr is placed in the second-to-last input segment, just
     * before the regular inhdr.
     */
    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;

    if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

    /*
     * No support for bidirectional commands yet.
     */
    if (elem->out_num > 2 && elem->in_num > 3) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

#ifdef __linux__
    ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
    ioctl_req->req = req;
    ioctl_req->hdr.interface_id = 'S';
    ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
    ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
    ioctl_req->hdr.dxfer_len = 0;

    if (elem->out_num > 2) {
        /*
         * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
        ioctl_req->hdr.iovec_count = elem->out_num - 2;

        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->out_sg + 2;

    } else if (elem->in_num > 3) {
        /*
         * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        ioctl_req->hdr.iovec_count = elem->in_num - 3;
        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->in_sg;
    } else {
        /*
         * Some SCSI commands don't actually transfer any data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
    }

    ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
    ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;

    acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
                        virtio_blk_ioctl_complete, ioctl_req);
    if (!acb) {
        g_free(ioctl_req);
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }
    return -EINPROGRESS;
#else
    abort();
#endif

fail:
    /* Just put anything nonzero so that the ioctl fails in the guest. */
    if (scsi) {
        virtio_stl_p(vdev, &scsi->errors, 255);
    }
    return status;
}

static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    int status;

    status = virtio_blk_handle_scsi_req(req);
    if (status != -EINPROGRESS) {
        virtio_blk_req_complete(req, status);
        virtio_blk_free_request(req);
    }
}

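/*
 * Issue one merged I/O for mrb->reqs[start..start+num_reqs-1]. For a
 * multi-request merge, a new qiov owned by the first request is built from
 * all the external iovecs; virtio_blk_rw_complete() later detects this via
 * qiov.nalloc != -1 and destroys it.
 */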
static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
                                   int start, int num_reqs, int niov)
{
    BlockBackend *blk = s->blk;
    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
    int64_t sector_num = mrb->reqs[start]->sector_num;
    bool is_write = mrb->is_write;
    BdrvRequestFlags flags = 0;

    if (num_reqs > 1) {
        int i;
        struct iovec *tmp_iov = qiov->iov;
        int tmp_niov = qiov->niov;

        /* mrb->reqs[start]->qiov was initialized from external so we can't
         * modify it here. We need to initialize it locally and then add the
         * external iovecs. */
        qemu_iovec_init(qiov, niov);

        for (i = 0; i < tmp_niov; i++) {
            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
        }

        for (i = start + 1; i < start + num_reqs; i++) {
            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
                              mrb->reqs[i]->qiov.size);
            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
        }

        trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev),
                                         mrb, start, num_reqs,
                                         sector_num << BDRV_SECTOR_BITS,
                                         qiov->size, is_write);
        block_acct_merge_done(blk_get_stats(blk),
                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
                              num_reqs - 1);
    }

    if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
        flags |= BDRV_REQ_REGISTERED_BUF;
    }

    if (is_write) {
        blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                        flags, virtio_blk_rw_complete,
                        mrb->reqs[start]);
    } else {
        blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                       flags, virtio_blk_rw_complete,
                       mrb->reqs[start]);
    }
}

static int multireq_compare(const void *a, const void *b)
{
    const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
                         *req2 = *(VirtIOBlockReq **)b;

    /*
     * Note that we can't simply subtract sector_num1 from sector_num2
     * here as that could overflow the return value.
     */
    if (req1->sector_num > req2->sector_num) {
        return 1;
    } else if (req1->sector_num < req2->sector_num) {
        return -1;
    } else {
        return 0;
    }
}

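/*
 * Sort the buffered requests by sector and greedily batch sequential ones,
 * splitting a batch whenever a merge would exceed the backend's maximum
 * transfer size or iovec limit. Each batch becomes one submit_requests()
 * call.
 */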
static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
{
    int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
    uint32_t max_transfer;
    int64_t sector_num = 0;

    if (mrb->num_reqs == 1) {
        submit_requests(s, mrb, 0, 1, -1);
        mrb->num_reqs = 0;
        return;
    }

    max_transfer = blk_get_max_transfer(mrb->reqs[0]->dev->blk);

    qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
          &multireq_compare);

    for (i = 0; i < mrb->num_reqs; i++) {
        VirtIOBlockReq *req = mrb->reqs[i];
        if (num_reqs > 0) {
            /*
             * NOTE: We cannot merge the requests in below situations:
             * 1. requests are not sequential
             * 2. merge would exceed maximum number of IOVs
             * 3. merge would exceed maximum transfer length of backend device
             */
            if (sector_num + nb_sectors != req->sector_num ||
                niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
                req->qiov.size > max_transfer ||
                nb_sectors > (max_transfer -
                              req->qiov.size) / BDRV_SECTOR_SIZE) {
                submit_requests(s, mrb, start, num_reqs, niov);
                num_reqs = 0;
            }
        }

        if (num_reqs == 0) {
            sector_num = req->sector_num;
            nb_sectors = niov = 0;
            start = i;
        }

        nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
        niov += req->qiov.niov;
        num_reqs++;
    }

    submit_requests(s, mrb, start, num_reqs, niov);
    mrb->num_reqs = 0;
}

static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    VirtIOBlock *s = req->dev;

    block_acct_start(blk_get_stats(s->blk), &req->acct, 0,
                     BLOCK_ACCT_FLUSH);

    /*
     * Make sure all outstanding writes are posted to the backing device.
     */
    if (mrb->is_write && mrb->num_reqs > 0) {
        virtio_blk_submit_multireq(s, mrb);
    }
    blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
}

static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
{
    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
    uint64_t total_sectors;

    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return false;
    }
    if (sector & dev->sector_mask) {
        return false;
    }
    if (size % dev->conf.conf.logical_block_size) {
        return false;
    }
    blk_get_geometry(dev->blk, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
}

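/*
 * Common handler for VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES.
 * Validates the virtio_blk_discard_write_zeroes header against the
 * configured limits before issuing the asynchronous block operation.
 * Returns VIRTIO_BLK_S_OK if the request was submitted, or an error status
 * for the caller to complete the request with.
 */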
static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
    struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint64_t sector;
    uint32_t num_sectors, flags, max_sectors;
    uint8_t err_status;
    int bytes;

    sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
    num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
    flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
    max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
                  s->conf.max_discard_sectors;

    /*
     * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, so this check
     * ensures that "num_sectors << BDRV_SECTOR_BITS" fits in the
     * integer variable.
     */
    if (unlikely(num_sectors > max_sectors)) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    bytes = num_sectors << BDRV_SECTOR_BITS;

    if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    /*
     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
     * and write zeroes commands if any unknown flag is set.
     */
    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
        err_status = VIRTIO_BLK_S_UNSUPP;
        goto err;
    }

    if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
        int blk_aio_flags = 0;

        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
            blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
                         BLOCK_ACCT_WRITE);

        blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
                              bytes, blk_aio_flags,
                              virtio_blk_discard_write_zeroes_complete, req);
    } else { /* VIRTIO_BLK_T_DISCARD */
        /*
         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
         * discard commands if the unmap flag is set.
         */
        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
            err_status = VIRTIO_BLK_S_UNSUPP;
            goto err;
        }

        blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
                         virtio_blk_discard_write_zeroes_complete, req);
    }

    return VIRTIO_BLK_S_OK;

err:
    if (is_write_zeroes) {
        block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
    }
    return err_status;
}

typedef struct ZoneCmdData {
    VirtIOBlockReq *req;
    struct iovec *in_iov;
    unsigned in_num;
    union {
        struct {
            unsigned int nr_zones;
            BlockZoneDescriptor *zones;
        } zone_report_data;
        struct {
            int64_t offset;
        } zone_append_data;
    };
} ZoneCmdData;

/*
 * check_zoned_request: error checking before issuing requests. Returns true
 * if all checks pass.
 * append: true if the request is a zone append.
 */
static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
                                bool append, uint8_t *status) {
    BlockDriverState *bs = blk_bs(s->blk);
    int index;

    if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
        *status = VIRTIO_BLK_S_UNSUPP;
        return false;
    }

    if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS)
        || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) {
        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    if (append) {
        if (bs->bl.write_granularity) {
            if ((offset % bs->bl.write_granularity) != 0) {
                *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
                return false;
            }
        }

        index = offset / bs->bl.zone_size;
        if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) {
            *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            return false;
        }

        if (len / 512 > bs->bl.max_append_sectors) {
            if (bs->bl.max_append_sectors == 0) {
                *status = VIRTIO_BLK_S_UNSUPP;
            } else {
                *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            }
            return false;
        }
    }
    return true;
}

static void virtio_blk_zone_report_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    struct iovec *in_iov = data->in_iov;
    unsigned in_num = data->in_num;
    int64_t zrp_size, n, j = 0;
    int64_t nz = data->zone_report_data.nr_zones;
    int8_t err_status = VIRTIO_BLK_S_OK;
    struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
        .nr_zones = cpu_to_le64(nz),
    };

    trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    zrp_size = sizeof(struct virtio_blk_zone_report)
               + sizeof(struct virtio_blk_zone_descriptor) * nz;
    n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
    if (n != sizeof(zrp_hdr)) {
        virtio_error(vdev, "Driver provided input buffer that is too small!");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    for (size_t i = sizeof(zrp_hdr); i < zrp_size;
         i += sizeof(struct virtio_blk_zone_descriptor), ++j) {
        struct virtio_blk_zone_descriptor desc =
            (struct virtio_blk_zone_descriptor) {
                .z_start = cpu_to_le64(data->zone_report_data.zones[j].start
                                       >> BDRV_SECTOR_BITS),
                .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap
                                     >> BDRV_SECTOR_BITS),
                .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp
                                    >> BDRV_SECTOR_BITS),
            };

        switch (data->zone_report_data.zones[j].type) {
        case BLK_ZT_CONV:
            desc.z_type = VIRTIO_BLK_ZT_CONV;
            break;
        case BLK_ZT_SWR:
            desc.z_type = VIRTIO_BLK_ZT_SWR;
            break;
        case BLK_ZT_SWP:
            desc.z_type = VIRTIO_BLK_ZT_SWP;
            break;
        default:
            g_assert_not_reached();
        }

        switch (data->zone_report_data.zones[j].state) {
        case BLK_ZS_RDONLY:
            desc.z_state = VIRTIO_BLK_ZS_RDONLY;
            break;
        case BLK_ZS_OFFLINE:
            desc.z_state = VIRTIO_BLK_ZS_OFFLINE;
            break;
        case BLK_ZS_EMPTY:
            desc.z_state = VIRTIO_BLK_ZS_EMPTY;
            break;
        case BLK_ZS_CLOSED:
            desc.z_state = VIRTIO_BLK_ZS_CLOSED;
            break;
        case BLK_ZS_FULL:
            desc.z_state = VIRTIO_BLK_ZS_FULL;
            break;
        case BLK_ZS_EOPEN:
            desc.z_state = VIRTIO_BLK_ZS_EOPEN;
            break;
        case BLK_ZS_IOPEN:
            desc.z_state = VIRTIO_BLK_ZS_IOPEN;
            break;
        case BLK_ZS_NOT_WP:
            desc.z_state = VIRTIO_BLK_ZS_NOT_WP;
            break;
        default:
            g_assert_not_reached();
        }

        /* TODO: this loop is O(n^2); it needs optimization */
        n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
        if (n != sizeof(desc)) {
            virtio_error(vdev, "Driver provided input buffer "
                               "for descriptors that is too small!");
            err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        }
    }

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data->zone_report_data.zones);
    g_free(data);
}

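/*
 * VIRTIO_BLK_T_ZONE_REPORT: the number of zones the guest asked for is
 * inferred from the size of the driver-provided input buffer, i.e. how many
 * virtio_blk_zone_descriptor entries fit after the report header and inhdr.
 */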
static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
                                          struct iovec *in_iov,
                                          unsigned in_num)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    unsigned int nr_zones;
    ZoneCmdData *data;
    int64_t zone_size, offset;
    uint8_t err_status;

    if (req->in_len < sizeof(struct virtio_blk_inhdr) +
            sizeof(struct virtio_blk_zone_report) +
            sizeof(struct virtio_blk_zone_descriptor)) {
        virtio_error(vdev, "in buffer too small for zone report");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    /* start byte offset of the zone report */
    offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
        goto out;
    }
    nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
                sizeof(struct virtio_blk_zone_report)) /
               sizeof(struct virtio_blk_zone_descriptor);
    trace_virtio_blk_handle_zone_report(vdev, req,
                                        offset >> BDRV_SECTOR_BITS, nr_zones);

    zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
    data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_report_data.nr_zones = nr_zones;
    data->zone_report_data.zones = g_malloc(zone_size);

    blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
                        data->zone_report_data.zones,
                        virtio_blk_zone_report_complete, data);
    return;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    int8_t err_status = VIRTIO_BLK_S_OK;
    trace_virtio_blk_zone_mgmt_complete(vdev, req, ret);

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
    }

    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    BlockDriverState *bs = blk_bs(s->blk);
    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    uint64_t len;
    uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    uint32_t type = virtio_ldl_p(vdev, &req->out.type);
    if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
        /* Entire drive capacity */
        offset = 0;
        len = capacity;
        trace_virtio_blk_handle_zone_reset_all(vdev, req, 0,
                                               bs->total_sectors);
    } else {
        if (bs->bl.zone_size > capacity - offset) {
            /* The zoned device allows the last smaller zone. */
            len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
        } else {
            len = bs->bl.zone_size;
        }
        trace_virtio_blk_handle_zone_mgmt(vdev, req, op,
                                          offset >> BDRV_SECTOR_BITS,
                                          len >> BDRV_SECTOR_BITS);
    }

    if (!check_zoned_request(s, offset, len, false, &err_status)) {
        goto out;
    }

    blk_aio_zone_mgmt(s->blk, op, offset, len,
                      virtio_blk_zone_mgmt_complete, req);

    return 0;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

static void virtio_blk_zone_append_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    int64_t append_sector, n;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    virtio_stq_p(vdev, &append_sector,
                 data->zone_append_data.offset >> BDRV_SECTOR_BITS);
    n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
                     sizeof(append_sector));
    if (n != sizeof(append_sector)) {
        virtio_error(vdev, "Driver provided input buffer less than size of "
                           "append_sector");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }
    trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data);
}

static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
                                         struct iovec *out_iov,
                                         struct iovec *in_iov,
                                         uint64_t out_num,
                                         unsigned in_num) {
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint8_t err_status = VIRTIO_BLK_S_OK;

    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    int64_t len = iov_size(out_iov, out_num);
    ZoneCmdData *data;

    trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
    if (!check_zoned_request(s, offset, len, true, &err_status)) {
        goto out;
    }

    data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_append_data.offset = offset;
    qemu_iovec_init_external(&req->qiov, out_iov, out_num);

    block_acct_start(blk_get_stats(s->blk), &req->acct, len,
                     BLOCK_ACCT_ZONE_APPEND);

    blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
                        virtio_blk_zone_append_complete, data);
    return 0;

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

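/*
 * Parse and dispatch one request popped from a virtqueue. The virtio-blk
 * outhdr is peeled off the front of the output segments and the inhdr off
 * the back of the input segments, with undo records kept in case the device
 * needs to requeue the request. Reads and writes are accumulated in mrb for
 * merging; other request types are handled immediately. Returns -1 if the
 * device is broken and the remaining requests should be dropped, 0 otherwise.
 */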
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    uint32_t type;
    struct iovec *in_iov = req->elem.in_sg;
    struct iovec *out_iov = req->elem.out_sg;
    unsigned in_num = req->elem.in_num;
    unsigned out_num = req->elem.out_num;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        virtio_error(vdev, "virtio-blk missing headers");
        return -1;
    }

    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        virtio_error(vdev, "virtio-blk request outhdr too short");
        return -1;
    }

    iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
                               &req->outhdr_undo);

    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        virtio_error(vdev, "virtio-blk request inhdr too short");
        iov_discard_undo(&req->outhdr_undo);
        return -1;
    }

    /* We always touch the last byte, so just see how big in_iov is. */
    req->in_len = iov_size(in_iov, in_num);
    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);
    iov_discard_back_undoable(in_iov, &in_num, sizeof(struct virtio_blk_inhdr),
                              &req->inhdr_undo);

    type = virtio_ldl_p(vdev, &req->out.type);

    /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
     * is an optional flag. Although a guest should not send this flag if
     * not negotiated, we ignored it in the past. So keep ignoring it. */
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN:
    {
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = virtio_ldq_p(vdev, &req->out.sector);

        if (is_write) {
            qemu_iovec_init_external(&req->qiov, out_iov, out_num);
            trace_virtio_blk_handle_write(vdev, req, req->sector_num,
                                          req->qiov.size / BDRV_SECTOR_SIZE);
        } else {
            qemu_iovec_init_external(&req->qiov, in_iov, in_num);
            trace_virtio_blk_handle_read(vdev, req, req->sector_num,
                                         req->qiov.size / BDRV_SECTOR_SIZE);
        }

        if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
            block_acct_invalid(blk_get_stats(s->blk),
                               is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
            virtio_blk_free_request(req);
            return 0;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size,
                         is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);

        /* merge would exceed maximum number of requests or IO direction
         * changes */
        if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
                                  is_write != mrb->is_write ||
                                  !s->conf.request_merging)) {
            virtio_blk_submit_multireq(s, mrb);
        }

        assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
        mrb->reqs[mrb->num_reqs++] = req;
        mrb->is_write = is_write;
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        virtio_blk_handle_flush(req, mrb);
        break;
    case VIRTIO_BLK_T_ZONE_REPORT:
        virtio_blk_handle_zone_report(req, in_iov, in_num);
        break;
    case VIRTIO_BLK_T_ZONE_OPEN:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
        break;
    case VIRTIO_BLK_T_ZONE_CLOSE:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
        break;
    case VIRTIO_BLK_T_ZONE_FINISH:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
        break;
    case VIRTIO_BLK_T_ZONE_RESET:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_ZONE_RESET_ALL:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_SCSI_CMD:
        virtio_blk_handle_scsi(req);
        break;
    case VIRTIO_BLK_T_GET_ID:
    {
        /*
         * NB: per existing s/n string convention the string is
         * terminated by '\0' only when shorter than buffer.
         */
        const char *serial = s->conf.serial ? s->conf.serial : "";
        size_t size = MIN(strlen(serial) + 1,
                          MIN(iov_size(in_iov, in_num),
                              VIRTIO_BLK_ID_BYTES));
        iov_from_buf(in_iov, in_num, 0, serial, size);
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        virtio_blk_free_request(req);
        break;
    }
    case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
        /*
         * Pass out_iov/out_num and in_iov/in_num: it is not safe to access
         * req->elem.out_sg directly because it may have been modified
         * earlier in virtio_blk_handle_request().
         */
        virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num);
        break;
    /*
     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
     * so we must mask it for these requests, then we will check if it is set.
     */
    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
    {
        struct virtio_blk_discard_write_zeroes dwz_hdr;
        size_t out_len = iov_size(out_iov, out_num);
        bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
                               VIRTIO_BLK_T_WRITE_ZEROES;
        uint8_t err_status;

        /*
         * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
         * more than one segment.
         */
        if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
                     out_len > sizeof(dwz_hdr))) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
            virtio_blk_free_request(req);
            return 0;
        }

        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
            iov_discard_undo(&req->inhdr_undo);
            iov_discard_undo(&req->outhdr_undo);
            virtio_error(vdev, "virtio-blk discard/write_zeroes header"
                         " too short");
            return -1;
        }

        err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
                                                            is_write_zeroes);
        if (err_status != VIRTIO_BLK_S_OK) {
            virtio_blk_req_complete(req, err_status);
            virtio_blk_free_request(req);
        }

        break;
    }
    default:
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        virtio_blk_free_request(req);
    }
    return 0;
}

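/*
 * Drain a virtqueue: pop and handle requests until the queue is empty,
 * temporarily disabling guest notifications while processing so the guest
 * does not keep kicking the device. Any merged reads/writes left in mrb are
 * flushed at the end, and completions are batched between
 * defer_call_begin()/defer_call_end().
 */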
void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {};
    bool suppress_notifications = virtio_queue_get_notification(vq);

    defer_call_begin();

    do {
        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 0);
        }

        while ((req = virtio_blk_get_request(s, vq))) {
            if (virtio_blk_handle_request(req, &mrb)) {
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                break;
            }
        }

        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 1);
        }
    } while (!virtio_queue_empty(vq));

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    defer_call_end();
}

static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = (VirtIOBlock *)vdev;

    if (!s->ioeventfd_disabled && !s->ioeventfd_started) {
        /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
         * ioeventfd here instead of waiting for .set_status().
         */
        virtio_device_start_ioeventfd(vdev);
        if (!s->ioeventfd_disabled) {
            return;
        }
    }

    virtio_blk_handle_vq(s, vq);
}

static void virtio_blk_dma_restart_bh(void *opaque)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev; /* we're called with at least one request */

    MultiReqBuffer mrb = {};

    while (req) {
        VirtIOBlockReq *next = req->next;
        if (virtio_blk_handle_request(req, &mrb)) {
            /* Device is now broken and won't do any processing until it gets
             * reset. Already queued requests will be lost: let's purge them.
             */
            while (req) {
                next = req->next;
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                req = next;
            }
            break;
        }
        req = next;
    }

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    /* Paired with inc in virtio_blk_dma_restart_cb() */
    blk_dec_in_flight(s->conf.conf.blk);
}

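/*
 * vmstate change handler, called when the VM resumes. Requests that were
 * parked on s->rq (e.g. after a rerror/werror=stop error) are split into
 * per-virtqueue lists and resubmitted from a bottom half in each
 * virtqueue's AioContext.
 */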
static void virtio_blk_dma_restart_cb(void *opaque, bool running,
                                      RunState state)
{
    VirtIOBlock *s = opaque;
    uint16_t num_queues = s->conf.num_queues;
    g_autofree VirtIOBlockReq **vq_rq = NULL;
    VirtIOBlockReq *rq;

    if (!running) {
        return;
    }

    /* Split the device-wide s->rq request list into per-vq request lists */
    vq_rq = g_new0(VirtIOBlockReq *, num_queues);

    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        rq = s->rq;
        s->rq = NULL;
    }

    while (rq) {
        VirtIOBlockReq *next = rq->next;
        uint16_t idx = virtio_get_queue_index(rq->vq);

        /* Only num_queues vqs were created so vq_rq[idx] is within bounds */
        assert(idx < num_queues);
        rq->next = vq_rq[idx];
        vq_rq[idx] = rq;
        rq = next;
    }

    /* Schedule a BH to submit the requests in each vq's AioContext */
    for (uint16_t i = 0; i < num_queues; i++) {
        if (!vq_rq[i]) {
            continue;
        }

        /* Paired with dec in virtio_blk_dma_restart_bh() */
        blk_inc_in_flight(s->conf.conf.blk);

        aio_bh_schedule_oneshot(s->vq_aio_context[i],
                                virtio_blk_dma_restart_bh,
                                vq_rq[i]);
    }
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    VirtIOBlockReq *req;

    /* Dataplane has stopped... */
    assert(!s->ioeventfd_started);

    /* ...but requests may still be in flight. */
    blk_drain(s->blk);

    /* We drop queued requests after blk_drain() because blk_drain() itself can
     * produce them. */
    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        while (s->rq) {
            req = s->rq;
            s->rq = req->next;

            /* No other threads can access req->vq here */
            virtqueue_detach_element(req->vq, &req->elem, 0);

            virtio_blk_free_request(req);
        }
    }

    blk_set_enable_write_cache(s->blk, s->original_wce);
}

/* Coalesce internal state, copy to pci i/o region 0. */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    BlockConf *conf = &s->conf.conf;
    BlockDriverState *bs = blk_bs(s->blk);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int64_t length;
    int blk_size = conf->logical_block_size;

    blk_get_geometry(s->blk, &capacity);
    memset(&blkcfg, 0, sizeof(blkcfg));
    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
    virtio_stl_p(vdev, &blkcfg.seg_max,
                 s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
    virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
    virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
    virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
    blkcfg.geometry.heads = conf->heads;
    /*
     * We must ensure that the block device capacity is a multiple of
     * the logical block size. If that is not the case, let's use
     * sector_mask to adapt the geometry to have a correct picture.
     * For those devices where the capacity is ok for the given geometry
     * we don't touch the sector value of the geometry, since some devices
     * (like s390 dasd) need a specific value. Here the capacity is already
     * cyls*heads*secs*blk_size and the sector value is not block size
     * divided by 512 - instead it is the amount of blk_size blocks
     * per track (cylinder).
     */
    length = blk_getlength(s->blk);
    if (length > 0 && length / conf->heads / conf->secs % blk_size) {
        blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
    } else {
        blkcfg.geometry.sectors = conf->secs;
    }
    blkcfg.size_max = 0;
    blkcfg.physical_block_exp = get_physical_block_exp(conf);
    blkcfg.alignment_offset = 0;
    blkcfg.wce = blk_enable_write_cache(s->blk);
    virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
        uint32_t discard_granularity = conf->discard_granularity;
        if (discard_granularity == -1 || !s->conf.report_discard_granularity) {
            discard_granularity = blk_size;
        }
        virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
                     s->conf.max_discard_sectors);
        virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
                     discard_granularity >> BDRV_SECTOR_BITS);
        /*
         * We support only one segment per request since multiple segments
         * are not widely used and there are no userspace APIs that allow
         * applications to submit multiple segments in a single call.
         */
        virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
    }
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
                     s->conf.max_write_zeroes_sectors);
        blkcfg.write_zeroes_may_unmap = 1;
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
    }
    if (bs->bl.zoned != BLK_Z_NONE) {
        switch (bs->bl.zoned) {
        case BLK_Z_HM:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
            break;
        case BLK_Z_HA:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
            break;
        default:
            g_assert_not_reached();
        }

        virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
                     bs->bl.zone_size / 512);
        virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
                     bs->bl.max_active_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
                     bs->bl.max_open_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
        virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
                     bs->bl.max_append_sectors);
    } else {
        blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
    }
    memcpy(config, &blkcfg, s->config_size);
}

static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    struct virtio_blk_config blkcfg;

    memcpy(&blkcfg, config, s->config_size);

    blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
}

static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    /* First, sync all possible supported virtio-blk features */
    features |= s->host_features;

    virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
    virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
    virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
    virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
    if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
        if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) {
            error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
            return 0;
        }
    } else {
        virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
        virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
    }

    if (blk_enable_write_cache(s->blk) ||
        (s->conf.x_enable_wce_if_config_wce &&
         virtio_has_feature(features, VIRTIO_BLK_F_CONFIG_WCE))) {
        virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
    }
    if (!blk_is_writable(s->blk)) {
        virtio_add_feature(&features, VIRTIO_BLK_F_RO);
    }
    if (s->conf.num_queues > 1) {
        virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
    }

    return features;
}

9315cbfd PB |
1407 | static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) |
1408 | { | |
1cc91b7d | 1409 | VirtIOBlock *s = VIRTIO_BLK(vdev); |
9315cbfd | 1410 | |
9ffe337c | 1411 | if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { |
3cdaf3dd | 1412 | assert(!s->ioeventfd_started); |
392808b4 | 1413 | } |
392808b4 | 1414 | |
9315cbfd PB |
1415 | if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { |
1416 | return; | |
1417 | } | |
1418 | ||
ef5bc962 PB |
1419 | /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send |
1420 | * cache flushes. Thus, the "auto writethrough" behavior is never | |
1421 | * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. | |
1422 | * Leaving it enabled would break the following sequence: | |
1423 | * | |
1424 | * Guest started with "-drive cache=writethrough" | |
1425 | * Guest sets status to 0 | |
1426 | * Guest sets DRIVER bit in status field | |
1427 | * Guest reads host features (WCE=0, CONFIG_WCE=1) | |
1428 | * Guest writes guest features (WCE=0, CONFIG_WCE=1) | |
1429 | * Guest writes 1 to the WCE configuration field (writeback mode) | |
1430 | * Guest sets DRIVER_OK bit in status field | |
1431 | * | |
4be74634 | 1432 | * s->blk would erroneously be placed in writethrough mode. |
ef5bc962 | 1433 | */ |
95129d6f | 1434 | if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { |
4be74634 | 1435 | blk_set_enable_write_cache(s->blk, |
95129d6f CH |
1436 | virtio_vdev_has_feature(vdev, |
1437 | VIRTIO_BLK_F_WCE)); | |
ef5bc962 | 1438 | } |
9315cbfd PB |
1439 | } |
1440 | ||
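/*
 * In short: once VIRTIO_BLK_F_CONFIG_WCE is negotiated, the cache mode is
 * controlled exclusively through config space writes (see
 * virtio_blk_set_config() above) and DRIVER_OK no longer touches it; only
 * guests without CONFIG_WCE fall back to the WCE feature bit here.
 */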
b2b295a7 GK |
1441 | static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) |
1442 | { | |
1443 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
b2b295a7 | 1444 | |
9c67f33f SH |
1445 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { |
1446 | VirtIOBlockReq *req = s->rq; | |
30d8bf6d | 1447 | |
9c67f33f SH |
1448 | while (req) { |
1449 | qemu_put_sbyte(f, 1); | |
30d8bf6d | 1450 | |
9c67f33f SH |
1451 | if (s->conf.num_queues > 1) { |
1452 | qemu_put_be32(f, virtio_get_queue_index(req->vq)); | |
1453 | } | |
1454 | ||
1455 | qemu_put_virtqueue_element(vdev, f, &req->elem); | |
1456 | req = req->next; | |
1457 | } | |
869a5c6d | 1458 | } |
9c67f33f | 1459 | |
869a5c6d | 1460 | qemu_put_sbyte(f, 0); |
6e02c38d AL |
1461 | } |
1462 | ||
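/*
 * Read back as a format description, the migration stream produced above
 * for the in-flight request list is:
 *
 *   for each request:
 *     sbyte 1              -- "another request follows" marker
 *     be32  vq index       -- only present when num_queues > 1
 *     VirtQueueElement     -- via qemu_put_virtqueue_element()
 *   sbyte 0                -- end of list
 */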
b2b295a7 GK |
1463 | static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, |
1464 | int version_id) | |
1465 | { | |
1466 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
2a633c46 | 1467 | |
869a5c6d | 1468 | while (qemu_get_sbyte(f)) { |
30d8bf6d SH |
1469 | unsigned nvqs = s->conf.num_queues; |
1470 | unsigned vq_idx = 0; | |
ab281c17 | 1471 | VirtIOBlockReq *req; |
30d8bf6d SH |
1472 | |
1473 | if (nvqs > 1) { | |
1474 | vq_idx = qemu_get_be32(f); | |
1475 | ||
1476 | if (vq_idx >= nvqs) { | |
1477 | error_report("Invalid virtqueue index in request list: %#x", | |
1478 | vq_idx); | |
1479 | return -EINVAL; | |
1480 | } | |
1481 | } | |
1482 | ||
8607f5c3 | 1483 | req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); |
30d8bf6d | 1484 | virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); |
9c67f33f SH |
1485 | |
1486 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { | |
1487 | req->next = s->rq; | |
1488 | s->rq = req; | |
1489 | } | |
869a5c6d | 1490 | } |
6e02c38d AL |
1491 | |
1492 | return 0; | |
1493 | } | |
1494 | ||
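/*
 * qemu_get_virtqueue_element() is asked for sizeof(VirtIOBlockReq) bytes
 * rather than sizeof(VirtQueueElement); this relies on the convention
 * (assumed here) that the VirtQueueElement is the first member of
 * VirtIOBlockReq, so the restored element and its request share a single
 * allocation, mirroring how requests are built during normal operation.
 */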
9b92fbcf SL |
1495 | static void virtio_resize_cb(void *opaque) |
1496 | { | |
1497 | VirtIODevice *vdev = opaque; | |
1498 | ||
1499 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | |
1500 | virtio_notify_config(vdev); | |
1501 | } | |
1502 | ||
145feb17 | 1503 | static void virtio_blk_resize(void *opaque) |
e5051fc7 | 1504 | { |
1cc91b7d | 1505 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); |
e5051fc7 | 1506 | |
9b92fbcf | 1507 | /* |
0b2675c4 | 1508 | * virtio_notify_config() needs to acquire the BQL, |
9b92fbcf SL |
1509 | * so it can't be called from an iothread. Instead, schedule |
1510 | * it to run as a BH in the main loop context. | |
1511 | */ | |
1512 | aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); | |
e5051fc7 CH |
1513 | } |
1514 | ||
3cdaf3dd | 1515 | static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) |
3bcc17f0 SH |
1516 | { |
1517 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
1518 | ||
1519 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1520 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1521 | virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); | |
1522 | } | |
1523 | } | |
1524 | ||
3cdaf3dd | 1525 | static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) |
3bcc17f0 SH |
1526 | { |
1527 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
1528 | ||
1529 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1530 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1531 | virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); | |
1532 | } | |
1533 | } | |
1534 | ||
1665d932 SH |
1535 | /* Suspend virtqueue ioeventfd processing during drain */ |
1536 | static void virtio_blk_drained_begin(void *opaque) | |
1537 | { | |
1538 | VirtIOBlock *s = opaque; | |
1665d932 | 1539 | |
3cdaf3dd SH |
1540 | if (s->ioeventfd_started) { |
1541 | virtio_blk_ioeventfd_detach(s); | |
1665d932 | 1542 | } |
1665d932 SH |
1543 | } |
1544 | ||
1545 | /* Resume virtqueue ioeventfd processing after drain */ | |
1546 | static void virtio_blk_drained_end(void *opaque) | |
1547 | { | |
1548 | VirtIOBlock *s = opaque; | |
1665d932 | 1549 | |
3cdaf3dd SH |
1550 | if (s->ioeventfd_started) { |
1551 | virtio_blk_ioeventfd_attach(s); | |
1665d932 | 1552 | } |
1665d932 SH |
1553 | } |
1554 | ||
0e49de52 | 1555 | static const BlockDevOps virtio_block_ops = { |
1665d932 SH |
1556 | .resize_cb = virtio_blk_resize, |
1557 | .drained_begin = virtio_blk_drained_begin, | |
1558 | .drained_end = virtio_blk_drained_end, | |
0e49de52 MA |
1559 | }; |
1560 | ||
1f995a47 SH |
1561 | static bool |
1562 | validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, | |
1563 | uint16_t num_queues, Error **errp) | |
1564 | { | |
1565 | g_autofree unsigned long *vqs = bitmap_new(num_queues); | |
1566 | g_autoptr(GHashTable) iothreads = | |
1567 | g_hash_table_new(g_str_hash, g_str_equal); | |
1568 | ||
1569 | for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { | |
1570 | const char *name = node->value->iothread; | |
1571 | uint16List *vq; | |
1572 | ||
1573 | if (!iothread_by_id(name)) { | |
1574 | error_setg(errp, "IOThread \"%s\" object does not exist", name); | |
1575 | return false; | |
1576 | } | |
1577 | ||
1578 | if (!g_hash_table_add(iothreads, (gpointer)name)) { | |
1579 | error_setg(errp, | |
1580 | "duplicate IOThread name \"%s\" in iothread-vq-mapping", | |
1581 | name); | |
1582 | return false; | |
1583 | } | |
1584 | ||
1585 | if (node != list) { | |
1586 | if (!!node->value->vqs != !!list->value->vqs) { | |
1587 | error_setg(errp, "either all items in iothread-vq-mapping " | |
1588 | "must have vqs or none of them must have it"); | |
1589 | return false; | |
1590 | } | |
1591 | } | |
1592 | ||
1593 | for (vq = node->value->vqs; vq; vq = vq->next) { | |
1594 | if (vq->value >= num_queues) { | |
1595 | error_setg(errp, "vq index %u for IOThread \"%s\" must be " | |
1596 | "less than num_queues %u in iothread-vq-mapping", | |
1597 | vq->value, name, num_queues); | |
1598 | return false; | |
1599 | } | |
1600 | ||
1601 | if (test_and_set_bit(vq->value, vqs)) { | |
1602 | error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " | |
1603 | "because it is already assigned", vq->value, name); | |
1604 | return false; | |
1605 | } | |
1606 | } | |
1607 | } | |
1608 | ||
1609 | if (list->value->vqs) { | |
1610 | for (uint16_t i = 0; i < num_queues; i++) { | |
1611 | if (!test_bit(i, vqs)) { | |
1612 | error_setg(errp, | |
1613 | "missing vq %u IOThread assignment in iothread-vq-mapping", | |
1614 | i); | |
1615 | return false; | |
1616 | } | |
1617 | } | |
1618 | } | |
1619 | ||
1620 | return true; | |
1621 | } | |
1622 | ||
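/*
 * A mapping that passes the checks above could look like this on the
 * command line (an illustrative sketch; object and drive IDs are made up):
 *
 *   -object iothread,id=iot0 -object iothread,id=iot1 \
 *   -device '{"driver":"virtio-blk-pci","drive":"drive0","num-queues":4,
 *             "iothread-vq-mapping":[{"iothread":"iot0","vqs":[0,1]},
 *                                    {"iothread":"iot1","vqs":[2,3]}]}'
 *
 * Each IOThread must exist and appear once, and explicit "vqs" lists, when
 * used, must cover every queue index below num-queues exactly once.
 */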
1623 | /** | |
1624 | * apply_iothread_vq_mapping: | |
1625 | * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads. | |
1626 | * @vq_aio_context: The array of AioContext pointers to fill in. | |
1627 | * @num_queues: The length of @vq_aio_context. | |
1628 | * @errp: If an error occurs, a pointer to the area to store the error. | |
1629 | * | |
1630 | * Fill in the AioContext for each virtqueue in the @vq_aio_context array given | |
1631 | * the iothread-vq-mapping parameter in @iothread_vq_mapping_list. | |
1632 | * | |
1633 | * Returns: %true on success, %false on failure. | |
1634 | **/ | |
1635 | static bool apply_iothread_vq_mapping( | |
1636 | IOThreadVirtQueueMappingList *iothread_vq_mapping_list, | |
1637 | AioContext **vq_aio_context, | |
1638 | uint16_t num_queues, | |
1639 | Error **errp) | |
3bcc17f0 SH |
1640 | { |
1641 | IOThreadVirtQueueMappingList *node; | |
1642 | size_t num_iothreads = 0; | |
1643 | size_t cur_iothread = 0; | |
1644 | ||
1f995a47 SH |
1645 | if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list, |
1646 | num_queues, errp)) { | |
1647 | return false; | |
1648 | } | |
1649 | ||
3bcc17f0 SH |
1650 | for (node = iothread_vq_mapping_list; node; node = node->next) { |
1651 | num_iothreads++; | |
1652 | } | |
1653 | ||
1654 | for (node = iothread_vq_mapping_list; node; node = node->next) { | |
1655 | IOThread *iothread = iothread_by_id(node->value->iothread); | |
1656 | AioContext *ctx = iothread_get_aio_context(iothread); | |
1657 | ||
57bc2658 | 1658 | /* Released in virtio_blk_vq_aio_context_cleanup() */ |
3bcc17f0 SH |
1659 | object_ref(OBJECT(iothread)); |
1660 | ||
1661 | if (node->value->vqs) { | |
1662 | uint16List *vq; | |
1663 | ||
1664 | /* Explicit vq:IOThread assignment */ | |
1665 | for (vq = node->value->vqs; vq; vq = vq->next) { | |
1f995a47 | 1666 | assert(vq->value < num_queues); |
3bcc17f0 SH |
1667 | vq_aio_context[vq->value] = ctx; |
1668 | } | |
1669 | } else { | |
1670 | /* Round-robin vq:IOThread assignment */ | |
1671 | for (unsigned i = cur_iothread; i < num_queues; | |
1672 | i += num_iothreads) { | |
1673 | vq_aio_context[i] = ctx; | |
1674 | } | |
1675 | } | |
1676 | ||
1677 | cur_iothread++; | |
1678 | } | |
1f995a47 SH |
1679 | |
1680 | return true; | |
3bcc17f0 SH |
1681 | } |
1682 | ||
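/*
 * Round-robin example: with num_queues == 4 and two IOThreads A and B that
 * carry no explicit "vqs" lists, the loop above assigns
 *
 *   vq 0 -> A, vq 1 -> B, vq 2 -> A, vq 3 -> B
 *
 * because the k-th IOThread (0-based) takes every num_iothreads-th queue
 * starting at index k.
 */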
1683 | /* Context: BQL held */ | |
57bc2658 | 1684 | static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) |
3bcc17f0 | 1685 | { |
0ea5f594 | 1686 | ERRP_GUARD(); |
3bcc17f0 SH |
1687 | VirtIODevice *vdev = VIRTIO_DEVICE(s); |
1688 | VirtIOBlkConf *conf = &s->conf; | |
1689 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); | |
1690 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1691 | ||
1f995a47 SH |
1692 | if (conf->iothread && conf->iothread_vq_mapping_list) { |
1693 | error_setg(errp, | |
1694 | "iothread and iothread-vq-mapping properties cannot be set " | |
1695 | "at the same time"); | |
1696 | return false; | |
1697 | } | |
1698 | ||
3bcc17f0 SH |
1699 | if (conf->iothread || conf->iothread_vq_mapping_list) { |
1700 | if (!k->set_guest_notifiers || !k->ioeventfd_assign) { | |
1701 | error_setg(errp, | |
1702 | "device is incompatible with iothread " | |
1703 | "(transport does not support notifiers)"); | |
1704 | return false; | |
1705 | } | |
1706 | if (!virtio_device_ioeventfd_enabled(vdev)) { | |
1707 | error_setg(errp, "ioeventfd is required for iothread"); | |
1708 | return false; | |
1709 | } | |
1710 | ||
1711 | /* | |
3cdaf3dd | 1712 | * If ioeventfd is (re-)enabled while the guest is running there could |
3bcc17f0 SH |
1713 | * be block jobs that can conflict. |
1714 | */ | |
1715 | if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { | |
3cdaf3dd | 1716 | error_prepend(errp, "cannot start virtio-blk ioeventfd: "); |
3bcc17f0 SH |
1717 | return false; |
1718 | } | |
1719 | } | |
3bcc17f0 SH |
1720 | |
1721 | s->vq_aio_context = g_new(AioContext *, conf->num_queues); | |
1722 | ||
1723 | if (conf->iothread_vq_mapping_list) { | |
1f995a47 SH |
1724 | if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list, |
1725 | s->vq_aio_context, | |
1726 | conf->num_queues, | |
1727 | errp)) { | |
1728 | g_free(s->vq_aio_context); | |
1729 | s->vq_aio_context = NULL; | |
1730 | return false; | |
1731 | } | |
3bcc17f0 SH |
1732 | } else if (conf->iothread) { |
1733 | AioContext *ctx = iothread_get_aio_context(conf->iothread); | |
1734 | for (unsigned i = 0; i < conf->num_queues; i++) { | |
1735 | s->vq_aio_context[i] = ctx; | |
1736 | } | |
1737 | ||
57bc2658 | 1738 | /* Released in virtio_blk_vq_aio_context_cleanup() */ |
3bcc17f0 SH |
1739 | object_ref(OBJECT(conf->iothread)); |
1740 | } else { | |
1741 | AioContext *ctx = qemu_get_aio_context(); | |
1742 | for (unsigned i = 0; i < conf->num_queues; i++) { | |
1743 | s->vq_aio_context[i] = ctx; | |
1744 | } | |
1745 | } | |
1746 | ||
1747 | return true; | |
1748 | } | |
1749 | ||
1750 | /* Context: BQL held */ | |
57bc2658 | 1751 | static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) |
3bcc17f0 SH |
1752 | { |
1753 | VirtIOBlkConf *conf = &s->conf; | |
1754 | ||
3cdaf3dd | 1755 | assert(!s->ioeventfd_started); |
3bcc17f0 SH |
1756 | |
1757 | if (conf->iothread_vq_mapping_list) { | |
1758 | IOThreadVirtQueueMappingList *node; | |
1759 | ||
1760 | for (node = conf->iothread_vq_mapping_list; node; node = node->next) { | |
1761 | IOThread *iothread = iothread_by_id(node->value->iothread); | |
1762 | object_unref(OBJECT(iothread)); | |
1763 | } | |
1764 | } | |
1765 | ||
1766 | if (conf->iothread) { | |
1767 | object_unref(OBJECT(conf->iothread)); | |
1768 | } | |
1769 | ||
1770 | g_free(s->vq_aio_context); | |
1771 | s->vq_aio_context = NULL; | |
1772 | } | |
1773 | ||
1774 | /* Context: BQL held */ | |
3cdaf3dd | 1775 | static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) |
3bcc17f0 SH |
1776 | { |
1777 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1778 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); | |
1779 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1780 | unsigned i; | |
1781 | unsigned nvqs = s->conf.num_queues; | |
1782 | Error *local_err = NULL; | |
1783 | int r; | |
1784 | ||
3cdaf3dd | 1785 | if (s->ioeventfd_started || s->ioeventfd_starting) { |
3bcc17f0 SH |
1786 | return 0; |
1787 | } | |
1788 | ||
3cdaf3dd | 1789 | s->ioeventfd_starting = true; |
3bcc17f0 SH |
1790 | |
1791 | /* Set up guest notifier (irq) */ | |
1792 | r = k->set_guest_notifiers(qbus->parent, nvqs, true); | |
1793 | if (r != 0) { | |
1794 | error_report("virtio-blk failed to set guest notifier (%d), " | |
1795 | "ensure -accel kvm is set.", r); | |
1796 | goto fail_guest_notifiers; | |
1797 | } | |
1798 | ||
1799 | /* | |
1800 | * Batch all the host notifiers in a single transaction to avoid | |
1801 | * quadratic time complexity in address_space_update_ioeventfds(). | |
1802 | */ | |
1803 | memory_region_transaction_begin(); | |
1804 | ||
1805 | /* Set up virtqueue notify */ | |
1806 | for (i = 0; i < nvqs; i++) { | |
1807 | r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); | |
1808 | if (r != 0) { | |
1809 | int j = i; | |
1810 | ||
1811 | error_report("virtio-blk failed to set host notifier (%d)", r); | |
1812 | while (i--) { | |
1813 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | |
1814 | } | |
1815 | ||
1816 | /* | |
1817 | * The transaction expects the ioeventfds to be open when it | |
1818 | * commits. Do it now, before the cleanup loop. | |
1819 | */ | |
1820 | memory_region_transaction_commit(); | |
1821 | ||
1822 | while (j--) { | |
1823 | virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); | |
1824 | } | |
1825 | goto fail_host_notifiers; | |
1826 | } | |
1827 | } | |
1828 | ||
1829 | memory_region_transaction_commit(); | |
1830 | ||
ea0736d7 SH |
1831 | /* |
1832 | * Try to change the AioContext so that block jobs and other operations can | |
1833 | * co-locate their activity in the same AioContext. If it fails, never mind. | |
1834 | */ | |
5fbcbd50 | 1835 | assert(nvqs > 0); /* enforced during ->realize() */ |
3bcc17f0 SH |
1836 | r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], |
1837 | &local_err); | |
1838 | if (r < 0) { | |
ea0736d7 | 1839 | warn_report_err(local_err); |
3bcc17f0 SH |
1840 | } |
1841 | ||
1842 | /* | |
1843 | * These fields must be visible to the IOThread when it processes the | |
3cdaf3dd | 1844 | * virtqueue, otherwise it will think ioeventfd has not started yet. |
3bcc17f0 | 1845 | * |
3cdaf3dd | 1846 | * Make sure ->ioeventfd_started is false when blk_set_aio_context() is |
3bcc17f0 SH |
1847 | * called above so that draining does not cause the host notifier to be |
1848 | * detached/attached prematurely. | |
1849 | */ | |
3cdaf3dd SH |
1850 | s->ioeventfd_starting = false; |
1851 | s->ioeventfd_started = true; | |
3bcc17f0 SH |
1852 | smp_wmb(); /* paired with aio_notify_accept() on the read side */ |
1853 | ||
52bff01f HC |
1854 | /* |
1855 | * Start processing by hooking up our callbacks. If drained now, | |
1856 | * virtio_blk_drained_end() will do this later. | |
1857 | * Attaching the notifier also kicks the virtqueues, processing any requests | |
1858 | * they may already have. | |
1859 | */ | |
1860 | if (!blk_in_drain(s->conf.conf.blk)) { | |
1861 | virtio_blk_ioeventfd_attach(s); | |
3bcc17f0 SH |
1862 | } |
1863 | return 0; | |
1864 | ||
3bcc17f0 SH |
1865 | fail_host_notifiers: |
1866 | k->set_guest_notifiers(qbus->parent, nvqs, false); | |
1867 | fail_guest_notifiers: | |
3cdaf3dd SH |
1868 | s->ioeventfd_disabled = true; |
1869 | s->ioeventfd_starting = false; | |
3bcc17f0 SH |
1870 | return -ENOSYS; |
1871 | } | |
1872 | ||
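/*
 * Summarizing the start-up order above:
 *
 *   1. guest notifiers (irqfds) on
 *   2. host notifiers (ioeventfds) on, batched in one memory transaction
 *   3. move the BlockBackend to the first vq's AioContext (best effort)
 *   4. publish ioeventfd_started, then attach the vq handlers
 *
 * The single memory_region_transaction_begin()/commit() pair around step 2
 * keeps nvqs notifier updates from triggering nvqs separate ioeventfd
 * rescans.
 */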
1873 | /* Stop notifications for new requests from guest. | |
1874 | * | |
1875 | * Context: BH in IOThread | |
1876 | */ | |
3cdaf3dd | 1877 | static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) |
3bcc17f0 SH |
1878 | { |
1879 | VirtQueue *vq = opaque; | |
1880 | EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); | |
1881 | ||
1882 | virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); | |
1883 | ||
1884 | /* | |
1885 | * Test and clear notifier after disabling event, in case poll callback | |
1886 | * didn't have time to run. | |
1887 | */ | |
1888 | virtio_queue_host_notifier_read(host_notifier); | |
1889 | } | |
1890 | ||
1891 | /* Context: BQL held */ | |
3cdaf3dd | 1892 | static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) |
3bcc17f0 SH |
1893 | { |
1894 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1895 | BusState *qbus = qdev_get_parent_bus(DEVICE(s)); | |
1896 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); | |
1897 | unsigned i; | |
1898 | unsigned nvqs = s->conf.num_queues; | |
1899 | ||
3cdaf3dd | 1900 | if (!s->ioeventfd_started || s->ioeventfd_stopping) { |
3bcc17f0 SH |
1901 | return; |
1902 | } | |
1903 | ||
1904 | /* Starting ioeventfd failed earlier; just reset the flags and return. */ | |
3cdaf3dd SH |
1905 | if (s->ioeventfd_disabled) { |
1906 | s->ioeventfd_disabled = false; | |
1907 | s->ioeventfd_started = false; | |
3bcc17f0 SH |
1908 | return; |
1909 | } | |
3cdaf3dd | 1910 | s->ioeventfd_stopping = true; |
3bcc17f0 SH |
1911 | |
1912 | if (!blk_in_drain(s->conf.conf.blk)) { | |
1913 | for (i = 0; i < nvqs; i++) { | |
1914 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1915 | AioContext *ctx = s->vq_aio_context[i]; | |
1916 | ||
3cdaf3dd | 1917 | aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); |
3bcc17f0 SH |
1918 | } |
1919 | } | |
1920 | ||
1921 | /* | |
1922 | * Batch all the host notifiers in a single transaction to avoid | |
1923 | * quadratic time complexity in address_space_update_ioeventfds(). | |
1924 | */ | |
1925 | memory_region_transaction_begin(); | |
1926 | ||
1927 | for (i = 0; i < nvqs; i++) { | |
1928 | virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | |
1929 | } | |
1930 | ||
1931 | /* | |
1932 | * The transaction expects the ioeventfds to be open when it | |
1933 | * commits. Do it now, before the cleanup loop. | |
1934 | */ | |
1935 | memory_region_transaction_commit(); | |
1936 | ||
1937 | for (i = 0; i < nvqs; i++) { | |
1938 | virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | |
1939 | } | |
1940 | ||
1941 | /* | |
3cdaf3dd | 1942 | * Set ->ioeventfd_started to false before draining so that host notifiers |
3bcc17f0 SH |
1943 | * are not detached/attached anymore. |
1944 | */ | |
3cdaf3dd | 1945 | s->ioeventfd_started = false; |
3bcc17f0 SH |
1946 | |
1947 | /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ | |
1948 | blk_drain(s->conf.conf.blk); | |
1949 | ||
1950 | /* | |
1951 | * Try to switch bs back to the QEMU main loop. If other users keep the | |
1952 | * BlockBackend in the iothread, that's OK. | |
1953 | */ | |
1954 | blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); | |
1955 | ||
1956 | /* Clean up guest notifier (irq) */ | |
1957 | k->set_guest_notifiers(qbus->parent, nvqs, false); | |
1958 | ||
3cdaf3dd | 1959 | s->ioeventfd_stopping = false; |
3bcc17f0 SH |
1960 | } |
1961 | ||
75884afd | 1962 | static void virtio_blk_device_realize(DeviceState *dev, Error **errp) |
1c028ddf | 1963 | { |
75884afd | 1964 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
179b417e | 1965 | VirtIOBlock *s = VIRTIO_BLK(dev); |
2a30307f | 1966 | VirtIOBlkConf *conf = &s->conf; |
b3d9bb9a | 1967 | BlockDriverState *bs; |
3ffeeef7 | 1968 | Error *err = NULL; |
2f270590 | 1969 | unsigned i; |
cf21e106 | 1970 | |
4be74634 | 1971 | if (!conf->conf.blk) { |
75884afd AF |
1972 | error_setg(errp, "drive property not set"); |
1973 | return; | |
d75d25e3 | 1974 | } |
4be74634 | 1975 | if (!blk_is_inserted(conf->conf.blk)) { |
75884afd AF |
1976 | error_setg(errp, "Device needs media, but drive is empty"); |
1977 | return; | |
98f28ad7 | 1978 | } |
9445e1e1 SH |
1979 | if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { |
1980 | conf->num_queues = 1; | |
1981 | } | |
2f270590 SH |
1982 | if (!conf->num_queues) { |
1983 | error_setg(errp, "num-queues property must be larger than 0"); | |
1984 | return; | |
1985 | } | |
1bf8a989 DP |
1986 | if (conf->queue_size <= 2) { |
1987 | error_setg(errp, "invalid queue-size property (%" PRIu16 "), " | |
1988 | "must be > 2", conf->queue_size); | |
1989 | return; | |
1990 | } | |
6040aedd MK |
1991 | if (!is_power_of_2(conf->queue_size) || |
1992 | conf->queue_size > VIRTQUEUE_MAX_SIZE) { | |
1993 | error_setg(errp, "invalid queue-size property (%" PRIu16 "), " | |
1994 | "must be a power of 2 (max %d)", | |
1995 | conf->queue_size, VIRTQUEUE_MAX_SIZE); | |
1996 | return; | |
1997 | } | |
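/* Together with the "> 2" check above, this restricts queue-size to the
 * powers of two from 4 up to VIRTQUEUE_MAX_SIZE (1024 at the time of
 * writing): 4, 8, 16, ..., 1024. */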
d75d25e3 | 1998 | |
ceff3e1f | 1999 | if (!blkconf_apply_backend_options(&conf->conf, |
86b1cf32 KW |
2000 | !blk_supports_write_perm(conf->conf.blk), |
2001 | true, errp)) { | |
a17c17a2 KW |
2002 | return; |
2003 | } | |
4be74634 | 2004 | s->original_wce = blk_enable_write_cache(conf->conf.blk); |
ceff3e1f | 2005 | if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) { |
75884afd | 2006 | return; |
b7eb0c9f | 2007 | } |
ceff3e1f | 2008 | |
c56ee92f | 2009 | if (!blkconf_blocksizes(&conf->conf, errp)) { |
0a75b60c MK |
2010 | return; |
2011 | } | |
2012 | ||
b3d9bb9a | 2013 | bs = blk_bs(conf->conf.blk); |
4f736650 SL |
2014 | if (bs->bl.zoned != BLK_Z_NONE) { |
2015 | virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED); | |
2016 | if (bs->bl.zoned == BLK_Z_HM) { | |
2017 | virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD); | |
2018 | } | |
2019 | } | |
2020 | ||
37b06f8d SG |
2021 | if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) && |
2022 | (!conf->max_discard_sectors || | |
2023 | conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) { | |
2024 | error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")" | |
2025 | ", must be between 1 and %d", | |
2026 | conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS); | |
2027 | return; | |
2028 | } | |
2029 | ||
2030 | if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) && | |
2031 | (!conf->max_write_zeroes_sectors || | |
2032 | conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) { | |
2033 | error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32 | |
2034 | "), must be between 1 and %d", | |
2035 | conf->max_write_zeroes_sectors, | |
2036 | (int)BDRV_REQUEST_MAX_SECTORS); | |
2037 | return; | |
2038 | } | |
2039 | ||
d9cf55a8 | 2040 | s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, |
d74c30c8 | 2041 | s->host_features); |
3857cd5c | 2042 | virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); |
6e02c38d | 2043 | |
9c67f33f SH |
2044 | qemu_mutex_init(&s->rq_lock); |
2045 | ||
4be74634 | 2046 | s->blk = conf->conf.blk; |
869a5c6d | 2047 | s->rq = NULL; |
2a30307f | 2048 | s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; |
e63e7fde | 2049 | |
2f270590 | 2050 | for (i = 0; i < conf->num_queues; i++) { |
6040aedd | 2051 | virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); |
2f270590 | 2052 | } |
98e3ab35 | 2053 | qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); |
57bc2658 | 2054 | |
3cdaf3dd | 2055 | /* Don't start ioeventfd if transport does not support notifiers. */ |
57bc2658 | 2056 | if (!virtio_device_ioeventfd_enabled(vdev)) { |
3cdaf3dd | 2057 | s->ioeventfd_disabled = true; |
57bc2658 SH |
2058 | } |
2059 | ||
2060 | virtio_blk_vq_aio_context_init(s, &err); | |
3ffeeef7 | 2061 | if (err != NULL) { |
75884afd | 2062 | error_propagate(errp, err); |
cfaf757e PN |
2063 | for (i = 0; i < conf->num_queues; i++) { |
2064 | virtio_del_queue(vdev, i); | |
2065 | } | |
6a1a8cc7 | 2066 | virtio_cleanup(vdev); |
75884afd | 2067 | return; |
392808b4 | 2068 | } |
6e02c38d | 2069 | |
a937f8e8 SH |
2070 | /* |
2071 | * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets | |
2072 | * called after ->start_ioeventfd() has already set blk's AioContext. | |
2073 | */ | |
2074 | s->change = | |
2075 | qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); | |
2076 | ||
baf42268 | 2077 | blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); |
4be74634 | 2078 | blk_set_dev_ops(s->blk, &virtio_block_ops, s); |
6e02c38d | 2079 | |
4be74634 | 2080 | blk_iostatus_enable(s->blk); |
71f571a2 SE |
2081 | |
2082 | add_boot_device_lchs(dev, "/disk@0,0", | |
2083 | conf->conf.lcyls, | |
2084 | conf->conf.lheads, | |
2085 | conf->conf.lsecs); | |
1c028ddf FK |
2086 | } |
2087 | ||
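/*
 * Realize proceeds roughly as: validate the configuration (drive present,
 * queue count and size, discard/write-zeroes limits), size and initialize
 * the virtio config space, create the virtqueues, set up the per-vq
 * AioContext mapping, and only then install the VM change state handler,
 * the block dev ops and the boot LCHS entry, so a failure in the early
 * checks leaves nothing to unwind.
 */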
b69c3c21 | 2088 | static void virtio_blk_device_unrealize(DeviceState *dev) |
1c028ddf | 2089 | { |
306ec6c3 AF |
2090 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
2091 | VirtIOBlock *s = VIRTIO_BLK(dev); | |
4a0117cf EP |
2092 | VirtIOBlkConf *conf = &s->conf; |
2093 | unsigned i; | |
306ec6c3 | 2094 | |
7bfde688 | 2095 | blk_drain(s->blk); |
71f571a2 | 2096 | del_boot_device_lchs(dev, "/disk@0,0"); |
57bc2658 | 2097 | virtio_blk_vq_aio_context_cleanup(s); |
4a0117cf EP |
2098 | for (i = 0; i < conf->num_queues; i++) { |
2099 | virtio_del_queue(vdev, i); | |
2100 | } | |
98e3ab35 | 2101 | qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); |
9c67f33f | 2102 | qemu_mutex_destroy(&s->rq_lock); |
baf42268 | 2103 | blk_ram_registrar_destroy(&s->blk_ram_registrar); |
1c028ddf | 2104 | qemu_del_vm_change_state_handler(s->change); |
4be74634 | 2105 | blockdev_mark_auto_del(s->blk); |
6a1a8cc7 | 2106 | virtio_cleanup(vdev); |
1c028ddf FK |
2107 | } |
2108 | ||
467b3f33 SH |
2109 | static void virtio_blk_instance_init(Object *obj) |
2110 | { | |
2111 | VirtIOBlock *s = VIRTIO_BLK(obj); | |
2112 | ||
2a30307f | 2113 | device_add_bootindex_property(obj, &s->conf.conf.bootindex, |
3342ec32 | 2114 | "bootindex", "/disk@0,0", |
40c2281c | 2115 | DEVICE(obj)); |
467b3f33 SH |
2116 | } |
2117 | ||
977a117f HP |
2118 | static const VMStateDescription vmstate_virtio_blk = { |
2119 | .name = "virtio-blk", | |
2120 | .minimum_version_id = 2, | |
2121 | .version_id = 2, | |
7d5dc0a3 | 2122 | .fields = (const VMStateField[]) { |
977a117f HP |
2123 | VMSTATE_VIRTIO_DEVICE, |
2124 | VMSTATE_END_OF_LIST() | |
2125 | }, | |
2126 | }; | |
bbded32c | 2127 | |
1c028ddf | 2128 | static Property virtio_blk_properties[] = { |
2a30307f | 2129 | DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), |
8c398252 | 2130 | DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), |
2a30307f MA |
2131 | DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), |
2132 | DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), | |
bbe8bd4d SG |
2133 | DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, |
2134 | VIRTIO_BLK_F_CONFIG_WCE, true), | |
32a877e4 | 2135 | #ifdef __linux__ |
bbe8bd4d SG |
2136 | DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, |
2137 | VIRTIO_BLK_F_SCSI, false), | |
32a877e4 | 2138 | #endif |
c99495ac PL |
2139 | DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, |
2140 | true), | |
9445e1e1 SH |
2141 | DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, |
2142 | VIRTIO_BLK_AUTO_NUM_QUEUES), | |
c9b7d9ec | 2143 | DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), |
1bf8a989 | 2144 | DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), |
d679ac09 FZ |
2145 | DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, |
2146 | IOThread *), | |
b6948ab0 SH |
2147 | DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, |
2148 | conf.iothread_vq_mapping_list), | |
5c81161f SG |
2149 | DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, |
2150 | VIRTIO_BLK_F_DISCARD, true), | |
fb0b154c AO |
2151 | DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, |
2152 | conf.report_discard_granularity, true), | |
5c81161f SG |
2153 | DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, |
2154 | VIRTIO_BLK_F_WRITE_ZEROES, true), | |
37b06f8d SG |
2155 | DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock, |
2156 | conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS), | |
2157 | DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock, | |
2158 | conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS), | |
5f258577 EY |
2159 | DEFINE_PROP_BOOL("x-enable-wce-if-config-wce", VirtIOBlock, |
2160 | conf.x_enable_wce_if_config_wce, true), | |
1c028ddf FK |
2161 | DEFINE_PROP_END_OF_LIST(), |
2162 | }; | |
2163 | ||
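/*
 * Typical use of the properties above (a sketch; IDs are made up):
 *
 *   -drive if=none,id=drive0,file=disk.qcow2 \
 *   -object iothread,id=iot0 \
 *   -device virtio-blk-pci,drive=drive0,iothread=iot0,num-queues=4,
 *           queue-size=256,discard=on,write-zeroes=on
 */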
2164 | static void virtio_blk_class_init(ObjectClass *klass, void *data) | |
2165 | { | |
2166 | DeviceClass *dc = DEVICE_CLASS(klass); | |
2167 | VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); | |
75884afd | 2168 | |
4f67d30b | 2169 | device_class_set_props(dc, virtio_blk_properties); |
bbded32c | 2170 | dc->vmsd = &vmstate_virtio_blk; |
125ee0ed | 2171 | set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); |
75884afd | 2172 | vdc->realize = virtio_blk_device_realize; |
306ec6c3 | 2173 | vdc->unrealize = virtio_blk_device_unrealize; |
1c028ddf FK |
2174 | vdc->get_config = virtio_blk_update_config; |
2175 | vdc->set_config = virtio_blk_set_config; | |
2176 | vdc->get_features = virtio_blk_get_features; | |
2177 | vdc->set_status = virtio_blk_set_status; | |
2178 | vdc->reset = virtio_blk_reset; | |
b2b295a7 GK |
2179 | vdc->save = virtio_blk_save_device; |
2180 | vdc->load = virtio_blk_load_device; | |
3cdaf3dd SH |
2181 | vdc->start_ioeventfd = virtio_blk_start_ioeventfd; |
2182 | vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; | |
1c028ddf FK |
2183 | } |
2184 | ||
b5c7ceaf | 2185 | static const TypeInfo virtio_blk_info = { |
1c028ddf FK |
2186 | .name = TYPE_VIRTIO_BLK, |
2187 | .parent = TYPE_VIRTIO_DEVICE, | |
2188 | .instance_size = sizeof(VirtIOBlock), | |
467b3f33 | 2189 | .instance_init = virtio_blk_instance_init, |
1c028ddf FK |
2190 | .class_init = virtio_blk_class_init, |
2191 | }; | |
2192 | ||
2193 | static void virtio_register_types(void) | |
2194 | { | |
b5c7ceaf | 2195 | type_register_static(&virtio_blk_info); |
1c028ddf FK |
2196 | } |
2197 | ||
2198 | type_init(virtio_register_types) |