/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qapi/error.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
#include "sysemu/blockdev.h"
#include "sysemu/block-ram-registrar.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
#include "hw/virtio/virtio-blk.h"
#include "dataplane/virtio-blk.h"
#include "scsi/constants.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
#include "hw/virtio/virtio-bus.h"
#include "migration/qemu-file-types.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk-common.h"
#include "qemu/coroutine.h"

static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
                                    VirtIOBlockReq *req)
{
    req->dev = s;
    req->vq = vq;
    req->qiov.size = 0;
    req->in_len = 0;
    req->next = NULL;
    req->mr_next = NULL;
}

static void virtio_blk_free_request(VirtIOBlockReq *req)
{
    g_free(req);
}

static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    trace_virtio_blk_req_complete(vdev, req, status);

    stb_p(&req->in->status, status);
    iov_discard_undo(&req->inhdr_undo);
    iov_discard_undo(&req->outhdr_undo);
    virtqueue_push(req->vq, &req->elem, req->in_len);
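    /*
     * With dataplane active the vq is serviced in its own AioContext, so
     * notify the guest through the dataplane path; otherwise use the
     * regular virtio notifier.
     */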
    if (s->dataplane_started && !s->dataplane_disabled) {
        virtio_blk_data_plane_notify(s->dataplane, req->vq);
    } else {
        virtio_notify(vdev, req->vq);
    }
}

static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
                                      bool is_read, bool acct_failed)
{
    VirtIOBlock *s = req->dev;
    BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* Break the link as the next request is going to be parsed from the
         * ring again. Otherwise we may end up doing a double completion! */
        req->mr_next = NULL;

        WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
            req->next = s->rq;
            s->rq = req;
        }
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
        if (acct_failed) {
            block_acct_failed(blk_get_stats(s->blk), &req->acct);
        }
        virtio_blk_free_request(req);
    }

    blk_error_action(s->blk, action, is_read, error);
    return action != BLOCK_ERROR_ACTION_IGNORE;
}

static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *next = opaque;
    VirtIOBlock *s = next->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    while (next) {
        VirtIOBlockReq *req = next;
        next = req->mr_next;
        trace_virtio_blk_rw_complete(vdev, req, ret);

        if (req->qiov.nalloc != -1) {
            /* If nalloc is != -1 req->qiov is a local copy of the original
             * external iovec. It was allocated in submit_requests to be
             * able to merge requests. */
            qemu_iovec_destroy(&req->qiov);
        }

        if (ret) {
            int p = virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type);
            bool is_read = !(p & VIRTIO_BLK_T_OUT);
            /* Note that memory may be dirtied on read failure. If the
             * virtio request is not completed here, as is the case for
             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
             * correctly during live migration. While this is ugly,
             * it is acceptable because the device is free to write to
             * the memory until the request is completed (which will
             * happen on the other side of the migration).
             */
            if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) {
                continue;
            }
        }

        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        block_acct_done(blk_get_stats(s->blk), &req->acct);
        virtio_blk_free_request(req);
    }
}

static void virtio_blk_flush_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;

    if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    block_acct_done(blk_get_stats(s->blk), &req->acct);
    virtio_blk_free_request(req);
}

static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
                            ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;

    if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
        return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
    if (is_write_zeroes) {
        block_acct_done(blk_get_stats(s->blk), &req->acct);
    }
    virtio_blk_free_request(req);
}

#ifdef __linux__

typedef struct {
    VirtIOBlockReq *req;
    struct sg_io_hdr hdr;
} VirtIOBlockIoctlReq;

static void virtio_blk_ioctl_complete(void *opaque, int status)
{
    VirtIOBlockIoctlReq *ioctl_req = opaque;
    VirtIOBlockReq *req = ioctl_req->req;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    struct virtio_scsi_inhdr *scsi;
    struct sg_io_hdr *hdr;

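    /* The virtio_scsi_inhdr sits in the second-to-last input segment. */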
    scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;

    if (status) {
        status = VIRTIO_BLK_S_UNSUPP;
        virtio_stl_p(vdev, &scsi->errors, 255);
        goto out;
    }

    hdr = &ioctl_req->hdr;
    /*
     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
     * clear the masked_status field [hence status gets cleared too, see
     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
     * status has occurred. However they do set DRIVER_SENSE in driver_status
     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
     */
    if (hdr->status == 0 && hdr->sb_len_wr > 0) {
        hdr->status = CHECK_CONDITION;
    }

    virtio_stl_p(vdev, &scsi->errors,
                 hdr->status | (hdr->msg_status << 8) |
                 (hdr->host_status << 16) | (hdr->driver_status << 24));
    virtio_stl_p(vdev, &scsi->residual, hdr->resid);
    virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
    virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);

out:
    virtio_blk_req_complete(req, status);
    virtio_blk_free_request(req);
    g_free(ioctl_req);
}

#endif

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));

    if (req) {
        virtio_blk_init_request(s, vq, req);
    }
    return req;
}

static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
{
    int status = VIRTIO_BLK_S_OK;
    struct virtio_scsi_inhdr *scsi = NULL;
    VirtIOBlock *blk = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(blk);
    VirtQueueElement *elem = &req->elem;

#ifdef __linux__
    int i;
    VirtIOBlockIoctlReq *ioctl_req;
    BlockAIOCB *acb;
#endif

    /*
     * We require at least one output segment each for the virtio_blk_outhdr
     * and the SCSI command block.
     *
     * We also require at least the virtio_blk_inhdr, the virtio_scsi_inhdr
     * and the sense buffer pointer in the input segments.
     */
    if (elem->out_num < 2 || elem->in_num < 3) {
        status = VIRTIO_BLK_S_IOERR;
        goto fail;
    }

    /*
     * The scsi inhdr is placed in the second-to-last input segment, just
     * before the regular inhdr.
     */
    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;

    if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

    /*
     * No support for bidirectional commands yet.
     */
    if (elem->out_num > 2 && elem->in_num > 3) {
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }

#ifdef __linux__
    ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
    ioctl_req->req = req;
    ioctl_req->hdr.interface_id = 'S';
    ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
    ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
    ioctl_req->hdr.dxfer_len = 0;

    if (elem->out_num > 2) {
        /*
         * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
        ioctl_req->hdr.iovec_count = elem->out_num - 2;

        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->out_sg + 2;

    } else if (elem->in_num > 3) {
        /*
         * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        ioctl_req->hdr.iovec_count = elem->in_num - 3;
        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
            ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
        }

        ioctl_req->hdr.dxferp = elem->in_sg;
    } else {
        /*
         * Some SCSI commands don't actually transfer any data.
         */
        ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
    }

    ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
    ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;

    acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
                        virtio_blk_ioctl_complete, ioctl_req);
    if (!acb) {
        g_free(ioctl_req);
        status = VIRTIO_BLK_S_UNSUPP;
        goto fail;
    }
    return -EINPROGRESS;
#else
    abort();
#endif

fail:
    /* Just put anything nonzero so that the ioctl fails in the guest. */
    if (scsi) {
        virtio_stl_p(vdev, &scsi->errors, 255);
    }
    return status;
}

static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
    int status;

    status = virtio_blk_handle_scsi_req(req);
    if (status != -EINPROGRESS) {
        virtio_blk_req_complete(req, status);
        virtio_blk_free_request(req);
    }
}

static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
                                   int start, int num_reqs, int niov)
{
    BlockBackend *blk = s->blk;
    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
    int64_t sector_num = mrb->reqs[start]->sector_num;
    bool is_write = mrb->is_write;
    BdrvRequestFlags flags = 0;

    if (num_reqs > 1) {
        int i;
        struct iovec *tmp_iov = qiov->iov;
        int tmp_niov = qiov->niov;

        /* mrb->reqs[start]->qiov was initialized from external so we can't
         * modify it here. We need to initialize it locally and then add the
         * external iovecs. */
        qemu_iovec_init(qiov, niov);

        for (i = 0; i < tmp_niov; i++) {
            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
        }

        for (i = start + 1; i < start + num_reqs; i++) {
            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
                              mrb->reqs[i]->qiov.size);
            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
        }

        trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev),
                                         mrb, start, num_reqs,
                                         sector_num << BDRV_SECTOR_BITS,
                                         qiov->size, is_write);
        block_acct_merge_done(blk_get_stats(blk),
                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
                              num_reqs - 1);
    }

    if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
        flags |= BDRV_REQ_REGISTERED_BUF;
    }

    if (is_write) {
        blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                        flags, virtio_blk_rw_complete,
                        mrb->reqs[start]);
    } else {
        blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
                       flags, virtio_blk_rw_complete,
                       mrb->reqs[start]);
    }
}

static int multireq_compare(const void *a, const void *b)
{
    const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
                         *req2 = *(VirtIOBlockReq **)b;

    /*
     * Note that we can't simply subtract sector_num1 from sector_num2
     * here as that could overflow the return value.
     */
    if (req1->sector_num > req2->sector_num) {
        return 1;
    } else if (req1->sector_num < req2->sector_num) {
        return -1;
    } else {
        return 0;
    }
}

static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
{
    int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
    uint32_t max_transfer;
    int64_t sector_num = 0;

    if (mrb->num_reqs == 1) {
        submit_requests(s, mrb, 0, 1, -1);
        mrb->num_reqs = 0;
        return;
    }

    max_transfer = blk_get_max_transfer(mrb->reqs[0]->dev->blk);

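    /* Sort by sector so adjacent sequential requests can be merged below. */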
    qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
          &multireq_compare);

    for (i = 0; i < mrb->num_reqs; i++) {
        VirtIOBlockReq *req = mrb->reqs[i];
        if (num_reqs > 0) {
            /*
             * NOTE: We cannot merge the requests in below situations:
             * 1. requests are not sequential
             * 2. merge would exceed maximum number of IOVs
             * 3. merge would exceed maximum transfer length of backend device
             */
            if (sector_num + nb_sectors != req->sector_num ||
                niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
                req->qiov.size > max_transfer ||
                nb_sectors > (max_transfer -
                              req->qiov.size) / BDRV_SECTOR_SIZE) {
                submit_requests(s, mrb, start, num_reqs, niov);
                num_reqs = 0;
            }
        }

        if (num_reqs == 0) {
            sector_num = req->sector_num;
            nb_sectors = niov = 0;
            start = i;
        }

        nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
        niov += req->qiov.niov;
        num_reqs++;
    }

    submit_requests(s, mrb, start, num_reqs, niov);
    mrb->num_reqs = 0;
}

static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    VirtIOBlock *s = req->dev;

    block_acct_start(blk_get_stats(s->blk), &req->acct, 0,
                     BLOCK_ACCT_FLUSH);

    /*
     * Make sure all outstanding writes are posted to the backing device.
     */
    if (mrb->is_write && mrb->num_reqs > 0) {
        virtio_blk_submit_multireq(s, mrb);
    }
    blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
}

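/*
 * Validate that a request is aligned to the logical block size, lies
 * entirely within the device, and does not exceed the per-request limit.
 */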
static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                     uint64_t sector, size_t size)
{
    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
    uint64_t total_sectors;

    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return false;
    }
    if (sector & dev->sector_mask) {
        return false;
    }
    if (size % dev->conf.conf.logical_block_size) {
        return false;
    }
    blk_get_geometry(dev->blk, &total_sectors);
    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
        return false;
    }
    return true;
}

static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
    struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint64_t sector;
    uint32_t num_sectors, flags, max_sectors;
    uint8_t err_status;
    int bytes;

    sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
    num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
    flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
    max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
                  s->conf.max_discard_sectors;

    /*
     * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, so this check
     * ensures that "num_sectors << BDRV_SECTOR_BITS" fits in the
     * integer variable.
     */
    if (unlikely(num_sectors > max_sectors)) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    bytes = num_sectors << BDRV_SECTOR_BITS;

    if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
        err_status = VIRTIO_BLK_S_IOERR;
        goto err;
    }

    /*
     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
     * and write zeroes commands if any unknown flag is set.
     */
    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
        err_status = VIRTIO_BLK_S_UNSUPP;
        goto err;
    }

    if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
        int blk_aio_flags = 0;

        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
            blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
                         BLOCK_ACCT_WRITE);

        blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
                              bytes, blk_aio_flags,
                              virtio_blk_discard_write_zeroes_complete, req);
    } else { /* VIRTIO_BLK_T_DISCARD */
        /*
         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
         * discard commands if the unmap flag is set.
         */
        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
            err_status = VIRTIO_BLK_S_UNSUPP;
            goto err;
        }

        blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
                         virtio_blk_discard_write_zeroes_complete, req);
    }

    return VIRTIO_BLK_S_OK;

err:
    if (is_write_zeroes) {
        block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
    }
    return err_status;
}

typedef struct ZoneCmdData {
    VirtIOBlockReq *req;
    struct iovec *in_iov;
    unsigned in_num;
    union {
        struct {
            unsigned int nr_zones;
            BlockZoneDescriptor *zones;
        } zone_report_data;
        struct {
            int64_t offset;
        } zone_append_data;
    };
} ZoneCmdData;

/*
 * check_zoned_request: error checking before issuing requests. Returns true
 * if all checks pass.
 * append: true when called for a zone append request.
 */
static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
                                bool append, uint8_t *status) {
    BlockDriverState *bs = blk_bs(s->blk);
    int index;

    if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
        *status = VIRTIO_BLK_S_UNSUPP;
        return false;
    }

    if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS)
        || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) {
        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    if (append) {
        if (bs->bl.write_granularity) {
            if ((offset % bs->bl.write_granularity) != 0) {
                *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
                return false;
            }
        }

        index = offset / bs->bl.zone_size;
        if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) {
            *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            return false;
        }

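        /* len is in bytes; compare in 512-byte sectors against the limit. */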
        if (len / 512 > bs->bl.max_append_sectors) {
            if (bs->bl.max_append_sectors == 0) {
                *status = VIRTIO_BLK_S_UNSUPP;
            } else {
                *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
            }
            return false;
        }
    }
    return true;
}

static void virtio_blk_zone_report_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    struct iovec *in_iov = data->in_iov;
    unsigned in_num = data->in_num;
    int64_t zrp_size, n, j = 0;
    int64_t nz = data->zone_report_data.nr_zones;
    int8_t err_status = VIRTIO_BLK_S_OK;

    trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
        .nr_zones = cpu_to_le64(nz),
    };
    zrp_size = sizeof(struct virtio_blk_zone_report)
               + sizeof(struct virtio_blk_zone_descriptor) * nz;
    n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
    if (n != sizeof(zrp_hdr)) {
        virtio_error(vdev, "Driver provided input buffer that is too small!");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    for (size_t i = sizeof(zrp_hdr); i < zrp_size;
         i += sizeof(struct virtio_blk_zone_descriptor), ++j) {
        struct virtio_blk_zone_descriptor desc =
            (struct virtio_blk_zone_descriptor) {
                .z_start = cpu_to_le64(data->zone_report_data.zones[j].start
                                       >> BDRV_SECTOR_BITS),
                .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap
                                     >> BDRV_SECTOR_BITS),
                .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp
                                    >> BDRV_SECTOR_BITS),
        };

        switch (data->zone_report_data.zones[j].type) {
        case BLK_ZT_CONV:
            desc.z_type = VIRTIO_BLK_ZT_CONV;
            break;
        case BLK_ZT_SWR:
            desc.z_type = VIRTIO_BLK_ZT_SWR;
            break;
        case BLK_ZT_SWP:
            desc.z_type = VIRTIO_BLK_ZT_SWP;
            break;
        default:
            g_assert_not_reached();
        }

        switch (data->zone_report_data.zones[j].state) {
        case BLK_ZS_RDONLY:
            desc.z_state = VIRTIO_BLK_ZS_RDONLY;
            break;
        case BLK_ZS_OFFLINE:
            desc.z_state = VIRTIO_BLK_ZS_OFFLINE;
            break;
        case BLK_ZS_EMPTY:
            desc.z_state = VIRTIO_BLK_ZS_EMPTY;
            break;
        case BLK_ZS_CLOSED:
            desc.z_state = VIRTIO_BLK_ZS_CLOSED;
            break;
        case BLK_ZS_FULL:
            desc.z_state = VIRTIO_BLK_ZS_FULL;
            break;
        case BLK_ZS_EOPEN:
            desc.z_state = VIRTIO_BLK_ZS_EOPEN;
            break;
        case BLK_ZS_IOPEN:
            desc.z_state = VIRTIO_BLK_ZS_IOPEN;
            break;
        case BLK_ZS_NOT_WP:
            desc.z_state = VIRTIO_BLK_ZS_NOT_WP;
            break;
        default:
            g_assert_not_reached();
        }

        /* TODO: this loop is O(n^2) because iov_from_buf() rescans the
         * iovec from the start on every call; optimization required. */
        n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc));
        if (n != sizeof(desc)) {
            virtio_error(vdev, "Driver provided input buffer "
                               "for descriptors that is too small!");
            err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        }
    }

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data->zone_report_data.zones);
    g_free(data);
}

static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
                                          struct iovec *in_iov,
                                          unsigned in_num)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    unsigned int nr_zones;
    ZoneCmdData *data;
    int64_t zone_size, offset;
    uint8_t err_status;

    if (req->in_len < sizeof(struct virtio_blk_inhdr) +
            sizeof(struct virtio_blk_zone_report) +
            sizeof(struct virtio_blk_zone_descriptor)) {
        virtio_error(vdev, "in buffer too small for zone report");
        return;
    }

    /* start byte offset of the zone report */
    offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
        goto out;
    }
    nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
                sizeof(struct virtio_blk_zone_report)) /
               sizeof(struct virtio_blk_zone_descriptor);
    trace_virtio_blk_handle_zone_report(vdev, req,
                                        offset >> BDRV_SECTOR_BITS, nr_zones);

    zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
    data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_report_data.nr_zones = nr_zones;
    data->zone_report_data.zones = g_malloc(zone_size);

    blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
                        data->zone_report_data.zones,
                        virtio_blk_zone_report_complete, data);
    return;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    int8_t err_status = VIRTIO_BLK_S_OK;
    trace_virtio_blk_zone_mgmt_complete(vdev, req, ret);

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
    }

    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}

static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    BlockDriverState *bs = blk_bs(s->blk);
    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    uint64_t len;
    uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    uint32_t type = virtio_ldl_p(vdev, &req->out.type);
    if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
        /* Entire drive capacity */
        offset = 0;
        len = capacity;
        trace_virtio_blk_handle_zone_reset_all(vdev, req, 0,
                                               bs->total_sectors);
    } else {
        if (bs->bl.zone_size > capacity - offset) {
            /* The zoned device allows the last smaller zone. */
            len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
        } else {
            len = bs->bl.zone_size;
        }
        trace_virtio_blk_handle_zone_mgmt(vdev, req, op,
                                          offset >> BDRV_SECTOR_BITS,
                                          len >> BDRV_SECTOR_BITS);
    }

    if (!check_zoned_request(s, offset, len, false, &err_status)) {
        goto out;
    }

    blk_aio_zone_mgmt(s->blk, op, offset, len,
                      virtio_blk_zone_mgmt_complete, req);

    return 0;
out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

static void virtio_blk_zone_append_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    int64_t append_sector, n;
    uint8_t err_status = VIRTIO_BLK_S_OK;

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    virtio_stq_p(vdev, &append_sector,
                 data->zone_append_data.offset >> BDRV_SECTOR_BITS);
    n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
                     sizeof(append_sector));
    if (n != sizeof(append_sector)) {
        virtio_error(vdev, "Driver provided input buffer less than size of "
                           "append_sector");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }
    trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    g_free(data);
}

static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
                                         struct iovec *out_iov,
                                         struct iovec *in_iov,
                                         uint64_t out_num,
                                         unsigned in_num) {
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint8_t err_status = VIRTIO_BLK_S_OK;

    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    int64_t len = iov_size(out_iov, out_num);

    trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
    if (!check_zoned_request(s, offset, len, true, &err_status)) {
        goto out;
    }

    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_append_data.offset = offset;
    qemu_iovec_init_external(&req->qiov, out_iov, out_num);

    block_acct_start(blk_get_stats(s->blk), &req->acct, len,
                     BLOCK_ACCT_ZONE_APPEND);

    blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
                        virtio_blk_zone_append_complete, data);
    return 0;

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}

static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
    uint32_t type;
    struct iovec *in_iov = req->elem.in_sg;
    struct iovec *out_iov = req->elem.out_sg;
    unsigned in_num = req->elem.in_num;
    unsigned out_num = req->elem.out_num;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
        virtio_error(vdev, "virtio-blk missing headers");
        return -1;
    }

    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        virtio_error(vdev, "virtio-blk request outhdr too short");
        return -1;
    }

    iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out),
                               &req->outhdr_undo);

    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        virtio_error(vdev, "virtio-blk request inhdr too short");
        iov_discard_undo(&req->outhdr_undo);
        return -1;
    }

    /* We always touch the last byte, so just see how big in_iov is. */
    req->in_len = iov_size(in_iov, in_num);
    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);
    iov_discard_back_undoable(in_iov, &in_num, sizeof(struct virtio_blk_inhdr),
                              &req->inhdr_undo);

    type = virtio_ldl_p(vdev, &req->out.type);

    /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
     * is an optional flag. Although a guest should not send this flag when
     * it was not negotiated, we ignored it in the past, so keep ignoring it. */
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN:
    {
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = virtio_ldq_p(vdev, &req->out.sector);

        if (is_write) {
            qemu_iovec_init_external(&req->qiov, out_iov, out_num);
            trace_virtio_blk_handle_write(vdev, req, req->sector_num,
                                          req->qiov.size / BDRV_SECTOR_SIZE);
        } else {
            qemu_iovec_init_external(&req->qiov, in_iov, in_num);
            trace_virtio_blk_handle_read(vdev, req, req->sector_num,
                                         req->qiov.size / BDRV_SECTOR_SIZE);
        }

        if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
            block_acct_invalid(blk_get_stats(s->blk),
                               is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
            virtio_blk_free_request(req);
            return 0;
        }

        block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size,
                         is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);

        /* merge would exceed maximum number of requests or IO direction
         * changes */
        if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
                                  is_write != mrb->is_write ||
                                  !s->conf.request_merging)) {
            virtio_blk_submit_multireq(s, mrb);
        }

        assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
        mrb->reqs[mrb->num_reqs++] = req;
        mrb->is_write = is_write;
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        virtio_blk_handle_flush(req, mrb);
        break;
    case VIRTIO_BLK_T_ZONE_REPORT:
        virtio_blk_handle_zone_report(req, in_iov, in_num);
        break;
    case VIRTIO_BLK_T_ZONE_OPEN:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN);
        break;
    case VIRTIO_BLK_T_ZONE_CLOSE:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE);
        break;
    case VIRTIO_BLK_T_ZONE_FINISH:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH);
        break;
    case VIRTIO_BLK_T_ZONE_RESET:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_ZONE_RESET_ALL:
        virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET);
        break;
    case VIRTIO_BLK_T_SCSI_CMD:
        virtio_blk_handle_scsi(req);
        break;
    case VIRTIO_BLK_T_GET_ID:
    {
        /*
         * NB: per existing s/n string convention the string is
         * terminated by '\0' only when shorter than buffer.
         */
        const char *serial = s->conf.serial ? s->conf.serial : "";
        size_t size = MIN(strlen(serial) + 1,
                          MIN(iov_size(in_iov, in_num),
                              VIRTIO_BLK_ID_BYTES));
        iov_from_buf(in_iov, in_num, 0, serial, size);
        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
        virtio_blk_free_request(req);
        break;
    }
    case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT:
        /*
         * Pass out_iov/out_num and in_iov/in_num rather than accessing
         * req->elem.out_sg directly, because it may have been modified
         * by virtio_blk_handle_request().
         */
        virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num);
        break;
    /*
     * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
     * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
     * so we must check it explicitly for these requests below.
     */
    case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
    case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
    {
        struct virtio_blk_discard_write_zeroes dwz_hdr;
        size_t out_len = iov_size(out_iov, out_num);
        bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
                               VIRTIO_BLK_T_WRITE_ZEROES;
        uint8_t err_status;

        /*
         * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
         * more than one segment.
         */
        if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
                     out_len > sizeof(dwz_hdr))) {
            virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
            virtio_blk_free_request(req);
            return 0;
        }

        if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
                                sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
            iov_discard_undo(&req->inhdr_undo);
            iov_discard_undo(&req->outhdr_undo);
            virtio_error(vdev, "virtio-blk discard/write_zeroes header"
                         " too short");
            return -1;
        }

        err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
                                                            is_write_zeroes);
        if (err_status != VIRTIO_BLK_S_OK) {
            virtio_blk_req_complete(req, err_status);
            virtio_blk_free_request(req);
        }

        break;
    }
    default:
        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
        virtio_blk_free_request(req);
    }
    return 0;
}

void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {};
    bool suppress_notifications = virtio_queue_get_notification(vq);

    defer_call_begin();

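    /*
     * Disable guest->host notifications while draining the ring to avoid
     * redundant vmexits, then re-enable them and re-check the ring so no
     * request submitted in the window is missed.
     */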
    do {
        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 0);
        }

        while ((req = virtio_blk_get_request(s, vq))) {
            if (virtio_blk_handle_request(req, &mrb)) {
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                break;
            }
        }

        if (suppress_notifications) {
            virtio_queue_set_notification(vq, 1);
        }
    } while (!virtio_queue_empty(vq));

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    defer_call_end();
}

static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = (VirtIOBlock *)vdev;

    if (s->dataplane && !s->dataplane_started) {
        /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
         * dataplane here instead of waiting for .set_status().
         */
        virtio_device_start_ioeventfd(vdev);
        if (!s->dataplane_disabled) {
            return;
        }
    }
    virtio_blk_handle_vq(s, vq);
}

static void virtio_blk_dma_restart_bh(void *opaque)
{
    VirtIOBlock *s = opaque;

    VirtIOBlockReq *req;
    MultiReqBuffer mrb = {};

    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        req = s->rq;
        s->rq = NULL;
    }

    while (req) {
        VirtIOBlockReq *next = req->next;
        if (virtio_blk_handle_request(req, &mrb)) {
            /* Device is now broken and won't do any processing until it gets
             * reset. Already queued requests will be lost: let's purge them.
             */
            while (req) {
                next = req->next;
                virtqueue_detach_element(req->vq, &req->elem, 0);
                virtio_blk_free_request(req);
                req = next;
            }
            break;
        }
        req = next;
    }

    if (mrb.num_reqs) {
        virtio_blk_submit_multireq(s, &mrb);
    }

    /* Paired with inc in virtio_blk_dma_restart_cb() */
    blk_dec_in_flight(s->conf.conf.blk);
}

static void virtio_blk_dma_restart_cb(void *opaque, bool running,
                                      RunState state)
{
    VirtIOBlock *s = opaque;

    if (!running) {
        return;
    }

    /* Paired with dec in virtio_blk_dma_restart_bh() */
    blk_inc_in_flight(s->conf.conf.blk);

    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk),
                            virtio_blk_dma_restart_bh, s);
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    VirtIOBlockReq *req;

    /* Dataplane has stopped... */
    assert(!s->dataplane_started);

    /* ...but requests may still be in flight. */
    blk_drain(s->blk);

    /* We drop queued requests after blk_drain() because blk_drain() itself can
     * produce them. */
    WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
        while (s->rq) {
            req = s->rq;
            s->rq = req->next;

            /* No other threads can access req->vq here */
            virtqueue_detach_element(req->vq, &req->elem, 0);

            virtio_blk_free_request(req);
        }
    }

    blk_set_enable_write_cache(s->blk, s->original_wce);
}

/* coalesce internal state, copy to pci i/o region 0
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    BlockConf *conf = &s->conf.conf;
    BlockDriverState *bs = blk_bs(s->blk);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int64_t length;
    int blk_size = conf->logical_block_size;

    blk_get_geometry(s->blk, &capacity);
    memset(&blkcfg, 0, sizeof(blkcfg));
    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
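    /* seg_max: leave two descriptors spare for the header and status byte. */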
    virtio_stl_p(vdev, &blkcfg.seg_max,
                 s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
    virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
    virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
    virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
    blkcfg.geometry.heads = conf->heads;
    /*
     * We must ensure that the block device capacity is a multiple of
     * the logical block size. If that is not the case, let's use
     * sector_mask to adapt the geometry to have a correct picture.
     * For those devices where the capacity is ok for the given geometry
     * we don't touch the sector value of the geometry, since some devices
     * (like s390 dasd) need a specific value. Here the capacity is already
     * cyls*heads*secs*blk_size and the sector value is not block size
     * divided by 512 - instead it is the amount of blk_size blocks
     * per track (cylinder).
     */
    length = blk_getlength(s->blk);
    if (length > 0 && length / conf->heads / conf->secs % blk_size) {
        blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
    } else {
        blkcfg.geometry.sectors = conf->secs;
    }
    blkcfg.size_max = 0;
    blkcfg.physical_block_exp = get_physical_block_exp(conf);
    blkcfg.alignment_offset = 0;
    blkcfg.wce = blk_enable_write_cache(s->blk);
    virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
        uint32_t discard_granularity = conf->discard_granularity;
        if (discard_granularity == -1 || !s->conf.report_discard_granularity) {
            discard_granularity = blk_size;
        }
        virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
                     s->conf.max_discard_sectors);
        virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
                     discard_granularity >> BDRV_SECTOR_BITS);
        /*
         * We support only one segment per request since multiple segments
         * are not widely used and there are no userspace APIs that allow
         * applications to submit multiple segments in a single call.
         */
        virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
    }
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
                     s->conf.max_write_zeroes_sectors);
        blkcfg.write_zeroes_may_unmap = 1;
        virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
    }
    if (bs->bl.zoned != BLK_Z_NONE) {
        switch (bs->bl.zoned) {
        case BLK_Z_HM:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HM;
            break;
        case BLK_Z_HA:
            blkcfg.zoned.model = VIRTIO_BLK_Z_HA;
            break;
        default:
            g_assert_not_reached();
        }

        virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors,
                     bs->bl.zone_size / 512);
        virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones,
                     bs->bl.max_active_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones,
                     bs->bl.max_open_zones);
        virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size);
        virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors,
                     bs->bl.max_append_sectors);
    } else {
        blkcfg.zoned.model = VIRTIO_BLK_Z_NONE;
    }
    memcpy(config, &blkcfg, s->config_size);
}

static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);
    struct virtio_blk_config blkcfg;

    memcpy(&blkcfg, config, s->config_size);

    blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
}

static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIOBlock *s = VIRTIO_BLK(vdev);

    /* First, sync all features that virtio-blk could possibly support */
1347 | features |= s->host_features; | |
1348 | ||
1349 | virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); | |
1350 | virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); | |
1351 | virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); | |
1352 | virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); | |
1353 | if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { | |
1354 | if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) { | |
1355 | error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); | |
1356 | return 0; | |
1357 | } | |
1358 | } else { | |
1359 | virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT); | |
1360 | virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); | |
1361 | } | |
1362 | ||
1363 | if (blk_enable_write_cache(s->blk) || | |
1364 | (s->conf.x_enable_wce_if_config_wce && | |
1365 | virtio_has_feature(features, VIRTIO_BLK_F_CONFIG_WCE))) { | |
1366 | virtio_add_feature(&features, VIRTIO_BLK_F_WCE); | |
1367 | } | |
1368 | if (!blk_is_writable(s->blk)) { | |
1369 | virtio_add_feature(&features, VIRTIO_BLK_F_RO); | |
1370 | } | |
1371 | if (s->conf.num_queues > 1) { | |
1372 | virtio_add_feature(&features, VIRTIO_BLK_F_MQ); | |
1373 | } | |
1374 | ||
1375 | return features; | |
1376 | } | |
1377 | ||
1378 | static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) | |
1379 | { | |
1380 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1381 | ||
1382 | if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { | |
1383 | assert(!s->dataplane_started); | |
1384 | } | |
1385 | ||
1386 | if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { | |
1387 | return; | |
1388 | } | |
1389 | ||
1390 | /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send | |
1391 | * cache flushes. Thus, the "auto writethrough" behavior is never | |
1392 | * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. | |
1393 | * Leaving it enabled would break the following sequence: | |
1394 | * | |
1395 | * Guest started with "-drive cache=writethrough" | |
1396 | * Guest sets status to 0 | |
1397 | * Guest sets DRIVER bit in status field | |
1398 | * Guest reads host features (WCE=0, CONFIG_WCE=1) | |
1399 | * Guest writes guest features (WCE=0, CONFIG_WCE=1) | |
1400 | * Guest writes 1 to the WCE configuration field (writeback mode) | |
1401 | * Guest sets DRIVER_OK bit in status field | |
1402 | * | |
1403 | * s->blk would erroneously be placed in writethrough mode. | |
1404 | */ | |
1405 | if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { | |
1406 | blk_set_enable_write_cache(s->blk, | |
1407 | virtio_vdev_has_feature(vdev, | |
1408 | VIRTIO_BLK_F_WCE)); | |
1409 | } | |
1410 | } | |
1411 | ||
1412 | static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) | |
1413 | { | |
1414 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1415 | ||
1416 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { | |
1417 | VirtIOBlockReq *req = s->rq; | |
1418 | ||
1419 | while (req) { | |
1420 | qemu_put_sbyte(f, 1); | |
1421 | ||
1422 | if (s->conf.num_queues > 1) { | |
1423 | qemu_put_be32(f, virtio_get_queue_index(req->vq)); | |
1424 | } | |
1425 | ||
1426 | qemu_put_virtqueue_element(vdev, f, &req->elem); | |
1427 | req = req->next; | |
1428 | } | |
1429 | } | |
1430 | ||
1431 | qemu_put_sbyte(f, 0); | |
1432 | } | |
1433 | ||
1434 | static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, | |
1435 | int version_id) | |
1436 | { | |
1437 | VirtIOBlock *s = VIRTIO_BLK(vdev); | |
1438 | ||
1439 | while (qemu_get_sbyte(f)) { | |
1440 | unsigned nvqs = s->conf.num_queues; | |
1441 | unsigned vq_idx = 0; | |
1442 | VirtIOBlockReq *req; | |
1443 | ||
1444 | if (nvqs > 1) { | |
1445 | vq_idx = qemu_get_be32(f); | |
1446 | ||
1447 | if (vq_idx >= nvqs) { | |
1448 | error_report("Invalid virtqueue index in request list: %#x", | |
1449 | vq_idx); | |
1450 | return -EINVAL; | |
1451 | } | |
1452 | } | |
1453 | ||
1454 | req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); | |
1455 | virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); | |
1456 | ||
1457 | WITH_QEMU_LOCK_GUARD(&s->rq_lock) { | |
1458 | req->next = s->rq; | |
1459 | s->rq = req; | |
1460 | } | |
1461 | } | |
1462 | ||
1463 | return 0; | |
1464 | } | |
1465 | ||
1466 | static void virtio_resize_cb(void *opaque) | |
1467 | { | |
1468 | VirtIODevice *vdev = opaque; | |
1469 | ||
1470 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | |
1471 | virtio_notify_config(vdev); | |
1472 | } | |
1473 | ||
1474 | static void virtio_blk_resize(void *opaque) | |
1475 | { | |
1476 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | |
1477 | ||
1478 | /* | |
1479 | * virtio_notify_config() needs to acquire the global mutex, | |
1480 | * so it can't be called from an iothread. Instead, schedule | |
1481 | * it to be run in the main context BH. | |
1482 | */ | |
1483 | aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); | |
1484 | } | |
1485 | ||
1486 | /* Suspend virtqueue ioeventfd processing during drain */ | |
1487 | static void virtio_blk_drained_begin(void *opaque) | |
1488 | { | |
1489 | VirtIOBlock *s = opaque; | |
1490 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | |
1491 | AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); | |
1492 | ||
1493 | if (!s->dataplane || !s->dataplane_started) { | |
1494 | return; | |
1495 | } | |
1496 | ||
1497 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1498 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1499 | virtio_queue_aio_detach_host_notifier(vq, ctx); | |
1500 | } | |
1501 | } | |
1502 | ||
1503 | /* Resume virtqueue ioeventfd processing after drain */ | |
1504 | static void virtio_blk_drained_end(void *opaque) | |
1505 | { | |
1506 | VirtIOBlock *s = opaque; | |
1507 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | |
1508 | AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); | |
1509 | ||
1510 | if (!s->dataplane || !s->dataplane_started) { | |
1511 | return; | |
1512 | } | |
1513 | ||
1514 | for (uint16_t i = 0; i < s->conf.num_queues; i++) { | |
1515 | VirtQueue *vq = virtio_get_queue(vdev, i); | |
1516 | virtio_queue_aio_attach_host_notifier(vq, ctx); | |
1517 | } | |
1518 | } | |
1519 | ||
1520 | static const BlockDevOps virtio_block_ops = { | |
1521 | .resize_cb = virtio_blk_resize, | |
1522 | .drained_begin = virtio_blk_drained_begin, | |
1523 | .drained_end = virtio_blk_drained_end, | |
1524 | }; | |
1525 | ||
1526 | static void virtio_blk_device_realize(DeviceState *dev, Error **errp) | |
1527 | { | |
1528 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
1529 | VirtIOBlock *s = VIRTIO_BLK(dev); | |
1530 | VirtIOBlkConf *conf = &s->conf; | |
1531 | Error *err = NULL; | |
1532 | unsigned i; | |
1533 | ||
1534 | if (!conf->conf.blk) { | |
1535 | error_setg(errp, "drive property not set"); | |
1536 | return; | |
1537 | } | |
1538 | if (!blk_is_inserted(conf->conf.blk)) { | |
1539 | error_setg(errp, "Device needs media, but drive is empty"); | |
1540 | return; | |
1541 | } | |
1542 | if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { | |
1543 | conf->num_queues = 1; | |
1544 | } | |
1545 | if (!conf->num_queues) { | |
1546 | error_setg(errp, "num-queues property must be larger than 0"); | |
1547 | return; | |
1548 | } | |
    if (conf->queue_size <= 2) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be > 2", conf->queue_size);
        return;
    }
    if (!is_power_of_2(conf->queue_size) ||
        conf->queue_size > VIRTQUEUE_MAX_SIZE) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be a power of 2 (max %d)",
                   conf->queue_size, VIRTQUEUE_MAX_SIZE);
        return;
    }

    if (!blkconf_apply_backend_options(&conf->conf,
                                       !blk_supports_write_perm(conf->conf.blk),
                                       true, errp)) {
        return;
    }
    s->original_wce = blk_enable_write_cache(conf->conf.blk);
    if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
        return;
    }

    if (!blkconf_blocksizes(&conf->conf, errp)) {
        return;
    }

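    /*
     * Expose the zoned model of the backend to the guest. Host-managed
     * (BLK_Z_HM) devices have sequential-write-required zones that are
     * reclaimed by zone reset rather than discard, so the DISCARD
     * feature is not offered for them.
     */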
    BlockDriverState *bs = blk_bs(conf->conf.blk);
    if (bs->bl.zoned != BLK_Z_NONE) {
        virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
        if (bs->bl.zoned == BLK_Z_HM) {
            virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
        }
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
        (!conf->max_discard_sectors ||
         conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
                   ", must be between 1 and %d",
                   conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
        (!conf->max_write_zeroes_sectors ||
         conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
                   "), must be between 1 and %d",
                   conf->max_write_zeroes_sectors,
                   (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }

    s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
                                            s->host_features);
    virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);

    qemu_mutex_init(&s->rq_lock);

    s->blk = conf->conf.blk;
    s->rq = NULL;
    s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;

    for (i = 0; i < conf->num_queues; i++) {
        virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
    }
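    /*
     * Each request occupies at least two virtqueue descriptors
     * (out-header and status), so roughly queue_size / 2 requests can
     * be in flight per queue. Grow the coroutine pool up front so
     * request coroutines are recycled rather than allocated on demand.
     */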
    qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
    virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        for (i = 0; i < conf->num_queues; i++) {
            virtio_del_queue(vdev, i);
        }
        virtio_cleanup(vdev);
        return;
    }

    /*
     * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
     * called after ->start_ioeventfd() has already set blk's AioContext.
     */
    s->change =
        qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);

    blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
    blk_set_dev_ops(s->blk, &virtio_block_ops, s);

    blk_iostatus_enable(s->blk);

    add_boot_device_lchs(dev, "/disk@0,0",
                         conf->conf.lcyls,
                         conf->conf.lheads,
                         conf->conf.lsecs);
}

static void virtio_blk_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBlock *s = VIRTIO_BLK(dev);
    VirtIOBlkConf *conf = &s->conf;
    unsigned i;

    blk_drain(s->blk);
    del_boot_device_lchs(dev, "/disk@0,0");
    virtio_blk_data_plane_destroy(s->dataplane);
    s->dataplane = NULL;
    for (i = 0; i < conf->num_queues; i++) {
        virtio_del_queue(vdev, i);
    }
    qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
    qemu_mutex_destroy(&s->rq_lock);
    blk_ram_registrar_destroy(&s->blk_ram_registrar);
    qemu_del_vm_change_state_handler(s->change);
    blockdev_mark_auto_del(s->blk);
    virtio_cleanup(vdev);
}

static void virtio_blk_instance_init(Object *obj)
{
    VirtIOBlock *s = VIRTIO_BLK(obj);

    device_add_bootindex_property(obj, &s->conf.conf.bootindex,
                                  "bootindex", "/disk@0,0",
                                  DEVICE(obj));
}

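/*
 * Only the generic virtio state is described here; the device-specific
 * state (the list of requests pending at migration time) is handled by
 * the save/load hooks installed in virtio_blk_class_init() below.
 */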
static const VMStateDescription vmstate_virtio_blk = {
    .name = "virtio-blk",
    .minimum_version_id = 2,
    .version_id = 2,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_blk_properties[] = {
    DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
    DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf),
    DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
    DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features,
                      VIRTIO_BLK_F_CONFIG_WCE, true),
#ifdef __linux__
    DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features,
                      VIRTIO_BLK_F_SCSI, false),
#endif
    DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
                    true),
    DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues,
                       VIRTIO_BLK_AUTO_NUM_QUEUES),
    DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256),
    DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true),
    DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
                     IOThread *),
    DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
                      VIRTIO_BLK_F_DISCARD, true),
    DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock,
                     conf.report_discard_granularity, true),
    DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features,
                      VIRTIO_BLK_F_WRITE_ZEROES, true),
    DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock,
                       conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS),
    DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock,
                       conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS),
    DEFINE_PROP_BOOL("x-enable-wce-if-config-wce", VirtIOBlock,
                     conf.x_enable_wce_if_config_wce, true),
    DEFINE_PROP_END_OF_LIST(),
};
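
/*
 * Illustrative usage (an assumption for this note, not taken from this
 * file): these qdev properties are typically set through a transport
 * device such as virtio-blk-pci, e.g.:
 *
 *   qemu-system-x86_64 \
 *       -object iothread,id=iothread0 \
 *       -drive file=disk.img,format=raw,if=none,id=drive0 \
 *       -device virtio-blk-pci,drive=drive0,iothread=iothread0,num-queues=4
 */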

static void virtio_blk_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_blk_properties);
    dc->vmsd = &vmstate_virtio_blk;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->realize = virtio_blk_device_realize;
    vdc->unrealize = virtio_blk_device_unrealize;
    vdc->get_config = virtio_blk_update_config;
    vdc->set_config = virtio_blk_set_config;
    vdc->get_features = virtio_blk_get_features;
    vdc->set_status = virtio_blk_set_status;
    vdc->reset = virtio_blk_reset;
    vdc->save = virtio_blk_save_device;
    vdc->load = virtio_blk_load_device;
    vdc->start_ioeventfd = virtio_blk_data_plane_start;
    vdc->stop_ioeventfd = virtio_blk_data_plane_stop;
}

static const TypeInfo virtio_blk_info = {
    .name = TYPE_VIRTIO_BLK,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBlock),
    .instance_init = virtio_blk_instance_init,
    .class_init = virtio_blk_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_blk_info);
}

type_init(virtio_register_types)