4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include <qemu-common.h>
15 #include "qemu-error.h"
17 #include "virtio-blk.h"
/*
 * Per-device state for one virtio block device.
 * NOTE(review): this view of the struct is truncated by the extraction —
 * fields used elsewhere in the file (vdev, bs, vq, rq, bh, conf, qdev)
 * are not visible here; confirm against the full source.
 */
22 typedef struct VirtIOBlock
/* Mask for sector alignment checks: logical_block_size / BDRV_SECTOR_SIZE - 1
 * (set in virtio_blk_init). */
30 unsigned short sector_mask
;
/* Drive serial number; per the note in virtio_blk_init it is only
 * NUL-terminated when shorter than the buffer. */
31 char sn
[BLOCK_SERIAL_STRLEN
];
35 static VirtIOBlock
*to_virtio_blk(VirtIODevice
*vdev
)
37 return (VirtIOBlock
*)vdev
;
/*
 * One in-flight guest request popped from the virtqueue.
 * NOTE(review): struct appears truncated in this extraction — the
 * back-pointer to the owning VirtIOBlock and the QEMUIOVector used by
 * read/write handlers are referenced elsewhere but not visible here.
 */
40 typedef struct VirtIOBlockReq
/* The raw virtqueue element (scatter-gather lists) for this request. */
43 VirtQueueElement elem
;
/* Guest-visible status footer; written in virtio_blk_req_complete. */
44 struct virtio_blk_inhdr
*in
;
/* Guest request header: type and starting sector. */
45 struct virtio_blk_outhdr
*out
;
/* SCSI passthrough response header (errors/residual/sense_len/data_len). */
46 struct virtio_scsi_inhdr
*scsi
;
/* Singly-linked list link for queued requests (s->rq) while stopped. */
48 struct VirtIOBlockReq
*next
;
51 static void virtio_blk_req_complete(VirtIOBlockReq
*req
, int status
)
53 VirtIOBlock
*s
= req
->dev
;
55 req
->in
->status
= status
;
56 virtqueue_push(s
->vq
, &req
->elem
, req
->qiov
.size
+ sizeof(*req
->in
));
57 virtio_notify(&s
->vdev
, s
->vq
);
/*
 * Decide what to do with a failed read/write according to the drive's
 * configured error policy (ignore / stop / report) and emit the matching
 * monitor event. NOTE(review): the return statements and the stop/requeue
 * path appear to be missing from this extraction — confirm against the
 * full source before relying on the visible control flow.
 */
62 static int virtio_blk_handle_rw_error(VirtIOBlockReq
*req
, int error
,
65 BlockErrorAction action
= bdrv_get_on_error(req
->dev
->bs
, is_read
);
66 VirtIOBlock
*s
= req
->dev
;
/* Policy "ignore": report the event and carry on as if nothing happened. */
68 if (action
== BLOCK_ERR_IGNORE
) {
69 bdrv_mon_event(s
->bs
, BDRV_ACTION_IGNORE
, is_read
);
/* Policy "stop": either stop on any error, or only on ENOSPC. */
73 if ((error
== ENOSPC
&& action
== BLOCK_ERR_STOP_ENOSPC
)
74 || action
== BLOCK_ERR_STOP_ANY
) {
77 bdrv_mon_event(s
->bs
, BDRV_ACTION_STOP
, is_read
);
/* Policy "report": fail the request to the guest with VIRTIO_BLK_S_IOERR. */
80 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
81 bdrv_mon_event(s
->bs
, BDRV_ACTION_REPORT
, is_read
);
/*
 * AIO completion callback for reads and multiwrites. On error, defers to
 * the drive's error policy; otherwise completes the request with OK.
 * NOTE(review): the enclosing "if (ret)" guard around the error path seems
 * to be missing from this extraction — verify against the full source.
 */
87 static void virtio_blk_rw_complete(void *opaque
, int ret
)
89 VirtIOBlockReq
*req
= opaque
;
/* Direction is recovered from the request type bit, not from the caller. */
92 int is_read
= !(req
->out
->type
& VIRTIO_BLK_T_OUT
);
/* handle_rw_error negates ret back into a positive errno. */
93 if (virtio_blk_handle_rw_error(req
, -ret
, is_read
))
97 virtio_blk_req_complete(req
, VIRTIO_BLK_S_OK
);
100 static void virtio_blk_flush_complete(void *opaque
, int ret
)
102 VirtIOBlockReq
*req
= opaque
;
104 virtio_blk_req_complete(req
, ret
? VIRTIO_BLK_S_IOERR
: VIRTIO_BLK_S_OK
);
/*
 * Allocate a fresh request object for device s.
 * NOTE(review): the tail of this function (presumably initializing the
 * request's device back-pointer and returning it) is not visible in this
 * extraction.
 */
107 static VirtIOBlockReq
*virtio_blk_alloc_request(VirtIOBlock
*s
)
109 VirtIOBlockReq
*req
= qemu_malloc(sizeof(*req
));
/*
 * Allocate a request and populate it from the next available virtqueue
 * element. NOTE(review): the branch body for an empty queue (presumably
 * freeing the request and returning NULL) and the success return are not
 * visible in this extraction.
 */
116 static VirtIOBlockReq
*virtio_blk_get_request(VirtIOBlock
*s
)
118 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
/* virtqueue_pop fills req->elem; returns 0 when no request is pending. */
121 if (!virtqueue_pop(s
->vq
, &req
->elem
)) {
/*
 * SCSI command passthrough (VIRTIO_BLK_T_SCSI_CMD) via the host SG_IO
 * ioctl. Validates the segment layout, builds an sg_io_hdr from the
 * guest's scatter-gather lists, issues the ioctl and copies the result
 * into the guest's virtio_scsi_inhdr.
 * NOTE(review): several statements (local declarations of i/ret/status,
 * early returns after error completion, some brace lines) are missing
 * from this extraction; the visible fragments are annotated as-is.
 */
131 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
133 struct sg_io_hdr hdr
;
139 * We require at least one output segment each for the virtio_blk_outhdr
140 * and the SCSI command block.
142 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
143 * and the sense buffer pointer in the input segments.
/* Layout violation: fail the request with an I/O error. */
145 if (req
->elem
.out_num
< 2 || req
->elem
.in_num
< 3) {
146 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
151 * No support for bidirection commands yet.
/* Both extra out and extra in segments present => bidirectional: refuse. */
153 if (req
->elem
.out_num
> 2 && req
->elem
.in_num
> 3) {
154 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
159 * The scsi inhdr is placed in the second-to-last input segment, just
160 * before the regular inhdr.
162 req
->scsi
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 2].iov_base
;
/* Build the SG_IO header; 'S' is the mandatory sg interface id. */
164 memset(&hdr
, 0, sizeof(struct sg_io_hdr
));
165 hdr
.interface_id
= 'S';
/* CDB lives in the second output segment. */
166 hdr
.cmd_len
= req
->elem
.out_sg
[1].iov_len
;
167 hdr
.cmdp
= req
->elem
.out_sg
[1].iov_base
;
170 if (req
->elem
.out_num
> 2) {
172 * If there are more than the minimally required 2 output segments
173 * there is write payload starting from the third iovec.
175 hdr
.dxfer_direction
= SG_DXFER_TO_DEV
;
176 hdr
.iovec_count
= req
->elem
.out_num
- 2;
/* dxfer_len accumulates the total write payload across the iovecs. */
178 for (i
= 0; i
< hdr
.iovec_count
; i
++)
179 hdr
.dxfer_len
+= req
->elem
.out_sg
[i
+ 2].iov_len
;
181 hdr
.dxferp
= req
->elem
.out_sg
+ 2;
183 } else if (req
->elem
.in_num
> 3) {
185 * If we have more than 3 input segments the guest wants to actually
188 hdr
.dxfer_direction
= SG_DXFER_FROM_DEV
;
189 hdr
.iovec_count
= req
->elem
.in_num
- 3;
190 for (i
= 0; i
< hdr
.iovec_count
; i
++)
191 hdr
.dxfer_len
+= req
->elem
.in_sg
[i
].iov_len
;
193 hdr
.dxferp
= req
->elem
.in_sg
;
196 * Some SCSI commands don't actually transfer any data.
198 hdr
.dxfer_direction
= SG_DXFER_NONE
;
/* Sense buffer sits in the third-to-last input segment. */
201 hdr
.sbp
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_base
;
202 hdr
.mx_sb_len
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_len
;
/* Issue the passthrough ioctl on the backing block device. */
204 ret
= bdrv_ioctl(req
->dev
->bs
, SG_IO
, &hdr
);
/* ioctl failure => report the command as unsupported; the whole
 * transfer is counted as residual so the guest sees no data moved. */
206 status
= VIRTIO_BLK_S_UNSUPP
;
208 hdr
.resid
= hdr
.dxfer_len
;
209 } else if (hdr
.status
) {
/* Non-zero SCSI status => surface as an I/O error. */
210 status
= VIRTIO_BLK_S_IOERR
;
212 status
= VIRTIO_BLK_S_OK
;
/* Copy the SG_IO results into the guest-visible scsi inhdr. */
215 req
->scsi
->errors
= hdr
.status
;
216 req
->scsi
->residual
= hdr
.resid
;
217 req
->scsi
->sense_len
= hdr
.sb_len_wr
;
218 req
->scsi
->data_len
= hdr
.dxfer_len
;
220 virtio_blk_req_complete(req
, status
);
/*
 * Non-Linux stub: SG_IO passthrough is unavailable, so every SCSI command
 * request is immediately rejected as unsupported.
 */
223 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
225 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
227 #endif /* __linux__ */
/*
 * Batch of up to 32 pending write requests that are submitted together
 * through bdrv_aio_multiwrite so adjacent writes can be merged.
 * NOTE(review): closing of the typedef is not visible in this extraction.
 */
229 typedef struct MultiReqBuffer
{
230 BlockRequest blkreq
[32];
/* Number of valid entries in blkreq. */
231 unsigned int num_writes
;
/*
 * Flush the accumulated write batch to the block layer in one
 * bdrv_aio_multiwrite call; requests that failed to even be submitted
 * are completed with -EIO here (successful submissions complete later
 * via virtio_blk_rw_complete).
 * NOTE(review): the declarations of i/ret, the early return for an empty
 * batch, and the num_writes reset are not visible in this extraction.
 */
234 static void virtio_submit_multiwrite(BlockDriverState
*bs
, MultiReqBuffer
*mrb
)
/* Nothing batched: nothing to do. */
238 if (!mrb
->num_writes
) {
242 ret
= bdrv_aio_multiwrite(bs
, mrb
->blkreq
, mrb
->num_writes
);
/* On submission failure, fail each request that was marked in error. */
244 for (i
= 0; i
< mrb
->num_writes
; i
++) {
245 if (mrb
->blkreq
[i
].error
) {
246 virtio_blk_rw_complete(mrb
->blkreq
[i
].opaque
, -EIO
);
/*
 * Handle VIRTIO_BLK_T_FLUSH: first push out any batched writes so the
 * flush covers them, then issue an async flush whose completion finishes
 * the request. NOTE(review): the check that bdrv_aio_flush returned a
 * valid acb (guarding the IOERR completion below) is not visible in this
 * extraction.
 */
254 static void virtio_blk_handle_flush(VirtIOBlockReq
*req
, MultiReqBuffer
*mrb
)
256 BlockDriverAIOCB
*acb
;
259 * Make sure all outstanding writes are posted to the backing device.
261 virtio_submit_multiwrite(req
->dev
->bs
, mrb
);
263 acb
= bdrv_aio_flush(req
->dev
->bs
, virtio_blk_flush_complete
, req
);
/* Flush could not be started: fail the request immediately. */
265 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
/*
 * Queue a guest write into the multiwrite batch. Unaligned sectors are
 * rejected with -EIO; a full batch (32 entries) is flushed before the new
 * entry is added. NOTE(review): the early return after the alignment
 * failure and the final num_writes increment are not visible in this
 * extraction.
 */
269 static void virtio_blk_handle_write(VirtIOBlockReq
*req
, MultiReqBuffer
*mrb
)
271 BlockRequest
*blkreq
;
/* Reject writes not aligned to the logical block size. */
273 if (req
->out
->sector
& req
->dev
->sector_mask
) {
274 virtio_blk_rw_complete(req
, -EIO
);
/* Batch full: submit it so the next slot is free. */
278 if (mrb
->num_writes
== 32) {
279 virtio_submit_multiwrite(req
->dev
->bs
, mrb
);
/* Fill the next batch slot from the guest request. */
282 blkreq
= &mrb
->blkreq
[mrb
->num_writes
];
283 blkreq
->sector
= req
->out
->sector
;
284 blkreq
->nb_sectors
= req
->qiov
.size
/ BDRV_SECTOR_SIZE
;
285 blkreq
->qiov
= &req
->qiov
;
286 blkreq
->cb
= virtio_blk_rw_complete
;
287 blkreq
->opaque
= req
;
/*
 * Handle a guest read: validate sector alignment, then issue an async
 * readv straight to the block layer (reads are not batched).
 * NOTE(review): the early return after the alignment failure and the
 * check that bdrv_aio_readv returned a valid acb are not visible in this
 * extraction.
 */
293 static void virtio_blk_handle_read(VirtIOBlockReq
*req
)
295 BlockDriverAIOCB
*acb
;
/* Reject reads not aligned to the logical block size. */
297 if (req
->out
->sector
& req
->dev
->sector_mask
) {
298 virtio_blk_rw_complete(req
, -EIO
);
302 acb
= bdrv_aio_readv(req
->dev
->bs
, req
->out
->sector
, &req
->qiov
,
303 req
->qiov
.size
/ BDRV_SECTOR_SIZE
,
304 virtio_blk_rw_complete
, req
);
/* Read could not be started: fail the request immediately. */
306 virtio_blk_rw_complete(req
, -EIO
);
/*
 * Dispatch one popped request: validate the header layout, locate the
 * outhdr (first out segment) and inhdr (last in segment), then branch on
 * the request type: FLUSH, SCSI passthrough, GET_ID (serial number),
 * write (T_OUT set) or read (fall-through else).
 * NOTE(review): the error-exit statements after the fprintf diagnostics
 * and the "} else {" introducing the read branch are not visible in this
 * extraction.
 */
310 static void virtio_blk_handle_request(VirtIOBlockReq
*req
,
/* Every request needs at least one out segment (header) and one in
 * segment (status footer). */
313 if (req
->elem
.out_num
< 1 || req
->elem
.in_num
< 1) {
314 fprintf(stderr
, "virtio-blk missing headers\n");
/* Header segments must be large enough to hold the structs we cast to. */
318 if (req
->elem
.out_sg
[0].iov_len
< sizeof(*req
->out
) ||
319 req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_len
< sizeof(*req
->in
)) {
320 fprintf(stderr
, "virtio-blk header not in correct element\n");
324 req
->out
= (void *)req
->elem
.out_sg
[0].iov_base
;
325 req
->in
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_base
;
327 if (req
->out
->type
& VIRTIO_BLK_T_FLUSH
) {
328 virtio_blk_handle_flush(req
, mrb
);
329 } else if (req
->out
->type
& VIRTIO_BLK_T_SCSI_CMD
) {
330 virtio_blk_handle_scsi(req
);
331 } else if (req
->out
->type
& VIRTIO_BLK_T_GET_ID
) {
332 VirtIOBlock
*s
= req
->dev
;
/* Copy the serial number, clamped to the guest buffer size. */
334 memcpy(req
->elem
.in_sg
[0].iov_base
, s
->sn
,
335 MIN(req
->elem
.in_sg
[0].iov_len
, sizeof(s
->sn
)));
336 virtio_blk_req_complete(req
, VIRTIO_BLK_S_OK
);
337 } else if (req
->out
->type
& VIRTIO_BLK_T_OUT
) {
/* Write payload: everything after the outhdr segment. */
338 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.out_sg
[1],
339 req
->elem
.out_num
- 1);
340 virtio_blk_handle_write(req
, mrb
);
/* Read payload: all in segments except the trailing status footer. */
342 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.in_sg
[0],
343 req
->elem
.in_num
- 1);
344 virtio_blk_handle_read(req
);
/*
 * Virtqueue kick handler: drain all pending requests from the queue,
 * dispatching each one, then submit any write batch accumulated in mrb.
 * NOTE(review): the VirtIOBlockReq pointer declaration and the mrb
 * initializer body are not visible in this extraction.
 */
348 static void virtio_blk_handle_output(VirtIODevice
*vdev
, VirtQueue
*vq
)
350 VirtIOBlock
*s
= to_virtio_blk(vdev
);
352 MultiReqBuffer mrb
= {
356 while ((req
= virtio_blk_get_request(s
))) {
357 virtio_blk_handle_request(req
, &mrb
);
/* Flush the write batch once the queue is drained. */
360 virtio_submit_multiwrite(s
->bs
, &mrb
);
363 * FIXME: Want to check for completions before returning to guest mode,
364 * so cached reads and writes are reported as quickly as possible. But
365 * that should be done in the generic block layer.
/*
 * Bottom half run after the VM resumes: re-issue the requests that were
 * parked on s->rq while the VM was stopped by the error policy.
 * NOTE(review): the loop walking the req->next list and the clearing of
 * s->rq / s->bh are not visible in this extraction.
 */
369 static void virtio_blk_dma_restart_bh(void *opaque
)
371 VirtIOBlock
*s
= opaque
;
372 VirtIOBlockReq
*req
= s
->rq
;
373 MultiReqBuffer mrb
= {
/* One-shot bottom half: delete it now that it has fired. */
377 qemu_bh_delete(s
->bh
);
383 virtio_blk_handle_request(req
, &mrb
);
387 virtio_submit_multiwrite(s
->bs
, &mrb
);
/*
 * VM state-change callback: when the VM starts running again, schedule a
 * bottom half to replay parked requests. NOTE(review): the guard that
 * skips this when !running (and presumably when a bh already exists) is
 * not visible in this extraction.
 */
390 static void virtio_blk_dma_restart_cb(void *opaque
, int running
, int reason
)
392 VirtIOBlock
*s
= opaque
;
398 s
->bh
= qemu_bh_new(virtio_blk_dma_restart_bh
, s
);
399 qemu_bh_schedule(s
->bh
);
/*
 * Device reset hook. Intentionally (almost) empty — see the original
 * comment below. NOTE(review): the function body (likely a qemu_aio_flush
 * call) is not visible in this extraction.
 */
403 static void virtio_blk_reset(VirtIODevice
*vdev
)
406 * This should cancel pending requests, but can't do nicely until there
407 * are per-device request lists.
412 /* coalesce internal state, copy to pci i/o region 0 */
/*
 * get_config hook: fill the guest-visible virtio_blk_config from the
 * backing drive's capacity, geometry and block-size configuration.
 * st{q,l,w}_raw store multi-byte fields; single-byte fields are assigned
 * directly. NOTE(review): the uint64_t capacity declaration is not
 * visible in this extraction.
 */
414 static void virtio_blk_update_config(VirtIODevice
*vdev
, uint8_t *config
)
416 VirtIOBlock
*s
= to_virtio_blk(vdev
);
417 struct virtio_blk_config blkcfg
;
419 int cylinders
, heads
, secs
;
421 bdrv_get_geometry(s
->bs
, &capacity
);
422 bdrv_get_geometry_hint(s
->bs
, &cylinders
, &heads
, &secs
);
423 memset(&blkcfg
, 0, sizeof(blkcfg
));
424 stq_raw(&blkcfg
.capacity
, capacity
);
/* seg_max: queue size (128) minus the two header/footer descriptors. */
425 stl_raw(&blkcfg
.seg_max
, 128 - 2);
426 stw_raw(&blkcfg
.cylinders
, cylinders
);
427 blkcfg
.heads
= heads
;
/* Round sectors-per-track down to a multiple of the logical block size. */
428 blkcfg
.sectors
= secs
& ~s
->sector_mask
;
429 blkcfg
.blk_size
= s
->conf
->logical_block_size
;
431 blkcfg
.physical_block_exp
= get_physical_block_exp(s
->conf
);
432 blkcfg
.alignment_offset
= 0;
/* min/opt I/O sizes are expressed in logical blocks, not bytes. */
433 blkcfg
.min_io_size
= s
->conf
->min_io_size
/ blkcfg
.blk_size
;
434 blkcfg
.opt_io_size
= s
->conf
->opt_io_size
/ blkcfg
.blk_size
;
435 memcpy(config
, &blkcfg
, sizeof(struct virtio_blk_config
));
/*
 * get_features hook: advertise the feature bits this backend supports,
 * plus WCACHE when the backing drive has a write cache and RO when it is
 * read-only. NOTE(review): the final "return features;" is not visible
 * in this extraction.
 */
438 static uint32_t virtio_blk_get_features(VirtIODevice
*vdev
, uint32_t features
)
440 VirtIOBlock
*s
= to_virtio_blk(vdev
);
442 features
|= (1 << VIRTIO_BLK_F_SEG_MAX
);
443 features
|= (1 << VIRTIO_BLK_F_GEOMETRY
);
444 features
|= (1 << VIRTIO_BLK_F_TOPOLOGY
);
445 features
|= (1 << VIRTIO_BLK_F_BLK_SIZE
);
447 if (bdrv_enable_write_cache(s
->bs
))
448 features
|= (1 << VIRTIO_BLK_F_WCACHE
);
450 if (bdrv_is_read_only(s
->bs
))
451 features
|= 1 << VIRTIO_BLK_F_RO
;
/*
 * Savevm handler: serialize the common virtio state, then the list of
 * parked requests (s->rq) as a 1-prefixed sequence of raw VirtQueueElement
 * blobs, terminated by a 0 byte. NOTE(review): the loop advancing req
 * along the ->next list is not visible in this extraction.
 */
456 static void virtio_blk_save(QEMUFile
*f
, void *opaque
)
458 VirtIOBlock
*s
= opaque
;
459 VirtIOBlockReq
*req
= s
->rq
;
461 virtio_save(&s
->vdev
, f
);
/* One marker byte per saved request... */
464 qemu_put_sbyte(f
, 1);
465 qemu_put_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
/* ...and a zero byte terminates the list. */
468 qemu_put_sbyte(f
, 0);
/*
 * Loadvm handler: restore the common virtio state, then rebuild the
 * parked-request list from the 1-prefixed elements written by
 * virtio_blk_save. NOTE(review): the version_id check, the re-linking of
 * each request onto s->rq, and the return statement are not visible in
 * this extraction.
 */
471 static int virtio_blk_load(QEMUFile
*f
, void *opaque
, int version_id
)
473 VirtIOBlock
*s
= opaque
;
478 virtio_load(&s
->vdev
, f
);
/* Each non-zero marker byte announces one serialized request element. */
479 while (qemu_get_sbyte(f
)) {
480 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
481 qemu_get_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
/*
 * Create and initialize one virtio-blk device bound to conf->bs: allocate
 * the common virtio state, install the config/features/reset hooks, set
 * up the request virtqueue, copy the drive serial number, and register
 * savevm and VM state-change handlers.
 * NOTE(review): several statements (drive-property check condition, NULL
 * returns, s->bs / s->conf / s->rq assignments, the final return of
 * &s->vdev) are not visible in this extraction.
 */
489 VirtIODevice
*virtio_blk_init(DeviceState
*dev
, BlockConf
*conf
)
492 int cylinders
, heads
, secs
;
/* Static counter gives each instance a unique savevm id. */
493 static int virtio_blk_id
;
497 error_report("virtio-blk-pci: drive property not set");
500 if (!bdrv_is_inserted(conf
->bs
)) {
501 error_report("Device needs media, but drive is empty");
505 s
= (VirtIOBlock
*)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK
,
506 sizeof(struct virtio_blk_config
),
507 sizeof(VirtIOBlock
));
509 s
->vdev
.get_config
= virtio_blk_update_config
;
510 s
->vdev
.get_features
= virtio_blk_get_features
;
511 s
->vdev
.reset
= virtio_blk_reset
;
/* Precompute the alignment mask used by the read/write handlers. */
515 s
->sector_mask
= (s
->conf
->logical_block_size
/ BDRV_SECTOR_SIZE
) - 1;
516 bdrv_guess_geometry(s
->bs
, &cylinders
, &heads
, &secs
);
518 /* NB: per existing s/n string convention the string is terminated
519 * by '\0' only when less than sizeof (s->sn)
521 dinfo
= drive_get_by_blockdev(s
->bs
);
522 strncpy(s
->sn
, dinfo
->serial
, sizeof (s
->sn
));
/* Single request virtqueue, 128 descriptors deep. */
524 s
->vq
= virtio_add_queue(&s
->vdev
, 128, virtio_blk_handle_output
);
526 qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb
, s
);
528 register_savevm(dev
, "virtio-blk", virtio_blk_id
++, 2,
529 virtio_blk_save
, virtio_blk_load
, s
);
/* Fixed disk: mark the backing device non-removable. */
530 bdrv_set_removable(s
->bs
, 0);
535 void virtio_blk_exit(VirtIODevice
*vdev
)
537 VirtIOBlock
*s
= to_virtio_blk(vdev
);
538 unregister_savevm(s
->qdev
, "virtio-blk", s
);