X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block.c;h=03a21d88de258f583911c45895cb0f6b36c26f28;hb=9f4bd6baf64b8139cf2d7f8f53a98b27531da13c;hp=998df1b54b8c7418052e58e64fe79523cfdf9376;hpb=e54b7f5256659dddaf6b5c021847859829d7ebd7;p=qemu.git diff --git a/block.c b/block.c index 998df1b54..03a21d88d 100644 --- a/block.c +++ b/block.c @@ -28,6 +28,7 @@ #include "block_int.h" #include "module.h" #include "qemu-objects.h" +#include "qemu-coroutine.h" #ifdef CONFIG_BSD #include @@ -57,6 +58,19 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); +static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque); +static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov); +static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov); +static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs); static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -169,14 +183,25 @@ void path_combine(char *dest, int dest_size, void bdrv_register(BlockDriver *bdrv) { - if (!bdrv->bdrv_aio_readv) { - /* add AIO emulation layer */ - bdrv->bdrv_aio_readv = bdrv_aio_readv_em; - bdrv->bdrv_aio_writev = bdrv_aio_writev_em; - } else if (!bdrv->bdrv_read) { - /* add synchronous IO emulation layer */ + if (bdrv->bdrv_co_readv) { + /* Emulate AIO by coroutines, and sync by AIO */ + bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em; + bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em; bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; + } else { + bdrv->bdrv_co_readv = bdrv_co_readv_em; + bdrv->bdrv_co_writev = bdrv_co_writev_em; + + if (!bdrv->bdrv_aio_readv) { + /* add AIO emulation layer */ + bdrv->bdrv_aio_readv = bdrv_aio_readv_em; + bdrv->bdrv_aio_writev = bdrv_aio_writev_em; + } else if (!bdrv->bdrv_read) { + /* add synchronous IO emulation layer */ + bdrv->bdrv_read = bdrv_read_em; + bdrv->bdrv_write = bdrv_write_em; + } } if (!bdrv->bdrv_aio_flush) @@ -190,7 +215,7 @@ BlockDriverState *bdrv_new(const char *device_name) { BlockDriverState *bs; - bs = qemu_mallocz(sizeof(BlockDriverState)); + bs = g_malloc0(sizeof(BlockDriverState)); pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); if (device_name[0] != '\0') { QTAILQ_INSERT_TAIL(&bdrv_states, bs, list); @@ -412,6 +437,33 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) return 0; } +/** + * Set open flags for a given cache mode + * + * Return 0 on success, -1 if the cache mode was invalid. + */ +int bdrv_parse_cache_flags(const char *mode, int *flags) +{ + *flags &= ~BDRV_O_CACHE_MASK; + + if (!strcmp(mode, "off") || !strcmp(mode, "none")) { + *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; + } else if (!strcmp(mode, "directsync")) { + *flags |= BDRV_O_NOCACHE; + } else if (!strcmp(mode, "writeback")) { + *flags |= BDRV_O_CACHE_WB; + } else if (!strcmp(mode, "unsafe")) { + *flags |= BDRV_O_CACHE_WB; + *flags |= BDRV_O_NO_FLUSH; + } else if (!strcmp(mode, "writethrough")) { + /* this is the default */ + } else { + return -1; + } + + return 0; +} + /* * Common part for opening disk images and files */ @@ -437,15 +489,9 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, } bs->drv = drv; - bs->opaque = qemu_mallocz(drv->instance_size); + bs->opaque = g_malloc0(drv->instance_size); - /* - * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a - * write cache to the guest. We do need the fdatasync to flush - * out transactions for block allocations, and we maybe have a - * volatile write cache in our backing device to deal with. - */ - if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE)) + if (flags & BDRV_O_CACHE_WB) bs->enable_write_cache = 1; /* @@ -455,7 +501,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); /* - * Snapshots should be writeable. + * Snapshots should be writable. */ if (bs->is_temporary) { open_flags |= BDRV_O_RDWR; @@ -494,7 +540,7 @@ free_and_fail: bdrv_delete(bs->file); bs->file = NULL; } - qemu_free(bs->opaque); + g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; return ret; @@ -668,7 +714,7 @@ void bdrv_close(BlockDriverState *bs) bs->backing_hd = NULL; } bs->drv->bdrv_close(bs); - qemu_free(bs->opaque); + g_free(bs->opaque); #ifdef _WIN32 if (bs->is_temporary) { unlink(bs->filename); @@ -697,14 +743,22 @@ void bdrv_close_all(void) } } +/* make a BlockDriverState anonymous by removing from bdrv_state list. + Also, NULL terminate the device_name to prevent double remove */ +void bdrv_make_anon(BlockDriverState *bs) +{ + if (bs->device_name[0] != '\0') { + QTAILQ_REMOVE(&bdrv_states, bs, list); + } + bs->device_name[0] = '\0'; +} + void bdrv_delete(BlockDriverState *bs) { assert(!bs->peer); /* remove from list, if necessary */ - if (bs->device_name[0] != '\0') { - QTAILQ_REMOVE(&bdrv_states, bs, list); - } + bdrv_make_anon(bs); bdrv_close(bs); if (bs->file != NULL) { @@ -712,7 +766,7 @@ void bdrv_delete(BlockDriverState *bs) } assert(bs != bs_snapshots); - qemu_free(bs); + g_free(bs); } int bdrv_attach(BlockDriverState *bs, DeviceState *qdev) @@ -728,6 +782,8 @@ void bdrv_detach(BlockDriverState *bs, DeviceState *qdev) { assert(bs->peer == qdev); bs->peer = NULL; + bs->change_cb = NULL; + bs->change_opaque = NULL; } DeviceState *bdrv_get_attached(BlockDriverState *bs) @@ -739,7 +795,7 @@ DeviceState *bdrv_get_attached(BlockDriverState *bs) * Run consistency checks on an image * * Returns 0 if the check could be completed (it doesn't mean that the image is - * free of errors) or -errno when an internal error occured. The results of the + * free of errors) or -errno when an internal error occurred. The results of the * check are stored in res. */ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res) @@ -808,7 +864,7 @@ int bdrv_commit(BlockDriverState *bs) } total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; - buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); + buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { @@ -838,7 +894,7 @@ int bdrv_commit(BlockDriverState *bs) bdrv_flush(bs->backing_hd); ro_cleanup: - qemu_free(buf); + g_free(buf); if (ro) { /* re-open as RO */ @@ -918,6 +974,17 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } +static inline bool bdrv_has_async_rw(BlockDriver *drv) +{ + return drv->bdrv_co_readv != bdrv_co_readv_em + || drv->bdrv_aio_readv != bdrv_aio_readv_em; +} + +static inline bool bdrv_has_async_flush(BlockDriver *drv) +{ + return drv->bdrv_aio_flush != bdrv_aio_flush_em; +} + /* return < 0 if error. See bdrv_write() for the return codes */ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) @@ -926,6 +993,18 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, if (!drv) return -ENOMEDIUM; + + if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov); + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; @@ -970,8 +1049,21 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { BlockDriver *drv = bs->drv; + if (!bs->drv) return -ENOMEDIUM; + + if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) { + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; + + qemu_iovec_init_external(&qiov, &iov, 1); + return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); + } + if (bs->read_only) return -EACCES; if (bdrv_check_request(bs, sector_num, nb_sectors)) @@ -1098,25 +1190,57 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, return ret; } - /* No flush needed for cache=writethrough, it uses O_DSYNC */ - if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) { + /* No flush needed for cache modes that use O_DSYNC */ + if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) { bdrv_flush(bs); } return 0; } -/* - * Writes to the file and ensures that no writes are reordered across this - * request (acts as a barrier) - * - * Returns 0 on success, -errno in error cases. - */ -int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) { - return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num, - buf, BDRV_SECTOR_SIZE * nb_sectors); + BlockDriver *drv = bs->drv; + + trace_bdrv_co_readv(bs, sector_num, nb_sectors); + + if (!drv) { + return -ENOMEDIUM; + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) { + return -EIO; + } + + return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); +} + +int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BlockDriver *drv = bs->drv; + + trace_bdrv_co_writev(bs, sector_num, nb_sectors); + + if (!bs->drv) { + return -ENOMEDIUM; + } + if (bs->read_only) { + return -EACCES; + } + if (bdrv_check_request(bs, sector_num, nb_sectors)) { + return -EIO; + } + + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + } + + if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { + bs->wr_highest_sector = sector_num + nb_sectors - 1; + } + + return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } /** @@ -1132,6 +1256,8 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; + if (bdrv_in_use(bs)) + return -EBUSY; ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -1142,6 +1268,25 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return ret; } +/** + * Length of a allocated file in bytes. Sparse files are counted by actual + * allocated space. Return < 0 if error or unknown. + */ +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; + } + if (drv->bdrv_get_allocated_file_size) { + return drv->bdrv_get_allocated_file_size(bs); + } + if (bs->file) { + return bdrv_get_allocated_file_size(bs->file); + } + return -ENOTSUP; +} + /** * Length of a file in bytes. Return < 0 if error or unknown. */ @@ -1151,14 +1296,12 @@ int64_t bdrv_getlength(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - /* Fixed size devices use the total_sectors value for speed instead of - issuing a length query (like lseek) on each call. Also, legacy block - drivers don't provide a bdrv_getlength function and must use - total_sectors. */ - if (!bs->growable || !drv->bdrv_getlength) { - return bs->total_sectors * BDRV_SECTOR_SIZE; + if (bs->growable || bs->removable) { + if (drv->bdrv_getlength) { + return drv->bdrv_getlength(bs); + } } - return drv->bdrv_getlength(bs); + return bs->total_sectors * BDRV_SECTOR_SIZE; } /* return 0 as number of sectors if no device present or error */ @@ -1297,13 +1440,6 @@ void bdrv_set_geometry_hint(BlockDriverState *bs, bs->secs = secs; } -void bdrv_set_type_hint(BlockDriverState *bs, int type) -{ - bs->type = type; - bs->removable = ((type == BDRV_TYPE_CDROM || - type == BDRV_TYPE_FLOPPY)); -} - void bdrv_set_translation_hint(BlockDriverState *bs, int translation) { bs->translation = translation; @@ -1317,9 +1453,107 @@ void bdrv_get_geometry_hint(BlockDriverState *bs, *psecs = bs->secs; } -int bdrv_get_type_hint(BlockDriverState *bs) +/* Recognize floppy formats */ +typedef struct FDFormat { + FDriveType drive; + uint8_t last_sect; + uint8_t max_track; + uint8_t max_head; +} FDFormat; + +static const FDFormat fd_formats[] = { + /* First entry is default format */ + /* 1.44 MB 3"1/2 floppy disks */ + { FDRIVE_DRV_144, 18, 80, 1, }, + { FDRIVE_DRV_144, 20, 80, 1, }, + { FDRIVE_DRV_144, 21, 80, 1, }, + { FDRIVE_DRV_144, 21, 82, 1, }, + { FDRIVE_DRV_144, 21, 83, 1, }, + { FDRIVE_DRV_144, 22, 80, 1, }, + { FDRIVE_DRV_144, 23, 80, 1, }, + { FDRIVE_DRV_144, 24, 80, 1, }, + /* 2.88 MB 3"1/2 floppy disks */ + { FDRIVE_DRV_288, 36, 80, 1, }, + { FDRIVE_DRV_288, 39, 80, 1, }, + { FDRIVE_DRV_288, 40, 80, 1, }, + { FDRIVE_DRV_288, 44, 80, 1, }, + { FDRIVE_DRV_288, 48, 80, 1, }, + /* 720 kB 3"1/2 floppy disks */ + { FDRIVE_DRV_144, 9, 80, 1, }, + { FDRIVE_DRV_144, 10, 80, 1, }, + { FDRIVE_DRV_144, 10, 82, 1, }, + { FDRIVE_DRV_144, 10, 83, 1, }, + { FDRIVE_DRV_144, 13, 80, 1, }, + { FDRIVE_DRV_144, 14, 80, 1, }, + /* 1.2 MB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 15, 80, 1, }, + { FDRIVE_DRV_120, 18, 80, 1, }, + { FDRIVE_DRV_120, 18, 82, 1, }, + { FDRIVE_DRV_120, 18, 83, 1, }, + { FDRIVE_DRV_120, 20, 80, 1, }, + /* 720 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 9, 80, 1, }, + { FDRIVE_DRV_120, 11, 80, 1, }, + /* 360 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 9, 40, 1, }, + { FDRIVE_DRV_120, 9, 40, 0, }, + { FDRIVE_DRV_120, 10, 41, 1, }, + { FDRIVE_DRV_120, 10, 42, 1, }, + /* 320 kB 5"1/4 floppy disks */ + { FDRIVE_DRV_120, 8, 40, 1, }, + { FDRIVE_DRV_120, 8, 40, 0, }, + /* 360 kB must match 5"1/4 better than 3"1/2... */ + { FDRIVE_DRV_144, 9, 80, 0, }, + /* end */ + { FDRIVE_DRV_NONE, -1, -1, 0, }, +}; + +void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads, + int *max_track, int *last_sect, + FDriveType drive_in, FDriveType *drive) { - return bs->type; + const FDFormat *parse; + uint64_t nb_sectors, size; + int i, first_match, match; + + bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect); + if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) { + /* User defined disk */ + } else { + bdrv_get_geometry(bs, &nb_sectors); + match = -1; + first_match = -1; + for (i = 0; ; i++) { + parse = &fd_formats[i]; + if (parse->drive == FDRIVE_DRV_NONE) { + break; + } + if (drive_in == parse->drive || + drive_in == FDRIVE_DRV_NONE) { + size = (parse->max_head + 1) * parse->max_track * + parse->last_sect; + if (nb_sectors == size) { + match = i; + break; + } + if (first_match == -1) { + first_match = i; + } + } + } + if (match == -1) { + if (first_match == -1) { + match = 1; + } else { + match = first_match; + } + parse = &fd_formats[match]; + } + *nb_heads = parse->max_head + 1; + *max_track = parse->max_track; + *last_sect = parse->last_sect; + *drive = parse->drive; + } } int bdrv_get_translation_hint(BlockDriverState *bs) @@ -1479,6 +1713,10 @@ int bdrv_flush(BlockDriverState *bs) return 0; } + if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) { + return bdrv_co_flush_em(bs); + } + if (bs->drv && bs->drv->bdrv_flush) { return bs->drv->bdrv_flush(bs); } @@ -1593,9 +1831,8 @@ static void bdrv_print_dict(QObject *obj, void *opaque) bs_dict = qobject_to_qdict(obj); - monitor_printf(mon, "%s: type=%s removable=%d", + monitor_printf(mon, "%s: removable=%d", qdict_get_str(bs_dict, "device"), - qdict_get_str(bs_dict, "type"), qdict_get_bool(bs_dict, "removable")); if (qdict_get_bool(bs_dict, "removable")) { @@ -1636,23 +1873,10 @@ void bdrv_info(Monitor *mon, QObject **ret_data) QTAILQ_FOREACH(bs, &bdrv_states, list) { QObject *bs_obj; - const char *type = "unknown"; - - switch(bs->type) { - case BDRV_TYPE_HD: - type = "hd"; - break; - case BDRV_TYPE_CDROM: - type = "cdrom"; - break; - case BDRV_TYPE_FLOPPY: - type = "floppy"; - break; - } - bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, " + bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', " "'removable': %i, 'locked': %i }", - bs->device_name, type, bs->removable, + bs->device_name, bs->removable, bs->locked); if (bs->drv) { @@ -1691,11 +1915,19 @@ static void bdrv_stats_iter(QObject *data, void *opaque) " wr_bytes=%" PRId64 " rd_operations=%" PRId64 " wr_operations=%" PRId64 + " flush_operations=%" PRId64 + " wr_total_time_ns=%" PRId64 + " rd_total_time_ns=%" PRId64 + " flush_total_time_ns=%" PRId64 "\n", qdict_get_int(qdict, "rd_bytes"), qdict_get_int(qdict, "wr_bytes"), qdict_get_int(qdict, "rd_operations"), - qdict_get_int(qdict, "wr_operations")); + qdict_get_int(qdict, "wr_operations"), + qdict_get_int(qdict, "flush_operations"), + qdict_get_int(qdict, "wr_total_time_ns"), + qdict_get_int(qdict, "rd_total_time_ns"), + qdict_get_int(qdict, "flush_total_time_ns")); } void bdrv_stats_print(Monitor *mon, const QObject *data) @@ -1713,12 +1945,22 @@ static QObject* bdrv_info_stats_bs(BlockDriverState *bs) "'wr_bytes': %" PRId64 "," "'rd_operations': %" PRId64 "," "'wr_operations': %" PRId64 "," - "'wr_highest_offset': %" PRId64 + "'wr_highest_offset': %" PRId64 "," + "'flush_operations': %" PRId64 "," + "'wr_total_time_ns': %" PRId64 "," + "'rd_total_time_ns': %" PRId64 "," + "'flush_total_time_ns': %" PRId64 "} }", - bs->rd_bytes, bs->wr_bytes, - bs->rd_ops, bs->wr_ops, + bs->nr_bytes[BDRV_ACCT_READ], + bs->nr_bytes[BDRV_ACCT_WRITE], + bs->nr_ops[BDRV_ACCT_READ], + bs->nr_ops[BDRV_ACCT_WRITE], bs->wr_highest_sector * - (uint64_t)BDRV_SECTOR_SIZE); + (uint64_t)BDRV_SECTOR_SIZE, + bs->nr_ops[BDRV_ACCT_FLUSH], + bs->total_time_ns[BDRV_ACCT_WRITE], + bs->total_time_ns[BDRV_ACCT_READ], + bs->total_time_ns[BDRV_ACCT_FLUSH]); dict = qobject_to_qdict(res); if (*bs->device_name) { @@ -2032,7 +2274,6 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn) return buf; } - /**************************************************************/ /* async I/Os */ @@ -2041,7 +2282,6 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, BlockDriverCompletionFunc *cb, void *opaque) { BlockDriver *drv = bs->drv; - BlockDriverAIOCB *ret; trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); @@ -2050,16 +2290,8 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return NULL; - ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, - cb, opaque); - - if (ret) { - /* Update stats even though technically transfer has not happened. */ - bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - bs->rd_ops ++; - } - - return ret; + return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, + cb, opaque); } typedef struct BlockCompleteData { @@ -2078,7 +2310,7 @@ static void block_complete_cb(void *opaque, int ret) set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1); } b->cb(b->opaque, ret); - qemu_free(b); + g_free(b); } static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs, @@ -2087,7 +2319,7 @@ static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { - BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData)); + BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData)); blkdata->bs = bs; blkdata->cb = cb; @@ -2126,9 +2358,6 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, cb, opaque); if (ret) { - /* Update stats even though technically transfer has not happened. */ - bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - bs->wr_ops ++; if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { bs->wr_highest_sector = sector_num + nb_sectors - 1; } @@ -2159,7 +2388,7 @@ static void multiwrite_user_cb(MultiwriteCB *mcb) if (mcb->callbacks[i].free_qiov) { qemu_iovec_destroy(mcb->callbacks[i].free_qiov); } - qemu_free(mcb->callbacks[i].free_qiov); + g_free(mcb->callbacks[i].free_qiov); qemu_vfree(mcb->callbacks[i].free_buf); } } @@ -2177,7 +2406,7 @@ static void multiwrite_cb(void *opaque, int ret) mcb->num_requests--; if (mcb->num_requests == 0) { multiwrite_user_cb(mcb); - qemu_free(mcb); + g_free(mcb); } } @@ -2237,7 +2466,7 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, if (merge) { size_t size; - QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov)); + QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); qemu_iovec_init(qiov, reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); @@ -2293,12 +2522,20 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) MultiwriteCB *mcb; int i; + /* don't submit writes if we don't have a medium */ + if (bs->drv == NULL) { + for (i = 0; i < num_reqs; i++) { + reqs[i].error = -ENOMEDIUM; + } + return -1; + } + if (num_reqs == 0) { return 0; } // Create MultiwriteCB structure - mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); + mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); mcb->num_requests = 0; mcb->num_callbacks = num_reqs; @@ -2363,7 +2600,7 @@ fail: for (i = 0; i < mcb->num_callbacks; i++) { reqs[i].error = -EIO; } - qemu_free(mcb); + g_free(mcb); return -1; } @@ -2372,6 +2609,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, { BlockDriver *drv = bs->drv; + trace_bdrv_aio_flush(bs, opaque); + if (bs->open_flags & BDRV_O_NO_FLUSH) { return bdrv_aio_noop_em(bs, cb, opaque); } @@ -2472,6 +2711,89 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); } + +typedef struct BlockDriverAIOCBCoroutine { + BlockDriverAIOCB common; + BlockRequest req; + bool is_write; + QEMUBH* bh; +} BlockDriverAIOCBCoroutine; + +static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) +{ + qemu_aio_flush(); +} + +static AIOPool bdrv_em_co_aio_pool = { + .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), + .cancel = bdrv_aio_co_cancel_em, +}; + +static void bdrv_co_rw_bh(void *opaque) +{ + BlockDriverAIOCBCoroutine *acb = opaque; + + acb->common.cb(acb->common.opaque, acb->req.error); + qemu_bh_delete(acb->bh); + qemu_aio_release(acb); +} + +static void coroutine_fn bdrv_co_rw(void *opaque) +{ + BlockDriverAIOCBCoroutine *acb = opaque; + BlockDriverState *bs = acb->common.bs; + + if (!acb->is_write) { + acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector, + acb->req.nb_sectors, acb->req.qiov); + } else { + acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector, + acb->req.nb_sectors, acb->req.qiov); + } + + acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb); + qemu_bh_schedule(acb->bh); +} + +static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque, + bool is_write) +{ + Coroutine *co; + BlockDriverAIOCBCoroutine *acb; + + acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque); + acb->req.sector = sector_num; + acb->req.nb_sectors = nb_sectors; + acb->req.qiov = qiov; + acb->is_write = is_write; + + co = qemu_coroutine_create(bdrv_co_rw); + qemu_coroutine_enter(co, acb); + + return &acb->common; +} + +static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, + false); +} + +static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, + true); +} + static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { @@ -2528,8 +2850,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, struct iovec iov; QEMUIOVector qiov; - async_context_push(); - async_ret = NOT_DONE; iov.iov_base = (void *)buf; iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; @@ -2547,7 +2867,6 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, fail: - async_context_pop(); return async_ret; } @@ -2559,8 +2878,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, struct iovec iov; QEMUIOVector qiov; - async_context_push(); - async_ret = NOT_DONE; iov.iov_base = (void *)buf; iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; @@ -2576,7 +2893,6 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, } fail: - async_context_pop(); return async_ret; } @@ -2600,7 +2916,7 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, acb = pool->free_aiocb; pool->free_aiocb = acb->next; } else { - acb = qemu_mallocz(pool->aiocb_size); + acb = g_malloc0(pool->aiocb_size); acb->pool = pool; } acb->bs = bs; @@ -2617,6 +2933,77 @@ void qemu_aio_release(void *p) pool->free_aiocb = acb; } +/**************************************************************/ +/* Coroutine block device emulation */ + +typedef struct CoroutineIOCompletion { + Coroutine *coroutine; + int ret; +} CoroutineIOCompletion; + +static void bdrv_co_io_em_complete(void *opaque, int ret) +{ + CoroutineIOCompletion *co = opaque; + + co->ret = ret; + qemu_coroutine_enter(co->coroutine, NULL); +} + +static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *iov, + bool is_write) +{ + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockDriverAIOCB *acb; + + if (is_write) { + acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors, + bdrv_co_io_em_complete, &co); + } else { + acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors, + bdrv_co_io_em_complete, &co); + } + + trace_bdrv_co_io(is_write, acb); + if (!acb) { + return -EIO; + } + qemu_coroutine_yield(); + + return co.ret; +} + +static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); +} + +static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); +} + +static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs) +{ + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockDriverAIOCB *acb; + + acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); + if (!acb) { + return -EIO; + } + qemu_coroutine_yield(); + return co.ret; +} + /**************************************************************/ /* removable device support */ @@ -2660,25 +3047,16 @@ int bdrv_media_changed(BlockDriverState *bs) int bdrv_eject(BlockDriverState *bs, int eject_flag) { BlockDriver *drv = bs->drv; - int ret; - if (bs->locked) { + if (eject_flag && bs->locked) { return -EBUSY; } - if (!drv || !drv->bdrv_eject) { - ret = -ENOTSUP; - } else { - ret = drv->bdrv_eject(bs, eject_flag); - } - if (ret == -ENOTSUP) { - ret = 0; + if (drv && drv->bdrv_eject) { + drv->bdrv_eject(bs, eject_flag); } - if (ret >= 0) { - bs->tray_open = eject_flag; - } - - return ret; + bs->tray_open = eject_flag; + return 0; } int bdrv_is_locked(BlockDriverState *bs) @@ -2694,6 +3072,8 @@ void bdrv_set_locked(BlockDriverState *bs, int locked) { BlockDriver *drv = bs->drv; + trace_bdrv_set_locked(bs, locked); + bs->locked = locked; if (drv && drv->bdrv_set_locked) { drv->bdrv_set_locked(bs, locked); @@ -2740,11 +3120,11 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; - bs->dirty_bitmap = qemu_mallocz(bitmap_size); + bs->dirty_bitmap = g_malloc0(bitmap_size); } } else { if (bs->dirty_bitmap) { - qemu_free(bs->dirty_bitmap); + g_free(bs->dirty_bitmap); bs->dirty_bitmap = NULL; } } @@ -2774,12 +3154,44 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs) return bs->dirty_count; } +void bdrv_set_in_use(BlockDriverState *bs, int in_use) +{ + assert(bs->in_use != in_use); + bs->in_use = in_use; +} + +int bdrv_in_use(BlockDriverState *bs) +{ + return bs->in_use; +} + +void +bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes, + enum BlockAcctType type) +{ + assert(type < BDRV_MAX_IOTYPE); + + cookie->bytes = bytes; + cookie->start_time_ns = get_clock(); + cookie->type = type; +} + +void +bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) +{ + assert(cookie->type < BDRV_MAX_IOTYPE); + + bs->nr_bytes[cookie->type] += cookie->bytes; + bs->nr_ops[cookie->type]++; + bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; +} + int bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags) { QEMUOptionParameter *param = NULL, *create_options = NULL; - QEMUOptionParameter *backing_fmt, *backing_file; + QEMUOptionParameter *backing_fmt, *backing_file, *size; BlockDriverState *bs = NULL; BlockDriver *drv, *proto_drv; BlockDriver *backing_drv = NULL; @@ -2862,7 +3274,8 @@ int bdrv_img_create(const char *filename, const char *fmt, // The size for the image must always be specified, with one exception: // If we are using a backing file, we can obtain the size from there - if (get_option_parameter(param, BLOCK_OPT_SIZE)->value.n == -1) { + size = get_option_parameter(param, BLOCK_OPT_SIZE); + if (size && size->value.n == -1) { if (backing_file && backing_file->value.s) { uint64_t size; char buf[32];