X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block.c;h=382ea71f4b8c28550d4a798377fd1e6c4bae04ae;hb=60aad298cb6de52f2716b2e82e1353ea9de95fd6;hp=e655052c8b534ce29efb25de08b956e8ad0d9de4;hpb=0d51b4debee6fb322751a57097a1d675c7a7c38d;p=qemu.git diff --git a/block.c b/block.c index e655052c8..382ea71f4 100644 --- a/block.c +++ b/block.c @@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors); -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait); - static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -123,69 +116,101 @@ int is_windows_drive(const char *filename) #endif /* throttling disk I/O limits */ -void bdrv_io_limits_disable(BlockDriverState *bs) +void bdrv_set_io_limits(BlockDriverState *bs, + ThrottleConfig *cfg) { - bs->io_limits_enabled = false; + int i; + + throttle_config(&bs->throttle_state, cfg); + + for (i = 0; i < 2; i++) { + qemu_co_enter_next(&bs->throttled_reqs[i]); + } +} - do {} while (qemu_co_enter_next(&bs->throttled_reqs)); +/* this function drain all the throttled IOs */ +static bool bdrv_start_throttled_reqs(BlockDriverState *bs) +{ + bool drained = false; + bool enabled = bs->io_limits_enabled; + int i; - if (bs->block_timer) { - qemu_del_timer(bs->block_timer); - qemu_free_timer(bs->block_timer); - bs->block_timer = NULL; + bs->io_limits_enabled = false; + + for (i = 0; i < 2; i++) { + while (qemu_co_enter_next(&bs->throttled_reqs[i])) { + drained = true; + } } - bs->slice_start = 0; - bs->slice_end = 0; + bs->io_limits_enabled = enabled; + + return drained; } -static void bdrv_block_timer(void *opaque) +void bdrv_io_limits_disable(BlockDriverState *bs) { - BlockDriverState *bs = opaque; + bs->io_limits_enabled = false; + + bdrv_start_throttled_reqs(bs); - qemu_co_enter_next(&bs->throttled_reqs); + throttle_destroy(&bs->throttle_state); } -void bdrv_io_limits_enable(BlockDriverState *bs) +static void bdrv_throttle_read_timer_cb(void *opaque) { - bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs); - bs->io_limits_enabled = true; + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[0]); } -bool bdrv_io_limits_enabled(BlockDriverState *bs) +static void bdrv_throttle_write_timer_cb(void *opaque) { - BlockIOLimit *io_limits = &bs->io_limits; - return io_limits->bps[BLOCK_IO_LIMIT_READ] - || io_limits->bps[BLOCK_IO_LIMIT_WRITE] - || io_limits->bps[BLOCK_IO_LIMIT_TOTAL] - || io_limits->iops[BLOCK_IO_LIMIT_READ] - || io_limits->iops[BLOCK_IO_LIMIT_WRITE] - || io_limits->iops[BLOCK_IO_LIMIT_TOTAL]; + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[1]); } +/* should be called before bdrv_set_io_limits if a limit is set */ +void bdrv_io_limits_enable(BlockDriverState *bs) +{ + assert(!bs->io_limits_enabled); + throttle_init(&bs->throttle_state, + QEMU_CLOCK_VIRTUAL, + bdrv_throttle_read_timer_cb, + bdrv_throttle_write_timer_cb, + bs); + bs->io_limits_enabled = true; +} + +/* This function makes an IO wait if needed + * + * @nb_sectors: the number of sectors of the IO + * @is_write: is the IO a write + */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - bool is_write, int nb_sectors) + int nb_sectors, + bool is_write) { - int64_t wait_time = -1; + /* does this io must wait */ + bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { - qemu_co_queue_wait(&bs->throttled_reqs); + /* if must wait or any request of this type throttled queue the IO */ + if (must_wait || + !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); } - /* In fact, we hope to keep each request's timing, in FIFO mode. The next - * throttled requests will not be dequeued until the current request is - * allowed to be serviced. So if the current request still exceeds the - * limits, it will be inserted to the head. All requests followed it will - * be still in throttled_reqs queue. - */ + /* the IO will be executed, do the accounting */ + throttle_account(&bs->throttle_state, + is_write, + nb_sectors * BDRV_SECTOR_SIZE); - while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) { - qemu_mod_timer(bs->block_timer, - wait_time + qemu_get_clock_ns(vm_clock)); - qemu_co_queue_wait_insert_head(&bs->throttled_reqs); + /* if the next request must wait -> do nothing */ + if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + return; } - qemu_co_queue_next(&bs->throttled_reqs); + /* else queue next request for execution */ + qemu_co_queue_next(&bs->throttled_reqs[is_write]); } /* check if the path starts with ":" */ @@ -305,7 +330,9 @@ BlockDriverState *bdrv_new(const char *device_name) bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); notifier_with_return_list_init(&bs->before_write_notifiers); - qemu_co_queue_init(&bs->throttled_reqs); + qemu_co_queue_init(&bs->throttled_reqs[0]); + qemu_co_queue_init(&bs->throttled_reqs[1]); + bs->refcnt = 1; return bs; } @@ -367,18 +394,26 @@ typedef struct CreateCo { char *filename; QEMUOptionParameter *options; int ret; + Error *err; } CreateCo; static void coroutine_fn bdrv_create_co_entry(void *opaque) { + Error *local_err = NULL; + int ret; + CreateCo *cco = opaque; assert(cco->drv); - cco->ret = cco->drv->bdrv_create(cco->filename, cco->options); + ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err); + if (error_is_set(&local_err)) { + error_propagate(&cco->err, local_err); + } + cco->ret = ret; } int bdrv_create(BlockDriver *drv, const char* filename, - QEMUOptionParameter *options) + QEMUOptionParameter *options, Error **errp) { int ret; @@ -388,9 +423,11 @@ int bdrv_create(BlockDriver *drv, const char* filename, .filename = g_strdup(filename), .options = options, .ret = NOT_DONE, + .err = NULL, }; if (!drv->bdrv_create) { + error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); ret = -ENOTSUP; goto out; } @@ -407,22 +444,37 @@ int bdrv_create(BlockDriver *drv, const char* filename, } ret = cco.ret; + if (ret < 0) { + if (error_is_set(&cco.err)) { + error_propagate(errp, cco.err); + } else { + error_setg_errno(errp, -ret, "Could not create image"); + } + } out: g_free(cco.filename); return ret; } -int bdrv_create_file(const char* filename, QEMUOptionParameter *options) +int bdrv_create_file(const char* filename, QEMUOptionParameter *options, + Error **errp) { BlockDriver *drv; + Error *local_err = NULL; + int ret; drv = bdrv_find_protocol(filename, true); if (drv == NULL) { + error_setg(errp, "Could not find protocol for file '%s'", filename); return -ENOENT; } - return bdrv_create(drv, filename, options); + ret = bdrv_create(drv, filename, options, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + } + return ret; } /* @@ -525,7 +577,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, } static int find_image_format(BlockDriverState *bs, const char *filename, - BlockDriver **pdrv) + BlockDriver **pdrv, Error **errp) { int score, score_max; BlockDriver *drv1, *drv; @@ -536,6 +588,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename, if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { drv = bdrv_find_format("raw"); if (!drv) { + error_setg(errp, "Could not find raw image format"); ret = -ENOENT; } *pdrv = drv; @@ -544,6 +597,8 @@ static int find_image_format(BlockDriverState *bs, const char *filename, ret = bdrv_pread(bs, 0, buf, sizeof(buf)); if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read image for determining its " + "format"); *pdrv = NULL; return ret; } @@ -560,6 +615,8 @@ static int find_image_format(BlockDriverState *bs, const char *filename, } } if (!drv) { + error_setg(errp, "Could not determine image format: No compatible " + "driver found"); ret = -ENOENT; } *pdrv = drv; @@ -583,7 +640,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) if (length < 0) { return length; } - hint = length >> BDRV_SECTOR_BITS; + hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); } bs->total_sectors = hint; @@ -679,10 +736,11 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) * Removes all processed options from *options. */ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, - QDict *options, int flags, BlockDriver *drv) + QDict *options, int flags, BlockDriver *drv, Error **errp) { int ret, open_flags; const char *filename; + Error *local_err = NULL; assert(drv != NULL); assert(bs->file == NULL); @@ -711,12 +769,22 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bs->read_only = !(open_flags & BDRV_O_RDWR); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { + error_setg(errp, + !bs->read_only && bdrv_is_whitelisted(drv, true) + ? "Driver '%s' can only be used for read-only devices" + : "Driver '%s' is not whitelisted", + drv->format_name); return -ENOTSUP; } assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ - if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) { - bdrv_enable_copy_on_read(bs); + if (flags & BDRV_O_COPY_ON_READ) { + if (!bs->read_only) { + bdrv_enable_copy_on_read(bs); + } else { + error_setg(errp, "Can't use copy-on-read on read-only device"); + return -EINVAL; + } } if (filename != NULL) { @@ -733,34 +801,40 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, /* Open the image, either directly or using a protocol */ if (drv->bdrv_file_open) { assert(file == NULL); - assert(drv->bdrv_parse_filename || filename != NULL); - ret = drv->bdrv_file_open(bs, options, open_flags); + assert(!drv->bdrv_needs_filename || filename != NULL); + ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); } else { if (file == NULL) { - qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a " - "block driver for the protocol level", - drv->format_name); + error_setg(errp, "Can't use '%s' as a block driver for the " + "protocol level", drv->format_name); ret = -EINVAL; goto free_and_fail; } - assert(file != NULL); bs->file = file; - ret = drv->bdrv_open(bs, options, open_flags); + ret = drv->bdrv_open(bs, options, open_flags, &local_err); } if (ret < 0) { + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + } else if (bs->filename[0]) { + error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); + } else { + error_setg_errno(errp, -ret, "Could not open image"); + } goto free_and_fail; } ret = refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); goto free_and_fail; } #ifndef _WIN32 if (bs->is_temporary) { - assert(filename != NULL); - unlink(filename); + assert(bs->filename[0] != '\0'); + unlink(bs->filename); } #endif return 0; @@ -782,12 +856,13 @@ free_and_fail: * dictionary, it needs to use QINCREF() before calling bdrv_file_open. */ int bdrv_file_open(BlockDriverState **pbs, const char *filename, - QDict *options, int flags) + QDict *options, int flags, Error **errp) { BlockDriverState *bs; BlockDriver *drv; const char *drvname; bool allow_protocol_prefix = false; + Error *local_err = NULL; int ret; /* NULL means an empty set of options */ @@ -806,8 +881,8 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, qdict_put(options, "filename", qstring_from_str(filename)); allow_protocol_prefix = true; } else { - qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and " - "'filename' options at the same time"); + error_setg(errp, "Can't specify 'file' and 'filename' options at the " + "same time"); ret = -EINVAL; goto fail; } @@ -815,54 +890,54 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, /* Find the right block driver */ drvname = qdict_get_try_str(options, "driver"); if (drvname) { - drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR)); + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver '%s'", drvname); + } qdict_del(options, "driver"); } else if (filename) { drv = bdrv_find_protocol(filename, allow_protocol_prefix); if (!drv) { - qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol"); + error_setg(errp, "Unknown protocol"); } } else { - qerror_report(ERROR_CLASS_GENERIC_ERROR, - "Must specify either driver or file"); + error_setg(errp, "Must specify either driver or file"); drv = NULL; } if (!drv) { + /* errp has been set already */ ret = -ENOENT; goto fail; } /* Parse the filename and open it */ if (drv->bdrv_parse_filename && filename) { - Error *local_err = NULL; drv->bdrv_parse_filename(filename, options, &local_err); if (error_is_set(&local_err)) { - qerror_report_err(local_err); - error_free(local_err); + error_propagate(errp, local_err); ret = -EINVAL; goto fail; } qdict_del(options, "filename"); - } else if (!drv->bdrv_parse_filename && !filename) { - qerror_report(ERROR_CLASS_GENERIC_ERROR, - "The '%s' block driver requires a file name", - drv->format_name); + } else if (drv->bdrv_needs_filename && !filename) { + error_setg(errp, "The '%s' block driver requires a file name", + drv->format_name); ret = -EINVAL; goto fail; } - ret = bdrv_open_common(bs, NULL, options, flags, drv); + ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err); if (ret < 0) { + error_propagate(errp, local_err); goto fail; } /* Check if any unknown options were used */ if (qdict_size(options) != 0) { const QDictEntry *entry = qdict_first(options); - qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't " - "support the option '%s'", - drv->format_name, entry->key); + error_setg(errp, "Block protocol '%s' doesn't support the option '%s'", + drv->format_name, entry->key); ret = -EINVAL; goto fail; } @@ -877,7 +952,7 @@ fail: if (!bs->drv) { QDECREF(bs->options); } - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -889,11 +964,12 @@ fail: * function (even on failure), so if the caller intends to reuse the dictionary, * it needs to use QINCREF() before calling bdrv_file_open. */ -int bdrv_open_backing_file(BlockDriverState *bs, QDict *options) +int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) { char backing_filename[PATH_MAX]; int back_flags, ret; BlockDriver *back_drv = NULL; + Error *local_err = NULL; if (bs->backing_hd != NULL) { QDECREF(options); @@ -911,50 +987,38 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options) } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { QDECREF(options); return 0; + } else { + bdrv_get_full_backing_filename(bs, backing_filename, + sizeof(backing_filename)); } bs->backing_hd = bdrv_new(""); - bdrv_get_full_backing_filename(bs, backing_filename, - sizeof(backing_filename)); if (bs->backing_format[0] != '\0') { back_drv = bdrv_find_format(bs->backing_format); } /* backing files always opened read-only */ - back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT); + back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | + BDRV_O_COPY_ON_READ); ret = bdrv_open(bs->backing_hd, *backing_filename ? backing_filename : NULL, options, - back_flags, back_drv); + back_flags, back_drv, &local_err); if (ret < 0) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; bs->open_flags |= BDRV_O_NO_BACKING; + error_setg(errp, "Could not open backing file: %s", + error_get_pretty(local_err)); + error_free(local_err); return ret; } + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->backing_hd->file->filename); return 0; } -static void extract_subqdict(QDict *src, QDict **dst, const char *start) -{ - const QDictEntry *entry, *next; - const char *p; - - *dst = qdict_new(); - entry = qdict_first(src); - - while (entry != NULL) { - next = qdict_next(src, entry); - if (strstart(entry->key, start, &p)) { - qobject_incref(entry->value); - qdict_put_obj(*dst, p, entry->value); - qdict_del(src, entry->key); - } - entry = next; - } -} - /* * Opens a disk image (raw, qcow2, vmdk, ...) * @@ -964,7 +1028,7 @@ static void extract_subqdict(QDict *src, QDict **dst, const char *start) * dictionary, it needs to use QINCREF() before calling bdrv_open. */ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, - int flags, BlockDriver *drv) + int flags, BlockDriver *drv, Error **errp) { int ret; /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ @@ -972,6 +1036,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, BlockDriverState *file = NULL; QDict *file_options = NULL; const char *drvname; + Error *local_err = NULL; /* NULL means an empty set of options */ if (options == NULL) { @@ -990,7 +1055,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, char backing_filename[PATH_MAX]; if (qdict_size(options) != 0) { - error_report("Can't use snapshot=on with driver-specific options"); + error_setg(errp, "Can't use snapshot=on with driver-specific options"); ret = -EINVAL; goto fail; } @@ -1001,17 +1066,18 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* if there is a backing file, use it */ bs1 = bdrv_new(""); - ret = bdrv_open(bs1, filename, NULL, 0, drv); + ret = bdrv_open(bs1, filename, NULL, 0, drv, &local_err); if (ret < 0) { - bdrv_delete(bs1); + bdrv_unref(bs1); goto fail; } total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK; - bdrv_delete(bs1); + bdrv_unref(bs1); ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename)); if (ret < 0) { + error_setg_errno(errp, -ret, "Could not get temporary filename"); goto fail; } @@ -1021,6 +1087,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, "%s", filename); } else if (!realpath(filename, backing_filename)) { ret = -errno; + error_setg_errno(errp, errno, "Could not resolve path '%s'", filename); goto fail; } @@ -1036,9 +1103,14 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, drv->format_name); } - ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options); + ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err); free_option_parameters(create_options); if (ret < 0) { + error_setg_errno(errp, -ret, "Could not create temporary overlay " + "'%s': %s", tmp_filename, + error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; goto fail; } @@ -1052,10 +1124,10 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, flags |= BDRV_O_ALLOW_RDWR; } - extract_subqdict(options, &file_options, "file."); + qdict_extract_subqdict(options, &file_options, "file."); ret = bdrv_file_open(&file, filename, file_options, - bdrv_open_flags(bs, flags | BDRV_O_UNMAP)); + bdrv_open_flags(bs, flags | BDRV_O_UNMAP), &local_err); if (ret < 0) { goto fail; } @@ -1063,12 +1135,17 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* Find the right image format driver */ drvname = qdict_get_try_str(options, "driver"); if (drvname) { - drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR)); + drv = bdrv_find_format(drvname); qdict_del(options, "driver"); + if (!drv) { + error_setg(errp, "Invalid driver: '%s'", drvname); + ret = -EINVAL; + goto unlink_and_fail; + } } if (!drv) { - ret = find_image_format(file, filename, &drv); + ret = find_image_format(file, filename, &drv, &local_err); } if (!drv) { @@ -1076,13 +1153,13 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, } /* Open the image */ - ret = bdrv_open_common(bs, file, options, flags, drv); + ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { goto unlink_and_fail; } if (bs->file != file) { - bdrv_delete(file); + bdrv_unref(file); file = NULL; } @@ -1090,8 +1167,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, if ((flags & BDRV_O_NO_BACKING) == 0) { QDict *backing_options; - extract_subqdict(options, &backing_options, "backing."); - ret = bdrv_open_backing_file(bs, backing_options); + qdict_extract_subqdict(options, &backing_options, "backing."); + ret = bdrv_open_backing_file(bs, backing_options, &local_err); if (ret < 0) { goto close_and_fail; } @@ -1100,9 +1177,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* Check if any unknown options were used */ if (qdict_size(options) != 0) { const QDictEntry *entry = qdict_first(options); - qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by " - "device '%s' doesn't support the option '%s'", - drv->format_name, bs->device_name, entry->key); + error_setg(errp, "Block format '%s' used by device '%s' doesn't " + "support the option '%s'", drv->format_name, bs->device_name, + entry->key); ret = -EINVAL; goto close_and_fail; @@ -1113,16 +1190,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bdrv_dev_change_media_cb(bs, true); } - /* throttling disk I/O limits */ - if (bs->io_limits_enabled) { - bdrv_io_limits_enable(bs); - } - return 0; unlink_and_fail: if (file != NULL) { - bdrv_delete(file); + bdrv_unref(file); } if (bs->is_temporary) { unlink(filename); @@ -1131,11 +1203,17 @@ fail: QDECREF(bs->options); QDECREF(options); bs->options = NULL; + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + } return ret; close_and_fail: bdrv_close(bs); QDECREF(options); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + } return ret; } @@ -1383,7 +1461,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; } bs->drv->bdrv_close(bs); @@ -1408,7 +1486,7 @@ void bdrv_close(BlockDriverState *bs) bs->options = NULL; if (bs->file != NULL) { - bdrv_delete(bs->file); + bdrv_unref(bs->file); bs->file = NULL; } } @@ -1436,7 +1514,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs) if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { + if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { + return true; + } + if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } if (bs->file && bdrv_requests_pending(bs->file)) { @@ -1482,7 +1563,7 @@ void bdrv_drain_all(void) * a busy wait. */ QTAILQ_FOREACH(bs, &bdrv_states, list) { - while (qemu_co_enter_next(&bs->throttled_reqs)) { + if (bdrv_start_throttled_reqs(bs)) { busy = true; } } @@ -1524,13 +1605,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; - /* i/o timing parameters */ - bs_dest->slice_start = bs_src->slice_start; - bs_dest->slice_end = bs_src->slice_end; - bs_dest->slice_submitted = bs_src->slice_submitted; - bs_dest->io_limits = bs_src->io_limits; - bs_dest->throttled_reqs = bs_src->throttled_reqs; - bs_dest->block_timer = bs_src->block_timer; + /* i/o throttled req */ + memcpy(&bs_dest->throttle_state, + &bs_src->throttle_state, + sizeof(ThrottleState)); + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; bs_dest->io_limits_enabled = bs_src->io_limits_enabled; /* r/w error */ @@ -1544,6 +1624,9 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* dirty bitmap */ bs_dest->dirty_bitmap = bs_src->dirty_bitmap; + /* reference count */ + bs_dest->refcnt = bs_src->refcnt; + /* job */ bs_dest->in_use = bs_src->in_use; bs_dest->job = bs_src->job; @@ -1577,7 +1660,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->dev == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); tmp = *bs_new; *bs_new = *bs_old; @@ -1596,7 +1679,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->job == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); bdrv_rebind(bs_new); bdrv_rebind(bs_old); @@ -1627,11 +1710,12 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) bs_new->drv ? bs_new->drv->format_name : ""); } -void bdrv_delete(BlockDriverState *bs) +static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); assert(!bs->in_use); + assert(!bs->refcnt); bdrv_close(bs); @@ -1830,8 +1914,11 @@ int bdrv_commit(BlockDriverState *bs) buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { - if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { - + ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); + if (ret < 0) { + goto ro_cleanup; + } + if (ret) { if (bdrv_read(bs, sector, buf, n) != 0) { ret = -EIO; goto ro_cleanup; @@ -2147,7 +2234,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ intermediate_state->bs->backing_hd = NULL; - bdrv_delete(intermediate_state->bs); + bdrv_unref(intermediate_state->bs); } ret = 0; @@ -2539,11 +2626,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, return -EIO; } - /* throttling disk read I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, false, nb_sectors); - } - if (bs->copy_on_read) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -2555,12 +2637,17 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, false); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, false); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; - ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); if (ret < 0) { goto out; } @@ -2571,7 +2658,35 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } } - ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + if (!(bs->zero_beyond_eof && bs->growable)) { + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + } else { + /* Read zeros after EOF of growable BDSes */ + int64_t len, total_sectors, max_nb_sectors; + + len = bdrv_getlength(bs); + if (len < 0) { + ret = len; + goto out; + } + + total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE); + max_nb_sectors = MAX(0, total_sectors - sector_num); + if (max_nb_sectors > 0) { + ret = drv->bdrv_co_readv(bs, sector_num, + MIN(nb_sectors, max_nb_sectors), qiov); + } else { + ret = 0; + } + + /* Reading beyond end of file is supposed to produce zeroes */ + if (ret == 0 && total_sectors < sector_num + nb_sectors) { + uint64_t offset = MAX(0, total_sectors - sector_num); + uint64_t bytes = (sector_num + nb_sectors - offset) * + BDRV_SECTOR_SIZE; + qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); + } + } out: tracked_request_end(&req); @@ -2652,15 +2767,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, return -EIO; } - /* throttling disk write I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, true, nb_sectors); - } - if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, true); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, true); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2684,6 +2799,9 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { bs->wr_highest_sector = sector_num + nb_sectors - 1; } + if (bs->growable && ret >= 0) { + bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); + } tracked_request_end(&req); @@ -2758,9 +2876,10 @@ int64_t bdrv_getlength(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - if (bs->growable || bdrv_dev_has_removable_media(bs)) { - if (drv->bdrv_getlength) { - return drv->bdrv_getlength(bs); + if (drv->has_variable_length) { + int ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + return ret; } } return bs->total_sectors * BDRV_SECTOR_SIZE; @@ -2778,14 +2897,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) *nb_sectors_ptr = length; } -/* throttling disk io limits */ -void bdrv_set_io_limits(BlockDriverState *bs, - BlockIOLimit *io_limits) -{ - bs->io_limits = *io_limits; - bs->io_limits_enabled = bdrv_io_limits_enabled(bs); -} - void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -2978,6 +3089,11 @@ int bdrv_has_zero_init(BlockDriverState *bs) { assert(bs->drv); + /* If BS is a copy on write image, it is initialized to + the contents of the base image, which may not be zeroes. */ + if (bs->backing_hd) { + return 0; + } if (bs->drv->bdrv_has_zero_init) { return bs->drv->bdrv_has_zero_init(bs); } @@ -2986,15 +3102,15 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } -typedef struct BdrvCoIsAllocatedData { +typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; int64_t sector_num; int nb_sectors; int *pnum; - int ret; + int64_t ret; bool done; -} BdrvCoIsAllocatedData; +} BdrvCoGetBlockStatusData; /* * Returns true iff the specified sector is present in the disk image. Drivers @@ -3011,12 +3127,20 @@ typedef struct BdrvCoIsAllocatedData { * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. */ -int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { + int64_t length; int64_t n; + int64_t ret, ret2; - if (sector_num >= bs->total_sectors) { + length = bdrv_getlength(bs); + if (length < 0) { + return length; + } + + if (sector_num >= (length >> BDRV_SECTOR_BITS)) { *pnum = 0; return 0; } @@ -3026,35 +3150,76 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, nb_sectors = n; } - if (!bs->drv->bdrv_co_is_allocated) { + if (!bs->drv->bdrv_co_get_block_status) { *pnum = nb_sectors; - return 1; + ret = BDRV_BLOCK_DATA; + if (bs->drv->protocol_name) { + ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); + } + return ret; + } + + ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + *pnum = 0; + return ret; + } + + if (ret & BDRV_BLOCK_RAW) { + assert(ret & BDRV_BLOCK_OFFSET_VALID); + return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + *pnum, pnum); + } + + if (!(ret & BDRV_BLOCK_DATA)) { + if (bdrv_has_zero_init(bs)) { + ret |= BDRV_BLOCK_ZERO; + } else if (bs->backing_hd) { + BlockDriverState *bs2 = bs->backing_hd; + int64_t length2 = bdrv_getlength(bs2); + if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) { + ret |= BDRV_BLOCK_ZERO; + } + } } - return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum); + if (bs->file && + (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && + (ret & BDRV_BLOCK_OFFSET_VALID)) { + ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + *pnum, pnum); + if (ret2 >= 0) { + /* Ignore errors. This is just providing extra information, it + * is useful but not necessary. + */ + ret |= (ret2 & BDRV_BLOCK_ZERO); + } + } + + return ret; } -/* Coroutine wrapper for bdrv_is_allocated() */ -static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque) +/* Coroutine wrapper for bdrv_get_block_status() */ +static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) { - BdrvCoIsAllocatedData *data = opaque; + BdrvCoGetBlockStatusData *data = opaque; BlockDriverState *bs = data->bs; - data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors, - data->pnum); + data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, + data->pnum); data->done = true; } /* - * Synchronous wrapper around bdrv_co_is_allocated(). + * Synchronous wrapper around bdrv_co_get_block_status(). * - * See bdrv_co_is_allocated() for details. + * See bdrv_co_get_block_status() for details. */ -int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum) +int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) { Coroutine *co; - BdrvCoIsAllocatedData data = { + BdrvCoGetBlockStatusData data = { .bs = bs, .sector_num = sector_num, .nb_sectors = nb_sectors, @@ -3062,14 +3227,31 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, .done = false, }; - co = qemu_coroutine_create(bdrv_is_allocated_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_get_block_status_co_entry(&data); + } else { + co = qemu_coroutine_create(bdrv_get_block_status_co_entry); + qemu_coroutine_enter(co, &data); + while (!data.done) { + qemu_aio_wait(); + } } return data.ret; } +int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + return ret; + } + return + (ret & BDRV_BLOCK_DATA) || + ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs)); +} + /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * @@ -3082,10 +3264,10 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, * allocated/unallocated state. * */ -int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum) +int bdrv_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum) { BlockDriverState *intermediate; int ret, n = nb_sectors; @@ -3093,8 +3275,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, intermediate = top; while (intermediate && intermediate != base) { int pnum_inter; - ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors, - &pnum_inter); + ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, + &pnum_inter); if (ret < 0) { return ret; } else if (ret) { @@ -3121,44 +3303,6 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, return 0; } -/* Coroutine wrapper for bdrv_is_allocated_above() */ -static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque) -{ - BdrvCoIsAllocatedData *data = opaque; - BlockDriverState *top = data->bs; - BlockDriverState *base = data->base; - - data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num, - data->nb_sectors, data->pnum); - data->done = true; -} - -/* - * Synchronous wrapper around bdrv_co_is_allocated_above(). - * - * See bdrv_co_is_allocated_above() for details. - */ -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t sector_num, int nb_sectors, int *pnum) -{ - Coroutine *co; - BdrvCoIsAllocatedData data = { - .bs = top, - .base = base, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .pnum = pnum, - .done = false, - }; - - co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); - } - return data.ret; -} - const char *bdrv_get_encrypted_filename(BlockDriverState *bs) { if (bs->backing_hd && bs->backing_hd->encrypted) @@ -3202,6 +3346,15 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return drv->bdrv_get_info(bs, bdi); } +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_get_specific_info) { + return drv->bdrv_get_specific_info(bs); + } + return NULL; +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { @@ -3595,169 +3748,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) acb->aiocb_info->cancel(acb); } -/* block I/O throttling */ -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait) -{ - uint64_t bps_limit = 0; - uint64_t extension; - double bytes_limit, bytes_base, bytes_res; - double slice_time, wait_time; - - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.bps[is_write]) { - bps_limit = bs->io_limits.bps[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - bytes_limit = bps_limit * slice_time; - bytes_base = bs->slice_submitted.bytes[is_write]; - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->slice_submitted.bytes[!is_write]; - } - - /* bytes_base: the bytes of data which have been read/written; and - * it is obtained from the history statistic info. - * bytes_res: the remaining bytes of data which need to be read/written. - * (bytes_base + bytes_res) / bps_limit: used to calcuate - * the total time for completing reading/writting all data. - */ - bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - - if (bytes_base + bytes_res <= bytes_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch */ - wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; - - /* When the I/O rate at runtime exceeds the limits, - * bs->slice_end need to be extended in order that the current statistic - * info can be kept until the timer fire, so it is increased and tuned - * based on the result of experiment. - */ - extension = wait_time * NANOSECONDS_PER_SECOND; - extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * - BLOCK_IO_SLICE_TIME; - bs->slice_end += extension; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait) -{ - uint64_t iops_limit = 0; - double ios_limit, ios_base; - double slice_time, wait_time; - - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.iops[is_write]) { - iops_limit = bs->io_limits.iops[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - ios_limit = iops_limit * slice_time; - ios_base = bs->slice_submitted.ios[is_write]; - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->slice_submitted.ios[!is_write]; - } - - if (ios_base + 1 <= ios_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch, in seconds */ - wait_time = (ios_base + 1) / iops_limit; - if (wait_time > elapsed_time) { - wait_time = wait_time - elapsed_time; - } else { - wait_time = 0; - } - - /* Exceeded current slice, extend it by another slice time */ - bs->slice_end += BLOCK_IO_SLICE_TIME; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait) -{ - int64_t now, max_wait; - uint64_t bps_wait = 0, iops_wait = 0; - double elapsed_time; - int bps_ret, iops_ret; - - now = qemu_get_clock_ns(vm_clock); - if (now > bs->slice_end) { - bs->slice_start = now; - bs->slice_end = now + BLOCK_IO_SLICE_TIME; - memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); - } - - elapsed_time = now - bs->slice_start; - elapsed_time /= (NANOSECONDS_PER_SECOND); - - bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, - is_write, elapsed_time, &bps_wait); - iops_ret = bdrv_exceed_iops_limits(bs, is_write, - elapsed_time, &iops_wait); - if (bps_ret || iops_ret) { - max_wait = bps_wait > iops_wait ? bps_wait : iops_wait; - if (wait) { - *wait = max_wait; - } - - now = qemu_get_clock_ns(vm_clock); - if (bs->slice_end < now + max_wait) { - bs->slice_end = now + max_wait; - } - - return true; - } - - if (wait) { - *wait = 0; - } - - bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * - BDRV_SECTOR_SIZE; - bs->slice_submitted.ios[is_write]++; - - return false; -} - /**************************************************************/ /* async block device emulation */ @@ -4418,6 +4408,23 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs) } } +/* Get a reference to bs */ +void bdrv_ref(BlockDriverState *bs) +{ + bs->refcnt++; +} + +/* Release a previously grabbed reference to bs. + * If after releasing, reference count is zero, the BlockDriverState is + * deleted. */ +void bdrv_unref(BlockDriverState *bs) +{ + assert(bs->refcnt > 0); + if (--bs->refcnt == 0) { + bdrv_delete(bs); + } +} + void bdrv_set_in_use(BlockDriverState *bs, int in_use) { assert(bs->in_use != in_use); @@ -4500,6 +4507,7 @@ void bdrv_img_create(const char *filename, const char *fmt, BlockDriverState *bs = NULL; BlockDriver *drv, *proto_drv; BlockDriver *backing_drv = NULL; + Error *local_err = NULL; int ret = 0; /* Find driver and parse its options */ @@ -4586,10 +4594,13 @@ void bdrv_img_create(const char *filename, const char *fmt, bs = bdrv_new(""); ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags, - backing_drv); + backing_drv, &local_err); if (ret < 0) { - error_setg_errno(errp, -ret, "Could not open '%s'", - backing_file->value.s); + error_setg_errno(errp, -ret, "Could not open '%s': %s", + backing_file->value.s, + error_get_pretty(local_err)); + error_free(local_err); + local_err = NULL; goto out; } bdrv_get_geometry(bs, &size); @@ -4608,22 +4619,19 @@ void bdrv_img_create(const char *filename, const char *fmt, print_option_parameters(param); puts(""); } - ret = bdrv_create(drv, filename, param); - if (ret < 0) { - if (ret == -ENOTSUP) { - error_setg(errp,"Formatting or formatting option not supported for " - "file format '%s'", fmt); - } else if (ret == -EFBIG) { - const char *cluster_size_hint = ""; - if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) { - cluster_size_hint = " (try using a larger cluster size)"; - } - error_setg(errp, "The image size is too large for file format '%s'%s", - fmt, cluster_size_hint); - } else { - error_setg(errp, "%s: error while creating %s: %s", filename, fmt, - strerror(-ret)); + ret = bdrv_create(drv, filename, param, &local_err); + if (ret == -EFBIG) { + /* This is generally a better message than whatever the driver would + * deliver (especially because of the cluster_size_hint), since that + * is most probably not much different from "image too large". */ + const char *cluster_size_hint = ""; + if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) { + cluster_size_hint = " (try using a larger cluster size)"; } + error_setg(errp, "The image size is too large for file format '%s'" + "%s", fmt, cluster_size_hint); + error_free(local_err); + local_err = NULL; } out: @@ -4631,7 +4639,10 @@ out: free_option_parameters(param); if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); + } + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); } } @@ -4646,3 +4657,30 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, { notifier_with_return_list_add(&bs->before_write_notifiers, notifier); } + +int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options) +{ + if (bs->drv->bdrv_amend_options == NULL) { + return -ENOTSUP; + } + return bs->drv->bdrv_amend_options(bs, options); +} + +ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs) +{ + if (bs->drv->bdrv_check_ext_snapshot) { + return bs->drv->bdrv_check_ext_snapshot(bs); + } + + if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) { + return bs->file->drv->bdrv_check_ext_snapshot(bs); + } + + /* external snapshots are allowed by default */ + return EXT_SNAPSHOT_ALLOWED; +} + +ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs) +{ + return EXT_SNAPSHOT_FORBIDDEN; +}