X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block.c;h=8da89aaa62374432b27776937a25a23e120f178e;hb=da6aef48d9a1bcda301a3a922b240a2c1aba8026;hp=dad9a4fa436da7a3fada90c62c428c72d8f522a7;hpb=d530697ca20e19f7a626f4c1c8b26fccd0dc4470;p=mirror_qemu.git diff --git a/block.c b/block.c index dad9a4fa43..8da89aaa62 100644 --- a/block.c +++ b/block.c @@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - qemu_co_mutex_init(&bs->reqs_lock); + qemu_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; bs->aio_context = qemu_get_aio_context(); @@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, int ret; GLOBAL_STATE_CODE(); ERRP_GUARD(); - assert_bdrv_graph_readable(); if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", @@ -556,8 +555,9 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, * On success, return @blk's actual length. * Otherwise, return -errno. */ -static int64_t create_file_fallback_truncate(BlockBackend *blk, - int64_t minimum_size, Error **errp) +static int64_t coroutine_fn GRAPH_UNLOCKED +create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size, + Error **errp) { Error *local_err = NULL; int64_t size; @@ -565,14 +565,14 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, GLOBAL_STATE_CODE(); - ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, - &local_err); + ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, + &local_err); if (ret < 0 && ret != -ENOTSUP) { error_propagate(errp, local_err); return ret; } - size = blk_getlength(blk); + size = blk_co_getlength(blk); if (size < 0) { error_free(local_err); error_setg_errno(errp, -size, @@ -661,8 +661,10 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, blk = blk_co_new_open(filename, NULL, options, BDRV_O_RDWR | BDRV_O_RESIZE, errp); if (!blk) { - error_prepend(errp, "Protocol driver '%s' does not support image " - "creation, and opening the image failed: ", + error_prepend(errp, "Protocol driver '%s' does not support creating " + "new images, so an existing image must be selected as " + "the target; however, opening the given target as an " + "existing image failed: ", drv->format_name); return -EINVAL; } @@ -1614,6 +1616,7 @@ static int no_coroutine_fn GRAPH_UNLOCKED bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, QDict *options, int open_flags, Error **errp) { + AioContext *ctx; Error *local_err = NULL; int i, ret; GLOBAL_STATE_CODE(); @@ -1661,15 +1664,21 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; + /* Get the context after .bdrv_open, it can change the context */ + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { error_setg_errno(errp, -ret, "Could not refresh total sector count"); + aio_context_release(ctx); return ret; } bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(bs, NULL, &local_err); bdrv_graph_rdunlock_main_loop(); + aio_context_release(ctx); if (local_err) { error_propagate(errp, local_err); @@ -2848,7 +2857,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) * Replaces the node that a BdrvChild points to without updating permissions. * * If @new_bs is non-NULL, the parent of @child must already be drained through - * @child. + * @child and the caller must hold the AioContext lock for @new_bs. */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) @@ -2887,7 +2896,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } /* TODO Pull this up into the callers to avoid polling here */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock(new_bs); if (old_bs) { if (child->klass->detach) { child->klass->detach(child); @@ -2983,6 +2992,10 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { * Function doesn't update permissions, caller is responsible for this. * * Returns new created child. + * + * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and + * @child_bs can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. */ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, const char *child_name, @@ -2993,7 +3006,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, Transaction *tran, Error **errp) { BdrvChild *new_child; - AioContext *parent_ctx; + AioContext *parent_ctx, *new_child_ctx; AioContext *child_ctx = bdrv_get_aio_context(child_bs); assert(child_class->get_parent_desc); @@ -3044,6 +3057,12 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, } } + new_child_ctx = bdrv_get_aio_context(child_bs); + if (new_child_ctx != child_ctx) { + aio_context_release(child_ctx); + aio_context_acquire(new_child_ctx); + } + bdrv_ref(child_bs); /* * Let every new BdrvChild start with a drained parent. Inserting the child @@ -3073,11 +3092,20 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, }; tran_add(tran, &bdrv_attach_child_common_drv, s); + if (new_child_ctx != child_ctx) { + aio_context_release(new_child_ctx); + aio_context_acquire(child_ctx); + } + return new_child; } /* * Function doesn't update permissions, caller is responsible for this. + * + * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and + * @child_bs can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. */ static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -3341,6 +3369,10 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) * callers which don't need their own reference any more must call bdrv_unref(). * * Function doesn't update permissions, caller is responsible for this. + * + * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and + * @child_bs can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. */ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -3429,6 +3461,11 @@ out: return 0; } +/* + * The caller must hold the AioContext lock for @backing_hd. Both @bs and + * @backing_hd can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. + */ static int bdrv_set_backing_noperm(BlockDriverState *bs, BlockDriverState *backing_hd, Transaction *tran, Error **errp) @@ -3479,6 +3516,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, * itself, all options starting with "${bdref_key}." are considered part of the * BlockdevRef. * + * The caller must hold the main AioContext lock. + * * TODO Can this be unified with bdrv_open_image()? */ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, @@ -3490,6 +3529,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, int ret = 0; bool implicit_backing = false; BlockDriverState *backing_hd; + AioContext *backing_hd_ctx; QDict *options; QDict *tmp_parent_options = NULL; Error *local_err = NULL; @@ -3574,8 +3614,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ + backing_hd_ctx = bdrv_get_aio_context(backing_hd); + aio_context_acquire(backing_hd_ctx); ret = bdrv_set_backing_hd(bs, backing_hd, errp); bdrv_unref(backing_hd); + aio_context_release(backing_hd_ctx); + if (ret < 0) { goto free_exit; } @@ -3645,6 +3689,10 @@ done: * BlockdevRef. * * The BlockdevRef will be removed from the options QDict. + * + * The caller must hold the lock of the main AioContext and no other AioContext. + * @parent can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. */ BdrvChild *bdrv_open_child(const char *filename, QDict *options, const char *bdref_key, @@ -3654,6 +3702,8 @@ BdrvChild *bdrv_open_child(const char *filename, bool allow_none, Error **errp) { BlockDriverState *bs; + BdrvChild *child; + AioContext *ctx; GLOBAL_STATE_CODE(); @@ -3663,12 +3713,21 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); + aio_context_release(ctx); + + return child; } /* * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. + * + * The caller must hold the lock of the main AioContext and no other AioContext. + * @parent can move to a different AioContext in this function. Callers must + * make sure that their AioContext locking is still correct after this. */ int bdrv_open_file_child(const char *filename, QDict *options, const char *bdref_key, @@ -3743,6 +3802,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot = NULL; + AioContext *ctx = bdrv_get_aio_context(bs); int ret; GLOBAL_STATE_CODE(); @@ -3751,7 +3811,10 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, instead of opening 'filename' directly */ /* Get the required size from the image */ + aio_context_acquire(ctx); total_size = bdrv_getlength(bs); + aio_context_release(ctx); + if (total_size < 0) { error_setg_errno(errp, -total_size, "Could not get image size"); goto out; @@ -3785,7 +3848,10 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, goto out; } + aio_context_acquire(ctx); ret = bdrv_append(bs_snapshot, bs, errp); + aio_context_release(ctx); + if (ret < 0) { bs_snapshot = NULL; goto out; @@ -3811,9 +3877,7 @@ out: * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). * - * The caller must always hold @filename AioContext lock, because this - * function eventually calls bdrv_refresh_total_sectors() which polls - * when called from non-coroutine context. + * The caller must always hold the main AioContext lock. */ static BlockDriverState * no_coroutine_fn bdrv_open_inherit(const char *filename, const char *reference, QDict *options, @@ -3831,6 +3895,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, Error *local_err = NULL; QDict *snapshot_options = NULL; int snapshot_flags = 0; + AioContext *ctx = qemu_get_aio_context(); assert(!child_class || !flags); assert(!child_class == !parent); @@ -3968,9 +4033,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, /* Not requesting BLK_PERM_CONSISTENT_READ because we're only * looking at the header to guess the image format. This works even * in cases where a guest would not see a consistent state. */ - file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); + ctx = bdrv_get_aio_context(file_bs); + aio_context_acquire(ctx); + file = blk_new(ctx, 0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); + aio_context_release(ctx); + if (local_err) { goto fail; } @@ -4016,8 +4085,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, goto fail; } + /* The AioContext could have changed during bdrv_open_common() */ + ctx = bdrv_get_aio_context(bs); + if (file) { + aio_context_acquire(ctx); blk_unref(file); + aio_context_release(ctx); file = NULL; } @@ -4075,13 +4149,16 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, * (snapshot_bs); thus, we have to drop the strong reference to bs * (which we obtained by calling bdrv_new()). bs will not be deleted, * though, because the overlay still has a reference to it. */ + aio_context_acquire(ctx); bdrv_unref(bs); + aio_context_release(ctx); bs = snapshot_bs; } return bs; fail: + aio_context_acquire(ctx); blk_unref(file); qobject_unref(snapshot_options); qobject_unref(bs->explicit_options); @@ -4090,22 +4167,21 @@ fail: bs->options = NULL; bs->explicit_options = NULL; bdrv_unref(bs); + aio_context_release(ctx); error_propagate(errp, local_err); return NULL; close_and_fail: + aio_context_acquire(ctx); bdrv_unref(bs); + aio_context_release(ctx); qobject_unref(snapshot_options); qobject_unref(options); error_propagate(errp, local_err); return NULL; } -/* - * The caller must always hold @filename AioContext lock, because this - * function eventually calls bdrv_refresh_total_sectors() which polls - * when called from non-coroutine context. - */ +/* The caller must always hold the main AioContext lock. */ BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp) { @@ -4570,6 +4646,11 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, * backing BlockDriverState (or NULL). * * Return 0 on success, otherwise return < 0 and set @errp. + * + * The caller must hold the AioContext lock of @reopen_state->bs. + * @reopen_state->bs can move to a different AioContext in this function. + * Callers must make sure that their AioContext locking is still correct after + * this. */ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, bool is_backing, Transaction *tran, @@ -4582,6 +4663,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, const char *child_name = is_backing ? "backing" : "file"; QObject *value; const char *str; + AioContext *ctx, *old_ctx; + int ret; GLOBAL_STATE_CODE(); @@ -4646,8 +4729,22 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, reopen_state->old_file_bs = old_child_bs; } - return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, - tran, errp); + old_ctx = bdrv_get_aio_context(bs); + ctx = bdrv_get_aio_context(new_child_bs); + if (old_ctx != ctx) { + aio_context_release(old_ctx); + aio_context_acquire(ctx); + } + + ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); + + if (old_ctx != ctx) { + aio_context_release(ctx); + aio_context_acquire(old_ctx); + } + + return ret; } /* @@ -4666,6 +4763,7 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, * It is the responsibility of the caller to then call the abort() or * commit() for any other BDS that have been left in a prepare() state * + * The caller must hold the AioContext lock of @reopen_state->bs. */ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5380,6 +5478,8 @@ static void bdrv_delete(BlockDriverState *bs) bdrv_close(bs); + qemu_mutex_destroy(&bs->reqs_lock); + g_free(bs); } @@ -5391,12 +5491,17 @@ static void bdrv_delete(BlockDriverState *bs) * empty set of options. The reference to the QDict belongs to the block layer * after the call (even on failure), so if the caller intends to reuse the * dictionary, it needs to use qobject_ref() before calling bdrv_open. + * + * The caller holds the AioContext lock for @bs. It must make sure that @bs + * stays in the same AioContext, i.e. @options must not refer to nodes in a + * different AioContext. */ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, int flags, Error **errp) { ERRP_GUARD(); int ret; + AioContext *ctx = bdrv_get_aio_context(bs); BlockDriverState *new_node_bs = NULL; const char *drvname, *node_name; BlockDriver *drv; @@ -5417,8 +5522,14 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, GLOBAL_STATE_CODE(); + aio_context_release(ctx); + aio_context_acquire(qemu_get_aio_context()); new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, errp); + aio_context_release(qemu_get_aio_context()); + aio_context_acquire(ctx); + assert(bdrv_get_aio_context(bs) == ctx); + options = NULL; /* bdrv_new_open_driver() eats options */ if (!new_node_bs) { error_prepend(errp, "Could not create node: "); @@ -6373,6 +6484,13 @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) } memset(bdi, 0, sizeof(*bdi)); ret = drv->bdrv_co_get_info(bs, bdi); + if (bdi->subcluster_size == 0) { + /* + * If the driver left this unset, subclusters are not supported. + * Then it is safe to treat each cluster as having only one subcluster. + */ + bdi->subcluster_size = bdi->cluster_size; + } if (ret < 0) { return ret; } @@ -7044,6 +7162,8 @@ void bdrv_img_create(const char *filename, const char *fmt, return; } + aio_context_acquire(qemu_get_aio_context()); + /* Create parameter list */ create_opts = qemu_opts_append(create_opts, drv->create_opts); create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); @@ -7137,7 +7257,7 @@ void bdrv_img_create(const char *filename, const char *fmt, if (!backing_fmt) { error_setg(&local_err, "Backing file specified without backing format"); - error_append_hint(&local_err, "Detected format of %s.", + error_append_hint(&local_err, "Detected format of %s.\n", bs->drv->format_name); goto out; } @@ -7193,6 +7313,7 @@ out: qemu_opts_del(opts); qemu_opts_free(create_opts); error_propagate(errp, local_err); + aio_context_release(qemu_get_aio_context()); } AioContext *bdrv_get_aio_context(BlockDriverState *bs) @@ -7283,9 +7404,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) bs->drv->bdrv_detach_aio_context(bs); } - if (bs->quiesce_counter) { - aio_enable_external(bs->aio_context); - } bs->aio_context = NULL; } @@ -7295,10 +7413,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, BdrvAioNotifier *ban, *ban_tmp; GLOBAL_STATE_CODE(); - if (bs->quiesce_counter) { - aio_disable_external(new_context); - } - bs->aio_context = new_context; if (bs->drv && bs->drv->bdrv_attach_aio_context) { @@ -7479,7 +7593,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, /* * Recursion phase: go through all nodes of the graph. * Take care of checking that all nodes support changing AioContext - * and drain them, builing a linear list of callbacks to run if everything + * and drain them, building a linear list of callbacks to run if everything * is successful (the transaction itself). */ tran = tran_new(); @@ -7982,6 +8096,25 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, return; } + /* + * Non-zoned block drivers do not follow zoned storage constraints + * (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned + * drivers in a graph. + */ + if (!parent_bs->drv->supports_zoned_children && + child_bs->bl.zoned == BLK_Z_HM) { + /* + * The host-aware model allows zoned storage constraints and random + * write. Allow mixing host-aware and non-zoned drivers. Using + * host-aware device as a regular device. + */ + error_setg(errp, "Cannot add a %s child to a %s parent", + child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned", + parent_bs->drv->supports_zoned_children ? + "support zoned children" : "not support zoned children"); + return; + } + if (!QLIST_EMPTY(&child_bs->parents)) { error_setg(errp, "The node %s already has a parent", child_bs->node_name);