X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block.c;h=c139540f2bac727c17c0c6d150dc1d5c2f813618;hb=6b7ac49d570c66754fad1b80cc200c7596d1facd;hp=35e78e2172d52f1a75fd1c6fe0e24179e9b594db;hpb=62a01a27f7f67853553679201e8617ccd28e965b;p=mirror_qemu.git diff --git a/block.c b/block.c index 35e78e2172..c139540f2b 100644 --- a/block.c +++ b/block.c @@ -426,7 +426,7 @@ BlockDriver *bdrv_find_format(const char *format_name) return bdrv_do_find_format(format_name); } -int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) +static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) { static const char *whitelist_rw[] = { CONFIG_BDRV_RW_WHITELIST @@ -441,13 +441,13 @@ int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) } for (p = whitelist_rw; *p; p++) { - if (!strcmp(drv->format_name, *p)) { + if (!strcmp(format_name, *p)) { return 1; } } if (read_only) { for (p = whitelist_ro; *p; p++) { - if (!strcmp(drv->format_name, *p)) { + if (!strcmp(format_name, *p)) { return 1; } } @@ -455,6 +455,11 @@ int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) return 0; } +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) +{ + return bdrv_format_is_whitelisted(drv->format_name, read_only); +} + bool bdrv_uses_whitelist(void) { return use_bdrv_whitelist; @@ -931,6 +936,20 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } +static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockDriverState *bs = child->opaque; + return bdrv_can_set_aio_context(bs, ctx, ignore, errp); +} + +static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockDriverState *bs = child->opaque; + return bdrv_set_aio_context_ignore(bs, ctx, ignore); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -945,8 +964,9 @@ static void 
bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); - /* Copy the read-only option from the parent */ + /* Copy the read-only and discard options from the parent */ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); /* aio=native doesn't work for cache.direct=off, so disable it for the * temporary snapshot */ @@ -997,6 +1017,8 @@ const BdrvChildRole child_file = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; /* @@ -1023,6 +1045,8 @@ const BdrvChildRole child_format = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -1146,6 +1170,8 @@ const BdrvChildRole child_backing = { .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -1158,13 +1184,6 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) */ open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); - /* - * Snapshots should be writable. 
- */ - if (flags & BDRV_O_TEMPORARY) { - open_flags |= BDRV_O_RDWR; - } - return open_flags; } @@ -1687,12 +1706,16 @@ static int bdrv_fill_options(QDict **options, const char *filename, static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp); + GSList *ignore_children, + bool *tighten_restrictions, Error **errp); static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); typedef struct BlockReopenQueueEntry { bool prepared; + bool perms_checked; BDRVReopenState state; QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; } BlockReopenQueueEntry; @@ -1743,11 +1766,10 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { - if (bs->drv && bs->drv->bdrv_child_perm) { - bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, - parent_perm, parent_shared, - nperm, nshared); - } + assert(bs->drv && bs->drv->bdrv_child_perm); + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, + parent_perm, parent_shared, + nperm, nshared); /* TODO Take force_share from reopen_queue */ if (child_bs && child_bs->force_share) { *nshared = BLK_PERM_ALL; @@ -1760,23 +1782,61 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, * permissions of all its parents. This involves checking whether all necessary * permission changes to child nodes can be performed. * + * Will set *tighten_restrictions to true if and only if new permissions have to + * be taken or currently shared permissions are to be unshared. Otherwise, + * errors are not fatal as long as the caller accepts that the restrictions + * remain tighter than they need to be. The caller still has to abort the + * transaction. 
+ * @tighten_restrictions cannot be used together with @q: When reopening, we may + * encounter fatal errors even though no restrictions are to be tightened. For + * example, changing a node from RW to RO will fail if the WRITE permission is + * to be kept. + * * A call to this function must always be followed by a call to bdrv_set_perm() * or bdrv_abort_perm_update(). */ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t cumulative_perms, uint64_t cumulative_shared_perms, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) { BlockDriver *drv = bs->drv; BdrvChild *c; int ret; + assert(!q || !tighten_restrictions); + + if (tighten_restrictions) { + uint64_t current_perms, current_shared; + uint64_t added_perms, removed_shared_perms; + + bdrv_get_cumulative_perm(bs, &current_perms, &current_shared); + + added_perms = cumulative_perms & ~current_perms; + removed_shared_perms = current_shared & ~cumulative_shared_perms; + + *tighten_restrictions = added_perms || removed_shared_perms; + } + /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && !bdrv_is_writable_after_reopen(bs, q)) { - error_setg(errp, "Block node is read-only"); + if (!bdrv_is_writable_after_reopen(bs, NULL)) { + error_setg(errp, "Block node is read-only"); + } else { + uint64_t current_perms, current_shared; + bdrv_get_cumulative_perm(bs, &current_perms, &current_shared); + if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + error_setg(errp, "Cannot make block node read-only, there is " + "a writer on it"); + } else { + error_setg(errp, "Cannot make block node read-only and create " + "a writer on it"); + } + } + return -EPERM; } @@ -1799,11 +1859,18 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; + bool child_tighten_restr; + 
bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); - ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, - ignore_children, errp); + ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, + tighten_restrictions ? &child_tighten_restr + : NULL, + errp); + if (tighten_restrictions) { + *tighten_restrictions |= child_tighten_restr; + } if (ret < 0) { return ret; } @@ -1927,17 +1994,23 @@ char *bdrv_perm_names(uint64_t perm) * set, the BdrvChild objects in this list are ignored in the calculations; * this allows checking permission updates for an existing reference. * + * See bdrv_check_perm() for the semantics of @tighten_restrictions. + * * Needs to be followed by a call to either bdrv_set_perm() or * bdrv_abort_perm_update(). */ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t new_used_perm, uint64_t new_shared_perm, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, + Error **errp) { BdrvChild *c; uint64_t cumulative_perms = new_used_perm; uint64_t cumulative_shared_perms = new_shared_perm; + assert(!q || !tighten_restrictions); + /* There is no reason why anyone couldn't tolerate write_unchanged */ assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); @@ -1949,6 +2022,11 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((new_used_perm & c->shared_perm) != new_used_perm) { char *user = bdrv_child_user_desc(c); char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + error_setg(errp, "Conflicts with use by %s as '%s', which does not " "allow '%s' on %s", user, c->name, perm_names, bdrv_get_node_name(c->bs)); @@ -1960,6 +2038,11 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((c->perm & new_shared_perm) != c->perm) { char *user = bdrv_child_user_desc(c); 
char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + error_setg(errp, "Conflicts with use by %s as '%s', which uses " "'%s' on %s", user, c->name, perm_names, bdrv_get_node_name(c->bs)); @@ -1973,19 +2056,21 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, } return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, - ignore_children, errp); + ignore_children, tighten_restrictions, errp); } /* Needs to be followed by a call to either bdrv_child_set_perm() or * bdrv_child_abort_perm_update(). */ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) { int ret; ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); - ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); + ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, + tighten_restrictions, errp); g_slist_free(ignore_children); if (ret < 0) { @@ -2036,11 +2121,26 @@ static void bdrv_child_abort_perm_update(BdrvChild *c) int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Error **errp) { + Error *local_err = NULL; int ret; + bool tighten_restrictions; - ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp); + ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, + &tighten_restrictions, &local_err); if (ret < 0) { bdrv_child_abort_perm_update(c); + if (tighten_restrictions) { + error_propagate(errp, local_err); + } else { + /* + * Our caller may intend to only loosen restrictions and + * does not expect this function to fail. Errors are not + * fatal in such a case, so we can just hide them from our + * caller. 
+ */ + error_free(local_err); + ret = 0; + } return ret; } @@ -2049,6 +2149,18 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) +{ + uint64_t parent_perms, parent_shared; + uint64_t perms, shared; + + bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); + bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, + &perms, &shared); + + return bdrv_child_try_set_perm(c, perms, shared, errp); +} + void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, @@ -2127,6 +2239,8 @@ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *old_bs = child->bs; int i; + assert(!child->frozen); + if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } @@ -2192,33 +2306,69 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) bdrv_replace_child_noperm(child, new_bs); + /* + * Start with the new node's permissions. If @new_bs is a (direct + * or indirect) child of @old_bs, we must complete the permission + * update on @new_bs before we loosen the restrictions on @old_bs. + * Otherwise, bdrv_check_perm() on @old_bs would re-initiate + * updating the permissions of @new_bs, and thus not purely loosen + * restrictions. + */ + if (new_bs) { + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + bdrv_set_perm(new_bs, perm, shared_perm); + } + if (old_bs) { /* Update permissions for old node. This is guaranteed to succeed * because we're just taking a parent away, so we're loosening * restrictions. 
*/ + bool tighten_restrictions; + int ret; + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); - bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); - bdrv_set_perm(old_bs, perm, shared_perm); - } + ret = bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(old_bs); + } else { + bdrv_set_perm(old_bs, perm, shared_perm); + } - if (new_bs) { - bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); - bdrv_set_perm(new_bs, perm, shared_perm); + /* When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext */ + bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); } } +/* + * This function steals the reference to child_bs from the caller. + * That reference is later dropped by bdrv_root_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * The caller must hold the AioContext lock @child_bs, but not that of @ctx + * (unless @child_bs is already in @ctx). + */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { BdrvChild *child; + Error *local_err = NULL; int ret; - ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); + ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, NULL, + errp); if (ret < 0) { bdrv_abort_perm_update(child_bs); + bdrv_unref(child_bs); return NULL; } @@ -2232,12 +2382,48 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, .opaque = opaque, }; + /* If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. 
If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. */ + if (bdrv_get_aio_context(child_bs) != ctx) { + ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); + if (ret < 0 && child_role->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, child);; + ctx = bdrv_get_aio_context(child_bs); + if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, child);; + child_role->set_aio_ctx(child, ctx, &ignore); + } + g_slist_free(ignore); + } + if (ret < 0) { + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); + return NULL; + } + } + /* This performs the matching bdrv_set_perm() for the above check. */ bdrv_replace_child(child, child_bs); return child; } +/* + * This function transfers the reference to child_bs from the caller + * to parent_bs. That reference is later dropped by parent_bs on + * bdrv_close() or if someone calls bdrv_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * If @parent_bs and @child_bs are in different AioContexts, the caller must + * hold the AioContext lock for @child_bs, but not for @parent_bs. 
+ */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, @@ -2250,11 +2436,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); - assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, + bdrv_get_aio_context(parent_bs), perm, shared_perm, parent_bs, errp); if (child == NULL) { return NULL; @@ -2343,6 +2529,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && bdrv_inherits_from_recursive(backing_hd, bs); + if (bdrv_is_backing_chain_frozen(bs, backing_bs(bs), errp)) { + return; + } + if (backing_hd) { bdrv_ref(backing_hd); } @@ -2361,12 +2551,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, /* If backing_hd was already part of bs's backing chain, and * inherits_from pointed recursively to bs then let's update it to * point directly to bs (else it will become NULL). 
*/ - if (update_inherits_from) { + if (bs->backing && update_inherits_from) { backing_hd->inherits_from = bs; } - if (!bs->backing) { - bdrv_unref(backing_hd); - } out: bdrv_refresh_limits(bs, NULL); @@ -2464,7 +2651,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, ret = -EINVAL; goto free_exit; } - bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs)); if (implicit_backing) { bdrv_refresh_filename(backing_hd); @@ -2554,7 +2740,6 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { - BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -2563,13 +2748,7 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); - if (!c) { - bdrv_unref(bs); - return NULL; - } - - return c; + return bdrv_attach_child(parent, bs, bdref_key, child_role, errp); } /* TODO Future callers may need to specify parent/child_role in order for @@ -2838,7 +3017,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, /* Not requesting BLK_PERM_CONSISTENT_READ because we're only * looking at the header to guess the image format. This works even * in cases where a guest would not see a consistent state. */ - file = blk_new(0, BLK_PERM_ALL); + file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); if (local_err) { @@ -2978,6 +3157,74 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, NULL, errp); } +/* Return true if the NULL-terminated @list contains @str */ +static bool is_str_in_list(const char *str, const char *const *list) +{ + if (str && list) { + int i; + for (i = 0; list[i] != NULL; i++) { + if (!strcmp(str, list[i])) { + return true; + } + } + } + return false; +} + +/* + * Check that every option set in @bs->options is also set in + * @new_opts. 
+ * + * Options listed in the common_options list and in + * @bs->drv->mutable_opts are skipped. + * + * Return 0 on success, otherwise return -EINVAL and set @errp. + */ +static int bdrv_reset_options_allowed(BlockDriverState *bs, + const QDict *new_opts, Error **errp) +{ + const QDictEntry *e; + /* These options are common to all block drivers and are handled + * in bdrv_reopen_prepare() so they can be left out of @new_opts */ + const char *const common_options[] = { + "node-name", "discard", "cache.direct", "cache.no-flush", + "read-only", "auto-read-only", "detect-zeroes", NULL + }; + + for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { + if (!qdict_haskey(new_opts, e->key) && + !is_str_in_list(e->key, common_options) && + !is_str_in_list(e->key, bs->drv->mutable_opts)) { + error_setg(errp, "Option '%s' cannot be reset " + "to its default value", e->key); + return -EINVAL; + } + } + + return 0; +} + +/* + * Returns true if @child can be reached recursively from @bs + */ +static bool bdrv_recurse_has_child(BlockDriverState *bs, + BlockDriverState *child) +{ + BdrvChild *c; + + if (bs == child) { + return true; + } + + QLIST_FOREACH(c, &bs->children, next) { + if (bdrv_recurse_has_child(c->bs, child)) { + return true; + } + } + + return false; +} + /* * Adds a BlockDriverState to a simple queue for an atomic, transactional * reopen of multiple devices. 
@@ -3005,7 +3252,8 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, QDict *options, const BdrvChildRole *role, QDict *parent_options, - int parent_flags) + int parent_flags, + bool keep_old_opts) { assert(bs != NULL); @@ -3045,13 +3293,13 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, */ /* Old explicitly set values (don't overwrite by inherited value) */ - if (bs_entry) { - old_options = qdict_clone_shallow(bs_entry->state.explicit_options); - } else { - old_options = qdict_clone_shallow(bs->explicit_options); + if (bs_entry || keep_old_opts) { + old_options = qdict_clone_shallow(bs_entry ? + bs_entry->state.explicit_options : + bs->explicit_options); + bdrv_join_options(bs, options, old_options); + qobject_unref(old_options); } - bdrv_join_options(bs, options, old_options); - qobject_unref(old_options); explicit_options = qdict_clone_shallow(options); @@ -3063,10 +3311,12 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, flags = bdrv_get_flags(bs); } - /* Old values are used for options that aren't set yet */ - old_options = qdict_clone_shallow(bs->options); - bdrv_join_options(bs, options, old_options); - qobject_unref(old_options); + if (keep_old_opts) { + /* Old values are used for options that aren't set yet */ + old_options = qdict_clone_shallow(bs->options); + bdrv_join_options(bs, options, old_options); + qobject_unref(old_options); + } /* We have the final set of options so let's update the flags */ options_copy = qdict_clone_shallow(options); @@ -3099,9 +3349,21 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, bs_entry->state.perm = UINT64_MAX; bs_entry->state.shared_perm = 0; + /* + * If keep_old_opts is false then it means that unspecified + * options must be reset to their original value. We don't allow + * resetting 'backing' but we need to know if the option is + * missing in order to decide if we have to return an error. 
+ */ + if (!keep_old_opts) { + bs_entry->state.backing_missing = + !qdict_haskey(options, "backing") && + !qdict_haskey(options, "backing.driver"); + } + QLIST_FOREACH(child, &bs->children, next) { - QDict *new_child_options; - char *child_key_dot; + QDict *new_child_options = NULL; + bool child_keep_old = keep_old_opts; /* reopen can only change the options of block devices that were * implicitly created and inherited options. For other (referenced) @@ -3110,13 +3372,32 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, continue; } - child_key_dot = g_strdup_printf("%s.", child->name); - qdict_extract_subqdict(explicit_options, NULL, child_key_dot); - qdict_extract_subqdict(options, &new_child_options, child_key_dot); - g_free(child_key_dot); + /* Check if the options contain a child reference */ + if (qdict_haskey(options, child->name)) { + const char *childref = qdict_get_try_str(options, child->name); + /* + * The current child must not be reopened if the child + * reference is null or points to a different node. + */ + if (g_strcmp0(childref, child->bs->node_name)) { + continue; + } + /* + * If the child reference points to the current child then + * reopen it with its existing set of options (note that + * it can still inherit new options from the parent). 
+ */ + child_keep_old = true; + } else { + /* Extract child options ("child-name.*") */ + char *child_key_dot = g_strdup_printf("%s.", child->name); + qdict_extract_subqdict(explicit_options, NULL, child_key_dot); + qdict_extract_subqdict(options, &new_child_options, child_key_dot); + g_free(child_key_dot); + } bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, - child->role, options, flags); + child->role, options, flags, child_keep_old); } return bs_queue; @@ -3124,9 +3405,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, - QDict *options) + QDict *options, bool keep_old_opts) { - return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0); + return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0, + keep_old_opts); } /* @@ -3146,23 +3428,44 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * All affected nodes must be drained between bdrv_reopen_queue() and * bdrv_reopen_multiple(). 
*/ -int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) { int ret = -1; BlockReopenQueueEntry *bs_entry, *next; - Error *local_err = NULL; assert(bs_queue != NULL); QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { assert(bs_entry->state.bs->quiesce_counter > 0); - if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { - error_propagate(errp, local_err); + if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) { goto cleanup; } bs_entry->prepared = true; } + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + BDRVReopenState *state = &bs_entry->state; + ret = bdrv_check_perm(state->bs, bs_queue, state->perm, + state->shared_perm, NULL, NULL, errp); + if (ret < 0) { + goto cleanup_perm; + } + /* Check if new_backing_bs would accept the new permissions */ + if (state->replace_backing_bs && state->new_backing_bs) { + uint64_t nperm, nshared; + bdrv_child_perm(state->bs, state->new_backing_bs, + NULL, &child_backing, bs_queue, + state->perm, state->shared_perm, + &nperm, &nshared); + ret = bdrv_check_update_perm(state->new_backing_bs, NULL, + nperm, nshared, NULL, NULL, errp); + if (ret < 0) { + goto cleanup_perm; + } + } + bs_entry->perms_checked = true; + } + /* If we reach this point, we have success and just need to apply the * changes */ @@ -3171,7 +3474,23 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er } ret = 0; +cleanup_perm: + QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + BDRVReopenState *state = &bs_entry->state; + + if (!bs_entry->perms_checked) { + continue; + } + if (ret == 0) { + bdrv_set_perm(state->bs, state->perm, state->shared_perm); + } else { + bdrv_abort_perm_update(state->bs); + if (state->replace_backing_bs && state->new_backing_bs) { + bdrv_abort_perm_update(state->new_backing_bs); + } + } + } cleanup: QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { if (ret) { @@ -3181,6 +3500,9 @@ 
cleanup: qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); } + if (bs_entry->state.new_backing_bs) { + bdrv_unref(bs_entry->state.new_backing_bs); + } g_free(bs_entry); } g_free(bs_queue); @@ -3198,8 +3520,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts); - ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp); + queue = bdrv_reopen_queue(NULL, bs, opts, true); + ret = bdrv_reopen_multiple(queue, errp); bdrv_subtree_drained_end(bs); return ret; @@ -3252,6 +3574,101 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, *shared = cumulative_shared_perms; } +/* + * Take a BDRVReopenState and check if the value of 'backing' in the + * reopen_state->options QDict is valid or not. + * + * If 'backing' is missing from the QDict then return 0. + * + * If 'backing' contains the node name of the backing file of + * reopen_state->bs then return 0. + * + * If 'backing' contains a different node name (or is null) then check + * whether the current backing file can be replaced with the new one. + * If that's the case then reopen_state->replace_backing_bs is set to + * true and reopen_state->new_backing_bs contains a pointer to the new + * backing BlockDriverState (or NULL). + * + * Return 0 on success, otherwise return < 0 and set @errp. 
+ */ +static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, + Error **errp) +{ + BlockDriverState *bs = reopen_state->bs; + BlockDriverState *overlay_bs, *new_backing_bs; + QObject *value; + const char *str; + + value = qdict_get(reopen_state->options, "backing"); + if (value == NULL) { + return 0; + } + + switch (qobject_type(value)) { + case QTYPE_QNULL: + new_backing_bs = NULL; + break; + case QTYPE_QSTRING: + str = qobject_get_try_str(value); + new_backing_bs = bdrv_lookup_bs(NULL, str, errp); + if (new_backing_bs == NULL) { + return -EINVAL; + } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { + error_setg(errp, "Making '%s' a backing file of '%s' " + "would create a cycle", str, bs->node_name); + return -EINVAL; + } + break; + default: + /* 'backing' does not allow any other data type */ + g_assert_not_reached(); + } + + /* + * TODO: before removing the x- prefix from x-blockdev-reopen we + * should move the new backing file into the right AioContext + * instead of returning an error. + */ + if (new_backing_bs) { + if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { + error_setg(errp, "Cannot use a new backing file " + "with a different AioContext"); + return -EINVAL; + } + } + + /* + * Find the "actual" backing file by skipping all links that point + * to an implicit node, if any (e.g. a commit filter node). 
+ */ + overlay_bs = bs; + while (backing_bs(overlay_bs) && backing_bs(overlay_bs)->implicit) { + overlay_bs = backing_bs(overlay_bs); + } + + /* If we want to replace the backing file we need some extra checks */ + if (new_backing_bs != backing_bs(overlay_bs)) { + /* Check for implicit nodes between bs and its backing file */ + if (bs != overlay_bs) { + error_setg(errp, "Cannot change backing link if '%s' has " + "an implicit backing file", bs->node_name); + return -EPERM; + } + /* Check if the backing link that we want to replace is frozen */ + if (bdrv_is_backing_chain_frozen(overlay_bs, backing_bs(overlay_bs), + errp)) { + return -EPERM; + } + reopen_state->replace_backing_bs = true; + if (new_backing_bs) { + bdrv_ref(new_backing_bs); + reopen_state->new_backing_bs = new_backing_bs; + } + } + + return 0; +} + /* * Prepares a BlockDriverState for reopen. All changes are staged in the * 'opaque' field of the BDRVReopenState, which is used and allocated by @@ -3350,6 +3767,17 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, } if (drv->bdrv_reopen_prepare) { + /* + * If a driver-specific option is missing, it means that we + * should reset it to its default value. + * But not all options allow that, so we need to check it first. + */ + ret = bdrv_reset_options_allowed(reopen_state->bs, + reopen_state->options, errp); + if (ret) { + goto error; + } + ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); if (ret) { if (local_err != NULL) { @@ -3373,6 +3801,30 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, drv_prepared = true; + /* + * We must provide the 'backing' option if the BDS has a backing + * file or if the image file has a backing file name as part of + * its metadata. Otherwise the 'backing' option can be omitted. 
+ */ + if (drv->supports_backing && reopen_state->backing_missing && + (backing_bs(reopen_state->bs) || reopen_state->bs->backing_file[0])) { + error_setg(errp, "backing is missing for '%s'", + reopen_state->bs->node_name); + ret = -EINVAL; + goto error; + } + + /* + * Allow changing the 'backing' option. The new value can be + * either a reference to an existing node (using its node name) + * or NULL to simply detach the current backing file. + */ + ret = bdrv_reopen_parse_backing(reopen_state, errp); + if (ret < 0) { + goto error; + } + qdict_del(reopen_state->options, "backing"); + /* Options that are not handled are only okay if they are unchanged * compared to the old state. It is expected that some options are only * used for the initial open, but not reopen (e.g. filename) */ @@ -3425,12 +3877,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, } while ((entry = qdict_next(reopen_state->options, entry))); } - ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm, - reopen_state->shared_perm, NULL, errp); - if (ret < 0) { - goto error; - } - ret = 0; /* Restore the original reopen_state->options QDict */ @@ -3488,6 +3934,11 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); bs->detect_zeroes = reopen_state->detect_zeroes; + if (reopen_state->replace_backing_bs) { + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); + } + /* Remove child references from bs->options and bs->explicit_options. * Child options were already removed in bdrv_reopen_queue_child() */ QLIST_FOREACH(child, &bs->children, next) { @@ -3495,10 +3946,22 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) qdict_del(bs->options, child->name); } - bdrv_refresh_limits(bs, NULL); + /* + * Change the backing file if a new one was specified. 
We do this + * after updating bs->options, so bdrv_refresh_filename() (called + * from bdrv_set_backing_hd()) has the new values. + */ + if (reopen_state->replace_backing_bs) { + BlockDriverState *old_backing_bs = backing_bs(bs); + assert(!old_backing_bs || !old_backing_bs->implicit); + /* Abort the permission update on the backing bs we're detaching */ + if (old_backing_bs) { + bdrv_abort_perm_update(old_backing_bs); + } + bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort); + } - bdrv_set_perm(reopen_state->bs, reopen_state->perm, - reopen_state->shared_perm); + bdrv_refresh_limits(bs, NULL); new_can_write = !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); @@ -3531,8 +3994,6 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state) if (drv->bdrv_reopen_abort) { drv->bdrv_reopen_abort(reopen_state); } - - bdrv_abort_perm_update(reopen_state->bs); } @@ -3541,7 +4002,6 @@ static void bdrv_close(BlockDriverState *bs) BdrvAioNotifier *ban, *ban_next; BdrvChild *child, *next; - assert(!bs->job); assert(!bs->refcnt); bdrv_drained_begin(bs); /* complete I/O */ @@ -3555,22 +4015,12 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } - bdrv_set_backing_hd(bs, NULL, &error_abort); - - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - bs->file = NULL; - } - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - /* TODO Remove bdrv_unref() from drivers' close function and use - * bdrv_unref_child() here */ - if (child->bs->inherits_from == bs) { - child->bs->inherits_from = NULL; - } - bdrv_detach_child(child); + bdrv_unref_child(bs, child); } + bs->backing = NULL; + bs->file = NULL; g_free(bs->opaque); bs->opaque = NULL; atomic_set(&bs->copy_on_read, 0); @@ -3699,19 +4149,24 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, uint64_t perm = 0, shared = BLK_PERM_ALL; int ret; - assert(!atomic_read(&from->in_flight)); - assert(!atomic_read(&to->in_flight)); - /* Make sure that @from doesn't go away 
until we have successfully attached * all of its parents to @to. */ bdrv_ref(from); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + bdrv_drained_begin(from); + /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { continue; } + if (c->frozen) { + error_setg(errp, "Cannot change '%s' link to '%s'", + c->name, from->node_name); + goto out; + } list = g_slist_prepend(list, c); perm |= c->perm; shared &= c->shared_perm; @@ -3719,7 +4174,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, /* Check whether the required permissions can be granted on @to, ignoring * all BdrvChild in @list so that they can't block themselves. */ - ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); + ret = bdrv_check_update_perm(to, NULL, perm, shared, list, NULL, errp); if (ret < 0) { bdrv_abort_perm_update(to); goto out; @@ -3741,6 +4196,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, out: g_slist_free(list); + bdrv_drained_end(from); bdrv_unref(from); } @@ -3786,18 +4242,17 @@ out: static void bdrv_delete(BlockDriverState *bs) { - assert(!bs->job); assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } @@ -3829,7 +4284,7 @@ typedef struct CheckCo { int ret; } CheckCo; -static void bdrv_check_co_entry(void *opaque) +static void coroutine_fn bdrv_check_co_entry(void *opaque) { CheckCo *cco = opaque; cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix); @@ -3923,6 +4378,70 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs) return bdrv_find_overlay(bs, NULL); } +/* + * Return true if at least one of the backing links between @bs and + * @base 
is frozen. @errp is set if that's the case. + * @base must be reachable from @bs, or NULL. + */ +bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + Error **errp) +{ + BlockDriverState *i; + + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing && i->backing->frozen) { + error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", + i->backing->name, i->node_name, + backing_bs(i)->node_name); + return true; + } + } + + return false; +} + +/* + * Freeze all backing links between @bs and @base. + * If any of the links is already frozen the operation is aborted and + * none of the links are modified. + * @base must be reachable from @bs, or NULL. + * Returns 0 on success. On failure returns < 0 and sets @errp. + */ +int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp) +{ + BlockDriverState *i; + + if (bdrv_is_backing_chain_frozen(bs, base, errp)) { + return -EPERM; + } + + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing) { + i->backing->frozen = true; + } + } + + return 0; +} + +/* + * Unfreeze all backing links between @bs and @base. The caller must + * ensure that all links are frozen before using this function. + * @base must be reachable from @bs, or NULL. + */ +void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) +{ + BlockDriverState *i; + + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing) { + assert(i->backing->frozen); + i->backing->frozen = false; + } + } +} + /* * Drops images above 'base' up to and including 'top', and sets the image * above 'top' to have base as its backing file. @@ -3972,6 +4491,14 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, goto exit; } + /* This function changes all links that point to top and makes + * them point to base. Check that none of them is frozen. 
*/ + QLIST_FOREACH(c, &top->parents, next_parent) { + if (c->frozen) { + goto exit; + } + } + /* If 'base' recursively inherits from 'top' then we should set * base->inherits_from to top->inherits_from after 'top' and all * other intermediate nodes have been dropped. @@ -3993,11 +4520,10 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) { /* Check whether we are allowed to switch c from top to base */ GSList *ignore_children = g_slist_prepend(NULL, c); - bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, - ignore_children, &local_err); + ret = bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, + ignore_children, NULL, &local_err); g_slist_free(ignore_children); - if (local_err) { - ret = -EPERM; + if (ret < 0) { error_report_err(local_err); goto exit; } @@ -4147,7 +4673,7 @@ static int qsort_strcmp(const void *a, const void *b) } void bdrv_iterate_format(void (*it)(void *opaque, const char *name), - void *opaque) + void *opaque, bool read_only) { BlockDriver *drv; int count = 0; @@ -4158,6 +4684,11 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), if (drv->format_name) { bool found = false; int i = count; + + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) { + continue; + } + while (formats && i && !found) { found = !strcmp(formats[--i], drv->format_name); } @@ -4176,6 +4707,11 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), bool found = false; int j = count; + if (use_bdrv_whitelist && + !bdrv_format_is_whitelisted(format_name, read_only)) { + continue; + } + while (formats && j && !found) { found = !strcmp(formats[--j], format_name); } @@ -4760,7 +5296,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, */ bs->open_flags &= ~BDRV_O_INACTIVE; bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); + ret = 
bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; error_propagate(errp, local_err); @@ -4874,6 +5410,7 @@ static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) static int bdrv_inactivate_recurse(BlockDriverState *bs) { BdrvChild *child, *parent; + bool tighten_restrictions; uint64_t perm, shared_perm; int ret; @@ -4910,8 +5447,15 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) /* Update permissions, they may differ for inactive nodes */ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort); - bdrv_set_perm(bs, perm, shared_perm); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(bs); + } else { + bdrv_set_perm(bs, perm, shared_perm); + } /* Recursively inactivate children */ @@ -5293,14 +5837,9 @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) g_free(ban); } -void bdrv_detach_aio_context(BlockDriverState *bs) +static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; - BdrvChild *child; - - if (!bs->drv) { - return; - } assert(!bs->walking_aio_notifiers); bs->walking_aio_notifiers = true; @@ -5316,12 +5855,9 @@ void bdrv_detach_aio_context(BlockDriverState *bs) */ bs->walking_aio_notifiers = false; - if (bs->drv->bdrv_detach_aio_context) { + if (bs->drv && bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); } - QLIST_FOREACH(child, &bs->children, next) { - bdrv_detach_aio_context(child->bs); - } if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); @@ -5329,15 +5865,10 @@ void bdrv_detach_aio_context(BlockDriverState *bs) bs->aio_context = NULL; } -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) 
+static void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; - BdrvChild *child; - - if (!bs->drv) { - return; - } if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -5345,10 +5876,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->aio_context = new_context; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_attach_aio_context(child->bs, new_context); - } - if (bs->drv->bdrv_attach_aio_context) { + if (bs->drv && bs->drv->bdrv_attach_aio_context) { bs->drv->bdrv_attach_aio_context(bs, new_context); } @@ -5364,16 +5892,44 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. 
+ */ +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) { + BdrvChild *child; + if (bdrv_get_aio_context(bs) == new_context) { return; } bdrv_drained_begin(bs); + + QLIST_FOREACH(child, &bs->children, next) { + if (g_slist_find(*ignore, child)) { + continue; + } + *ignore = g_slist_prepend(*ignore, child); + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (g_slist_find(*ignore, child)) { + continue; + } + assert(child->role->set_aio_ctx); + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); + } + bdrv_detach_aio_context(bs); /* This function executes in the old AioContext so acquire the new one in @@ -5385,6 +5941,91 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) aio_context_release(new_context); } +static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + + /* A BdrvChildRole that doesn't handle AioContext changes cannot + * tolerate any AioContext changes */ + if (!c->role->can_set_aio_ctx) { + char *user = bdrv_child_user_desc(c); + error_setg(errp, "Changing iothreads is not supported by %s", user); + g_free(user); + return false; + } + if (!c->role->can_set_aio_ctx(c, ctx, ignore, errp)) { + assert(!errp || *errp); + return false; + } + return true; +} + +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); +} + +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. 
*/ +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BdrvChild *c; + + if (bdrv_get_aio_context(bs) == ctx) { + return true; + } + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + QLIST_FOREACH(c, &bs->children, next) { + if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + + return true; +} + +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + GSList *ignore; + bool ret; + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); + g_slist_free(ignore); + + if (!ret) { + return -EPERM; + } + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + bdrv_set_aio_context_ignore(bs, ctx, &ignore); + g_slist_free(ignore); + + return 0; +} + +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp) +{ + return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); +} + void bdrv_add_aio_context_notifier(BlockDriverState *bs, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque)