X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block.c;h=cbd8da5f3bd4e43f651de655b4151b8c4a890bef;hb=10da4a368992a5950abd07673ba8311fbbe667f5;hp=ed9253c786ac238eacb23726002e6f51f9371668;hpb=36fe7709664579c934f014f21e5610d69e5b298f;p=mirror_qemu.git diff --git a/block.c b/block.c index ed9253c786..cbd8da5f3b 100644 --- a/block.c +++ b/block.c @@ -911,10 +911,11 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) return bdrv_drain_poll(bs, false, NULL, false); } -static void bdrv_child_cb_drained_end(BdrvChild *child) +static void bdrv_child_cb_drained_end(BdrvChild *child, + int *drained_end_counter) { BlockDriverState *bs = child->opaque; - bdrv_drained_end(bs); + bdrv_drained_end_no_poll(bs, drained_end_counter); } static void bdrv_child_cb_attach(BdrvChild *child) @@ -936,6 +937,20 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } +static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BlockDriverState *bs = child->opaque; + return bdrv_can_set_aio_context(bs, ctx, ignore, errp); +} + +static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, + GSList **ignore) +{ + BlockDriverState *bs = child->opaque; + return bdrv_set_aio_context_ignore(bs, ctx, ignore); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -950,8 +965,9 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); - /* Copy the read-only option from the parent */ + /* Copy the read-only and discard options from the parent */ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); /* aio=native doesn't work for cache.direct=off, so disable it for the * temporary snapshot */ @@ -1002,6 +1018,8 @@ const BdrvChildRole child_file = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; /* @@ -1028,6 +1046,8 @@ const BdrvChildRole child_format = { .attach = bdrv_child_cb_attach, .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static void bdrv_backing_attach(BdrvChild *c) @@ -1151,6 +1171,8 @@ const BdrvChildRole child_backing = { .drained_end = bdrv_child_cb_drained_end, .inactivate = bdrv_child_cb_inactivate, .update_filename = bdrv_backing_update_filename, + .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, + .set_aio_ctx = bdrv_child_cb_set_aio_ctx, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -1685,9 +1707,12 @@ static int bdrv_fill_options(QDict **options, const char *filename, static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp); + GSList *ignore_children, + bool *tighten_restrictions, Error **errp); static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); typedef struct BlockReopenQueueEntry { bool prepared; @@ -1742,11 +1767,10 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { - if (bs->drv && bs->drv->bdrv_child_perm) { - bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, - parent_perm, parent_shared, - nperm, nshared); - } + assert(bs->drv && bs->drv->bdrv_child_perm); + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, + parent_perm, parent_shared, + nperm, nshared); /* TODO Take force_share from reopen_queue */ if (child_bs && child_bs->force_share) { *nshared = BLK_PERM_ALL; @@ -1759,23 +1783,61 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, * permissions of all its parents. This involves checking whether all necessary * permission changes to child nodes can be performed. * + * Will set *tighten_restrictions to true if and only if new permissions have to + * be taken or currently shared permissions are to be unshared. Otherwise, + * errors are not fatal as long as the caller accepts that the restrictions + * remain tighter than they need to be. The caller still has to abort the + * transaction. + * @tighten_restrictions cannot be used together with @q: When reopening, we may + * encounter fatal errors even though no restrictions are to be tightened. For + * example, changing a node from RW to RO will fail if the WRITE permission is + * to be kept. + * * A call to this function must always be followed by a call to bdrv_set_perm() * or bdrv_abort_perm_update(). */ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t cumulative_perms, uint64_t cumulative_shared_perms, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) { BlockDriver *drv = bs->drv; BdrvChild *c; int ret; + assert(!q || !tighten_restrictions); + + if (tighten_restrictions) { + uint64_t current_perms, current_shared; + uint64_t added_perms, removed_shared_perms; + + bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); + + added_perms = cumulative_perms & ~current_perms; + removed_shared_perms = current_shared & ~cumulative_shared_perms; + + *tighten_restrictions = added_perms || removed_shared_perms; + } + /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && !bdrv_is_writable_after_reopen(bs, q)) { - error_setg(errp, "Block node is read-only"); + if (!bdrv_is_writable_after_reopen(bs, NULL)) { + error_setg(errp, "Block node is read-only"); + } else { + uint64_t current_perms, current_shared; + bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); + if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + error_setg(errp, "Cannot make block node read-only, there is " + "a writer on it"); + } else { + error_setg(errp, "Cannot make block node read-only and create " + "a writer on it"); + } + } + return -EPERM; } @@ -1798,11 +1860,18 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; + bool child_tighten_restr; + bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); - ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, - ignore_children, errp); + ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, + tighten_restrictions ? &child_tighten_restr + : NULL, + errp); + if (tighten_restrictions) { + *tighten_restrictions |= child_tighten_restr; + } if (ret < 0) { return ret; } @@ -1926,17 +1995,23 @@ char *bdrv_perm_names(uint64_t perm) * set, the BdrvChild objects in this list are ignored in the calculations; * this allows checking permission updates for an existing reference. * + * See bdrv_check_perm() for the semantics of @tighten_restrictions. + * * Needs to be followed by a call to either bdrv_set_perm() or * bdrv_abort_perm_update(). */ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t new_used_perm, uint64_t new_shared_perm, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, + Error **errp) { BdrvChild *c; uint64_t cumulative_perms = new_used_perm; uint64_t cumulative_shared_perms = new_shared_perm; + assert(!q || !tighten_restrictions); + /* There is no reason why anyone couldn't tolerate write_unchanged */ assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); @@ -1948,6 +2023,11 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((new_used_perm & c->shared_perm) != new_used_perm) { char *user = bdrv_child_user_desc(c); char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + error_setg(errp, "Conflicts with use by %s as '%s', which does not " "allow '%s' on %s", user, c->name, perm_names, bdrv_get_node_name(c->bs)); @@ -1959,6 +2039,11 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, if ((c->perm & new_shared_perm) != c->perm) { char *user = bdrv_child_user_desc(c); char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + + if (tighten_restrictions) { + *tighten_restrictions = true; + } + error_setg(errp, "Conflicts with use by %s as '%s', which uses " "'%s' on %s", user, c->name, perm_names, bdrv_get_node_name(c->bs)); @@ -1972,19 +2057,21 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, } return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, - ignore_children, errp); + ignore_children, tighten_restrictions, errp); } /* Needs to be followed by a call to either bdrv_child_set_perm() or * bdrv_child_abort_perm_update(). */ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp) + GSList *ignore_children, + bool *tighten_restrictions, Error **errp) { int ret; ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); - ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); + ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, + tighten_restrictions, errp); g_slist_free(ignore_children); if (ret < 0) { @@ -2035,11 +2122,26 @@ static void bdrv_child_abort_perm_update(BdrvChild *c) int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Error **errp) { + Error *local_err = NULL; int ret; + bool tighten_restrictions; - ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp); + ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, + &tighten_restrictions, &local_err); if (ret < 0) { bdrv_child_abort_perm_update(c); + if (tighten_restrictions) { + error_propagate(errp, local_err); + } else { + /* + * Our caller may intend to only loosen restrictions and + * does not expect this function to fail. Errors are not + * fatal in such a case, so we can just hide them from our + * caller. + */ + error_free(local_err); + ret = 0; + } return ret; } @@ -2048,6 +2150,18 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) +{ + uint64_t parent_perms, parent_shared; + uint64_t perms, shared; + + bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); + bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, + &perms, &shared); + + return bdrv_child_try_set_perm(c, perms, shared, errp); +} + void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, @@ -2138,24 +2252,19 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (child->role->detach) { child->role->detach(child); } - if (old_bs->quiesce_counter && child->role->drained_end) { - int num = old_bs->quiesce_counter; - if (child->role->parent_is_bds) { - num -= bdrv_drain_all_count; - } - assert(num >= 0); - for (i = 0; i < num; i++) { - child->role->drained_end(child); - } + while (child->parent_quiesce_counter) { + bdrv_parent_drained_end_single(child); } QLIST_REMOVE(child, next_parent); + } else { + assert(child->parent_quiesce_counter == 0); } child->bs = new_bs; if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - if (new_bs->quiesce_counter && child->role->drained_begin) { + if (new_bs->quiesce_counter) { int num = new_bs->quiesce_counter; if (child->role->parent_is_bds) { num -= bdrv_drain_all_count; @@ -2193,33 +2302,69 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) bdrv_replace_child_noperm(child, new_bs); + /* + * Start with the new node's permissions. If @new_bs is a (direct + * or indirect) child of @old_bs, we must complete the permission + * update on @new_bs before we loosen the restrictions on @old_bs. + * Otherwise, bdrv_check_perm() on @old_bs would re-initiate + * updating the permissions of @new_bs, and thus not purely loosen + * restrictions. + */ + if (new_bs) { + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + bdrv_set_perm(new_bs, perm, shared_perm); + } + if (old_bs) { /* Update permissions for old node. This is guaranteed to succeed * because we're just taking a parent away, so we're loosening * restrictions. */ + bool tighten_restrictions; + int ret; + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); - bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); - bdrv_set_perm(old_bs, perm, shared_perm); - } + ret = bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(old_bs); + } else { + bdrv_set_perm(old_bs, perm, shared_perm); + } - if (new_bs) { - bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); - bdrv_set_perm(new_bs, perm, shared_perm); + /* When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext */ + bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); } } +/* + * This function steals the reference to child_bs from the caller. + * That reference is later dropped by bdrv_root_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * The caller must hold the AioContext lock @child_bs, but not that of @ctx + * (unless @child_bs is already in @ctx). + */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { BdrvChild *child; + Error *local_err = NULL; int ret; - ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); + ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, NULL, + errp); if (ret < 0) { bdrv_abort_perm_update(child_bs); + bdrv_unref(child_bs); return NULL; } @@ -2233,12 +2378,48 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, .opaque = opaque, }; + /* If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. */ + if (bdrv_get_aio_context(child_bs) != ctx) { + ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); + if (ret < 0 && child_role->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, child);; + ctx = bdrv_get_aio_context(child_bs); + if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, child);; + child_role->set_aio_ctx(child, ctx, &ignore); + } + g_slist_free(ignore); + } + if (ret < 0) { + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); + return NULL; + } + } + /* This performs the matching bdrv_set_perm() for the above check. */ bdrv_replace_child(child, child_bs); return child; } +/* + * This function transfers the reference to child_bs from the caller + * to parent_bs. That reference is later dropped by parent_bs on + * bdrv_close() or if someone calls bdrv_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + * + * If @parent_bs and @child_bs are in different AioContexts, the caller must + * hold the AioContext lock for @child_bs, but not for @parent_bs. + */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, @@ -2251,11 +2432,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); - assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, + bdrv_get_aio_context(parent_bs), perm, shared_perm, parent_bs, errp); if (child == NULL) { return NULL; @@ -2287,18 +2468,20 @@ void bdrv_root_unref_child(BdrvChild *child) bdrv_unref(child_bs); } -void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +/** + * Clear all inherits_from pointers from children and grandchildren of + * @root that point to @root, where necessary. + */ +static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) { - if (child == NULL) { - return; - } - - if (child->bs->inherits_from == parent) { - BdrvChild *c; + BdrvChild *c; - /* Remove inherits_from only when the last reference between parent and - * child->bs goes away. */ - QLIST_FOREACH(c, &parent->children, next) { + if (child->bs->inherits_from == root) { + /* + * Remove inherits_from only when the last reference between root and + * child->bs goes away. + */ + QLIST_FOREACH(c, &root->children, next) { if (c != child && c->bs == child->bs) { break; } @@ -2308,6 +2491,18 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) } } + QLIST_FOREACH(c, &child->bs->children, next) { + bdrv_unset_inherits_from(root, c); + } +} + +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + if (child == NULL) { + return; + } + + bdrv_unset_inherits_from(parent, child); bdrv_root_unref_child(child); } @@ -2366,12 +2561,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, /* If backing_hd was already part of bs's backing chain, and * inherits_from pointed recursively to bs then let's update it to * point directly to bs (else it will become NULL). */ - if (update_inherits_from) { + if (bs->backing && update_inherits_from) { backing_hd->inherits_from = bs; } - if (!bs->backing) { - bdrv_unref(backing_hd); - } out: bdrv_refresh_limits(bs, NULL); @@ -2469,7 +2661,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, ret = -EINVAL; goto free_exit; } - bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs)); if (implicit_backing) { bdrv_refresh_filename(backing_hd); @@ -2559,7 +2750,6 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { - BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -2568,13 +2758,7 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); - if (!c) { - bdrv_unref(bs); - return NULL; - } - - return c; + return bdrv_attach_child(parent, bs, bdref_key, child_role, errp); } /* TODO Future callers may need to specify parent/child_role in order for @@ -2843,7 +3027,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, /* Not requesting BLK_PERM_CONSISTENT_READ because we're only * looking at the header to guess the image format. This works even * in cases where a guest would not see a consistent state. */ - file = blk_new(0, BLK_PERM_ALL); + file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); if (local_err) { @@ -3272,7 +3456,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; ret = bdrv_check_perm(state->bs, bs_queue, state->perm, - state->shared_perm, NULL, errp); + state->shared_perm, NULL, NULL, errp); if (ret < 0) { goto cleanup_perm; } @@ -3284,7 +3468,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) state->perm, state->shared_perm, &nperm, &nshared); ret = bdrv_check_update_perm(state->new_backing_bs, NULL, - nperm, nshared, NULL, errp); + nperm, nshared, NULL, NULL, errp); if (ret < 0) { goto cleanup_perm; } @@ -3828,7 +4012,6 @@ static void bdrv_close(BlockDriverState *bs) BdrvAioNotifier *ban, *ban_next; BdrvChild *child, *next; - assert(!bs->job); assert(!bs->refcnt); bdrv_drained_begin(bs); /* complete I/O */ @@ -3842,22 +4025,12 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } - bdrv_set_backing_hd(bs, NULL, &error_abort); - - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - bs->file = NULL; - } - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - /* TODO Remove bdrv_unref() from drivers' close function and use - * bdrv_unref_child() here */ - if (child->bs->inherits_from == bs) { - child->bs->inherits_from = NULL; - } - bdrv_detach_child(child); + bdrv_unref_child(bs, child); } + bs->backing = NULL; + bs->file = NULL; g_free(bs->opaque); bs->opaque = NULL; atomic_set(&bs->copy_on_read, 0); @@ -3986,13 +4159,13 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, uint64_t perm = 0, shared = BLK_PERM_ALL; int ret; - assert(!atomic_read(&from->in_flight)); - assert(!atomic_read(&to->in_flight)); - /* Make sure that @from doesn't go away until we have successfully attached * all of its parents to @to. */ bdrv_ref(from); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + bdrv_drained_begin(from); + /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); @@ -4011,7 +4184,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, /* Check whether the required permissions can be granted on @to, ignoring * all BdrvChild in @list so that they can't block themselves. */ - ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); + ret = bdrv_check_update_perm(to, NULL, perm, shared, list, NULL, errp); if (ret < 0) { bdrv_abort_perm_update(to); goto out; @@ -4033,6 +4206,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, out: g_slist_free(list); + bdrv_drained_end(from); bdrv_unref(from); } @@ -4078,18 +4252,17 @@ out: static void bdrv_delete(BlockDriverState *bs) { - assert(!bs->job); assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } @@ -4121,7 +4294,7 @@ typedef struct CheckCo { int ret; } CheckCo; -static void bdrv_check_co_entry(void *opaque) +static void coroutine_fn bdrv_check_co_entry(void *opaque) { CheckCo *cco = opaque; cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix); @@ -4218,14 +4391,15 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs) /* * Return true if at least one of the backing links between @bs and * @base is frozen. @errp is set if that's the case. + * @base must be reachable from @bs, or NULL. */ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, Error **errp) { BlockDriverState *i; - for (i = bs; i != base && i->backing; i = backing_bs(i)) { - if (i->backing->frozen) { + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing && i->backing->frozen) { error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", i->backing->name, i->node_name, backing_bs(i)->node_name); @@ -4240,6 +4414,7 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, * Freeze all backing links between @bs and @base. * If any of the links is already frozen the operation is aborted and * none of the links are modified. + * @base must be reachable from @bs, or NULL. * Returns 0 on success. On failure returns < 0 and sets @errp. */ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, @@ -4251,8 +4426,18 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, return -EPERM; } - for (i = bs; i != base && i->backing; i = backing_bs(i)) { - i->backing->frozen = true; + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing && backing_bs(i)->never_freeze) { + error_setg(errp, "Cannot freeze '%s' link to '%s'", + i->backing->name, backing_bs(i)->node_name); + return -EPERM; + } + } + + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing) { + i->backing->frozen = true; + } } return 0; @@ -4261,14 +4446,17 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, /* * Unfreeze all backing links between @bs and @base. The caller must * ensure that all links are frozen before using this function. + * @base must be reachable from @bs, or NULL. */ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) { BlockDriverState *i; - for (i = bs; i != base && i->backing; i = backing_bs(i)) { - assert(i->backing->frozen); - i->backing->frozen = false; + for (i = bs; i != base; i = backing_bs(i)) { + if (i->backing) { + assert(i->backing->frozen); + i->backing->frozen = false; + } } } @@ -4350,11 +4538,10 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) { /* Check whether we are allowed to switch c from top to base */ GSList *ignore_children = g_slist_prepend(NULL, c); - bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, - ignore_children, &local_err); + ret = bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm, + ignore_children, NULL, &local_err); g_slist_free(ignore_children); - if (local_err) { - ret = -EPERM; + if (ret < 0) { error_report_err(local_err); goto exit; } @@ -5127,7 +5314,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, */ bs->open_flags &= ~BDRV_O_INACTIVE; bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; error_propagate(errp, local_err); @@ -5241,6 +5428,7 @@ static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) static int bdrv_inactivate_recurse(BlockDriverState *bs) { BdrvChild *child, *parent; + bool tighten_restrictions; uint64_t perm, shared_perm; int ret; @@ -5277,8 +5465,15 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) /* Update permissions, they may differ for inactive nodes */ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort); - bdrv_set_perm(bs, perm, shared_perm); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, + &tighten_restrictions, NULL); + assert(tighten_restrictions == false); + if (ret < 0) { + /* We only tried to loosen restrictions, so errors are not fatal */ + bdrv_abort_perm_update(bs); + } else { + bdrv_set_perm(bs, perm, shared_perm); + } /* Recursively inactivate children */ @@ -5660,14 +5855,9 @@ static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) g_free(ban); } -void bdrv_detach_aio_context(BlockDriverState *bs) +static void bdrv_detach_aio_context(BlockDriverState *bs) { BdrvAioNotifier *baf, *baf_tmp; - BdrvChild *child; - - if (!bs->drv) { - return; - } assert(!bs->walking_aio_notifiers); bs->walking_aio_notifiers = true; @@ -5683,12 +5873,9 @@ void bdrv_detach_aio_context(BlockDriverState *bs) */ bs->walking_aio_notifiers = false; - if (bs->drv->bdrv_detach_aio_context) { + if (bs->drv && bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); } - QLIST_FOREACH(child, &bs->children, next) { - bdrv_detach_aio_context(child->bs); - } if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); @@ -5696,15 +5883,10 @@ void bdrv_detach_aio_context(BlockDriverState *bs) bs->aio_context = NULL; } -void bdrv_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) +static void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; - BdrvChild *child; - - if (!bs->drv) { - return; - } if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -5712,10 +5894,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->aio_context = new_context; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_attach_aio_context(child->bs, new_context); - } - if (bs->drv->bdrv_attach_aio_context) { + if (bs->drv && bs->drv->bdrv_attach_aio_context) { bs->drv->bdrv_attach_aio_context(bs, new_context); } @@ -5731,25 +5910,161 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * Must be called from the main AioContext. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. + */ +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) { - if (bdrv_get_aio_context(bs) == new_context) { + AioContext *old_context = bdrv_get_aio_context(bs); + BdrvChild *child; + + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + if (old_context == new_context) { return; } bdrv_drained_begin(bs); + + QLIST_FOREACH(child, &bs->children, next) { + if (g_slist_find(*ignore, child)) { + continue; + } + *ignore = g_slist_prepend(*ignore, child); + bdrv_set_aio_context_ignore(child->bs, new_context, ignore); + } + QLIST_FOREACH(child, &bs->parents, next_parent) { + if (g_slist_find(*ignore, child)) { + continue; + } + assert(child->role->set_aio_ctx); + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); + } + bdrv_detach_aio_context(bs); - /* This function executes in the old AioContext so acquire the new one in - * case it runs in a different thread. - */ - aio_context_acquire(new_context); + /* Acquire the new context, if necessary */ + if (qemu_get_aio_context() != new_context) { + aio_context_acquire(new_context); + } + bdrv_attach_aio_context(bs, new_context); + + /* + * If this function was recursively called from + * bdrv_set_aio_context_ignore(), there may be nodes in the + * subtree that have not yet been moved to the new AioContext. + * Release the old one so bdrv_drained_end() can poll them. + */ + if (qemu_get_aio_context() != old_context) { + aio_context_release(old_context); + } + bdrv_drained_end(bs); - aio_context_release(new_context); + + if (qemu_get_aio_context() != old_context) { + aio_context_acquire(old_context); + } + if (qemu_get_aio_context() != new_context) { + aio_context_release(new_context); + } +} + +static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + + /* A BdrvChildRole that doesn't handle AioContext changes cannot + * tolerate any AioContext changes */ + if (!c->role->can_set_aio_ctx) { + char *user = bdrv_child_user_desc(c); + error_setg(errp, "Changing iothreads is not supported by %s", user); + g_free(user); + return false; + } + if (!c->role->can_set_aio_ctx(c, ctx, ignore, errp)) { + assert(!errp || *errp); + return false; + } + return true; +} + +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp) +{ + if (g_slist_find(*ignore, c)) { + return true; + } + *ignore = g_slist_prepend(*ignore, c); + return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp); +} + +/* @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. */ +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp) +{ + BdrvChild *c; + + if (bdrv_get_aio_context(bs) == ctx) { + return true; + } + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + QLIST_FOREACH(c, &bs->children, next) { + if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) { + return false; + } + } + + return true; +} + +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + GSList *ignore; + bool ret; + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); + g_slist_free(ignore); + + if (!ret) { + return -EPERM; + } + + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; + bdrv_set_aio_context_ignore(bs, ctx, &ignore); + g_slist_free(ignore); + + return 0; +} + +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp) +{ + return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); } void bdrv_add_aio_context_notifier(BlockDriverState *bs,