QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter; /* atomic: written under BQL, read by other threads */
+ QemuMutex queued_requests_lock; /* protects queued_requests */
CoQueue queued_requests;
- bool disable_request_queuing;
+ bool disable_request_queuing; /* atomic */
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
block_acct_init(&blk->stats);
+ qemu_mutex_init(&blk->queued_requests_lock);
qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
+ *
+ * Callers must hold the AioContext lock of @bs.
*/
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp)
/*
* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
- * The new BlockBackend is in the main AioContext.
+ * By default, the new BlockBackend is in the main AioContext, but if the
+ * parameters connect it with any existing node in a different AioContext, it
+ * may end up there instead.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
*
+ * Called without holding an AioContext lock.
+ *
* TODO: Remove @filename and @flags; it should be possible to specify a whole
* BDS tree just by specifying the @options QDict (or @reference,
* alternatively). At the time of adding this function, this is not possible,
{
BlockBackend *blk;
BlockDriverState *bs;
+ AioContext *ctx;
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
}
- blk = blk_new(qemu_get_aio_context(), perm, shared);
+ aio_context_acquire(qemu_get_aio_context());
bs = bdrv_open(filename, reference, options, flags, errp);
+ aio_context_release(qemu_get_aio_context());
if (!bs) {
- blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root,
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
- perm, shared, blk, errp);
+ /* bdrv_open() could have moved bs to a different AioContext */
+ ctx = bdrv_get_aio_context(bs);
+ blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
+ blk->perm = perm;
+ blk->shared_perm = shared;
+
+ aio_context_acquire(ctx);
+ blk_insert_bs(blk, bs, errp);
+ bdrv_unref(bs);
+ aio_context_release(ctx);
+
if (!blk->root) {
blk_unref(blk);
return NULL;
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->aio_notifiers));
+ assert(qemu_co_queue_empty(&blk->queued_requests));
+ qemu_mutex_destroy(&blk->queued_requests_lock);
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
/*
* Associates a new BlockDriverState with @blk.
+ *
+ * Callers must hold the AioContext lock of @bs.
*/
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
IO_CODE();
- blk->disable_request_queuing = disable;
+ qatomic_set(&blk->disable_request_queuing, disable);
}
static int coroutine_fn GRAPH_RDLOCK
return 0;
}
+/* Are we currently in a drained section? */
+bool blk_in_drain(BlockBackend *blk)
+{
+ GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
+ return qatomic_read(&blk->quiesce_counter);
+}
+
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
assert(blk->in_flight > 0);
- if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) {
+ if (qatomic_read(&blk->quiesce_counter) &&
+ !qatomic_read(&blk->disable_request_queuing)) {
+ /*
+ * Take lock before decrementing in flight counter so main loop thread
+ * waits for us to enqueue ourselves before it can leave the drained
+ * section.
+ */
+ qemu_mutex_lock(&blk->queued_requests_lock);
blk_dec_in_flight(blk);
- qemu_co_queue_wait(&blk->queued_requests, NULL);
+ qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
blk_inc_in_flight(blk);
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
return ret;
}
+static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
+ (unsigned int*)(uintptr_t)acb->bytes,
+ rwco->iobuf);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
+ unsigned int *nr_zones,
+ BlockZoneDescriptor *zones,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .iobuf = zones,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = (int64_t)(uintptr_t)nr_zones,
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
+ aio_co_enter(blk_get_aio_context(blk), co);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_mgmt(rwco->blk,
+ (BlockZoneOp)(uintptr_t)rwco->iobuf,
+ rwco->offset, acb->bytes);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+ int64_t offset, int64_t len,
+ BlockCompletionFunc *cb, void *opaque) {
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .iobuf = (void *)(uintptr_t)op,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = len;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
+ aio_co_enter(blk_get_aio_context(blk), co);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
+ rwco->iobuf, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque) {
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .ret = NOT_DONE,
+ .flags = flags,
+ .iobuf = qiov,
+ };
+ acb->bytes = (int64_t)(uintptr_t)offset;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
+ aio_co_enter(blk_get_aio_context(blk), co);
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ blk_aio_complete_bh, acb);
+ }
+
+ return &acb->common;
+}
+
+/*
+ * Send a zone_report command.
+ * offset is a byte offset from the start of the device. No alignment
+ * required for offset.
+ * nr_zones represents IN maximum and OUT actual.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+ unsigned int *nr_zones,
+ BlockZoneDescriptor *zones)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk); /* increase before waiting */
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_is_available(blk)) {
+ blk_dec_in_flight(blk);
+ return -ENOMEDIUM;
+ }
+ ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
+ blk_dec_in_flight(blk);
+ return ret;
+}
+
+/*
+ * Send a zone_management command.
+ * op is the zone operation;
+ * offset is the byte offset from the start of the zoned device;
+ * len is the maximum number of bytes the command should operate on. It
+ * should be aligned with the device zone size.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+ int64_t offset, int64_t len)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+
+ ret = blk_check_byte_request(blk, offset, len);
+ if (ret < 0) {
+ blk_dec_in_flight(blk);
+ return ret;
+ }
+
+ ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
+ blk_dec_in_flight(blk);
+ return ret;
+}
+
+/*
+ * Send a zone_append command.
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+ int ret;
+ IO_CODE();
+
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_is_available(blk)) {
+ blk_dec_in_flight(blk);
+ return -ENOMEDIUM;
+ }
+
+ ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
+ blk_dec_in_flight(blk);
+ return ret;
+}
+
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *ctx = blk_get_aio_context(blk);
-
- aio_context_acquire(ctx);
-
/* We may have -ENOMEDIUM completions in flight */
- AIO_WAIT_WHILE(ctx, qatomic_read(&blk->in_flight) > 0);
-
- aio_context_release(ctx);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
}
bdrv_drain_all_end();
return;
}
- bdrv_activate(bs, errp);
+ /*
+ * Migration code can call this function in coroutine context, so leave
+ * coroutine context if necessary.
+ */
+ if (qemu_in_coroutine()) {
+ bdrv_co_activate(bs, errp);
+ } else {
+ bdrv_activate(bs, errp);
+ }
}
bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
IO_CODE();
+ if (!blk) {
+ return qemu_get_aio_context();
+ }
+
+ bs = blk_bs(blk);
if (bs) {
AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
assert(ctx == blk->ctx);
return blk_get_aio_context(blk_acb->blk);
}
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
- bool update_root_node, Error **errp)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+ Error **errp)
{
+ bool old_allow_change;
BlockDriverState *bs = blk_bs(blk);
- ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
int ret;
- if (bs) {
- bdrv_ref(bs);
-
- if (update_root_node) {
- /*
- * update_root_node MUST be false for blk_root_set_aio_ctx_commit(),
- * as we are already in the commit function of a transaction.
- */
- ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp);
- if (ret < 0) {
- bdrv_unref(bs);
- return ret;
- }
- }
- /*
- * Make blk->ctx consistent with the root node before we invoke any
- * other operations like drain that might inquire blk->ctx
- */
- blk->ctx = new_context;
- if (tgm->throttle_state) {
- bdrv_drained_begin(bs);
- throttle_group_detach_aio_context(tgm);
- throttle_group_attach_aio_context(tgm, new_context);
- bdrv_drained_end(bs);
- }
+ GLOBAL_STATE_CODE();
- bdrv_unref(bs);
- } else {
+ if (!bs) {
blk->ctx = new_context;
+ return 0;
}
- return 0;
-}
+ bdrv_ref(bs);
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
- Error **errp)
-{
- GLOBAL_STATE_CODE();
- return blk_do_set_aio_context(blk, new_context, true, errp);
+ old_allow_change = blk->allow_aio_context_change;
+ blk->allow_aio_context_change = true;
+
+ ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+
+ blk->allow_aio_context_change = old_allow_change;
+
+ bdrv_unref(bs);
+ return ret;
}
typedef struct BdrvStateBlkRootContext {
{
BdrvStateBlkRootContext *s = opaque;
BlockBackend *blk = s->blk;
+ AioContext *new_context = s->new_ctx;
+ ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort);
+ blk->ctx = new_context;
+ if (tgm->throttle_state) {
+ throttle_group_detach_aio_context(tgm);
+ throttle_group_attach_aio_context(tgm, new_context);
+ }
}
static TransactionActionDrv set_blk_root_context = {
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
-void coroutine_fn blk_co_io_plug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
- IO_CODE();
- GRAPH_RDLOCK_GUARD();
-
- if (bs) {
- bdrv_co_io_plug(bs);
- }
-}
-
-void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
-{
- BlockDriverState *bs = blk_bs(blk);
- IO_CODE();
- GRAPH_RDLOCK_GUARD();
-
- if (bs) {
- bdrv_co_io_unplug(bs);
- }
-}
-
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
IO_CODE();
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
- while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ qemu_mutex_lock(&blk->queued_requests_lock);
+ while (qemu_co_enter_next(&blk->queued_requests,
+ &blk->queued_requests_lock)) {
/* Resume all queued requests */
}
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}