bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ uint64_t perm;
+ uint64_t shared_perm;
+ bool disable_perm;
+
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
+ int quiesce_counter;
};
typedef struct BlockBackendAIOCB {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
+/*
+ * Build a human-readable description of the BlockBackend that owns @child.
+ *
+ * Preference order: the BlockBackend's name, then the id reported by
+ * blk_get_attached_dev_id(), then a generic fallback text.
+ *
+ * Every path returns a heap string (g_strdup() result or the id string
+ * itself); presumably the caller releases it with g_free().
+ */
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+    BlockBackend *backend = child->opaque;
+    char *attached_id;
+
+    if (backend->name) {
+        return g_strdup(backend->name);
+    }
+
+    attached_id = blk_get_attached_dev_id(backend);
+    if (!*attached_id) {
+        /* TODO Callback into the BB owner for something more detailed */
+        g_free(attached_id);
+        return g_strdup("a block device");
+    }
+
+    /* Non-empty id: hand the string over to the caller as is */
+    return attached_id;
+}
+
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
+ .get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
/*
* Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
{
BlockBackend *blk;
BlockDriverState *bs;
+ uint64_t perm;
+
+ /* blk_new_open() is mainly used in .bdrv_create implementations and the
+ * tools where sharing isn't a concern because the BDS stays private, so we
+ * just request permission according to the flags.
+ *
+ * The exceptions are xen_disk and blockdev_init(); in these cases, the
+ * caller of blk_new_open() doesn't make use of the permissions, but they
+ * shouldn't hurt either. We can still share everything here because the
+ * guest devices will add their own blockers if they can't share. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (flags & BDRV_O_RDWR) {
+ perm |= BLK_PERM_WRITE;
+ }
+ if (flags & BDRV_O_RESIZE) {
+ perm |= BLK_PERM_RESIZE;
+ }
- blk = blk_new();
+ blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ perm, BLK_PERM_ALL, blk, errp);
+ if (!blk->root) {
+ bdrv_unref(bs);
+ blk_unref(blk);
+ return NULL;
+ }
return blk;
}
assert(!blk->refcnt);
assert(!blk->name);
assert(!blk->dev);
+ if (blk->public.throttle_state) {
+ blk_io_limits_disable(blk);
+ }
if (blk->root) {
blk_remove_bs(blk);
}
* Return @blk's name, a non-null string.
* Returns an empty string iff @blk is not referenced by the monitor.
*/
-const char *blk_name(BlockBackend *blk)
+const char *blk_name(const BlockBackend *blk)
{
return blk->name ?: ""; /* GNU ?: shorthand: yields blk->name unless it is NULL, else the empty string */
}
/*
 * Associates a new BlockDriverState with @blk.
+ *
+ * @bs is attached as the "root" child of @blk, requesting the permission
+ * masks currently stored in @blk (blk->perm and blk->shared_perm). @errp is
+ * handed to bdrv_root_attach_child().
+ *
+ * Returns 0 on success, or -EPERM if the child could not be attached. In the
+ * failure case no reference to @bs is taken and the insert notifiers are not
+ * run.
 */
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ blk->perm, blk->shared_perm, blk, errp);
+ if (blk->root == NULL) {
+ return -EPERM;
+ }
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
+
+ return 0;
+}
+
+/*
+ * Update the permission bitmasks that the user of @blk needs.
+ *
+ * If @blk has a root child and its permissions are not disabled
+ * (blk->disable_perm), the new masks are first applied to that child with
+ * bdrv_child_try_set_perm(). When that call fails, its negative return value
+ * is passed on and the masks stored in @blk stay unchanged.
+ *
+ * Otherwise @perm and @shared_perm are recorded in @blk and 0 is returned.
+ */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp)
+{
+    if (blk->root && !blk->disable_perm) {
+        int rc = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* Only remember the masks once the attached node (if any) accepted them */
+    blk->shared_perm = shared_perm;
+    blk->perm = perm;
+
+    return 0;
+}
+
+/*
+ * Report the permission bitmasks currently recorded in @blk.
+ *
+ * Stores blk->perm in *@perm and blk->shared_perm in *@shared_perm. Both
+ * pointers are written unconditionally, so neither may be NULL.
+ */
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+    *perm = blk->perm;
+    *shared_perm = blk->shared_perm;
+}
+
+/*
+ * Re-applies the permissions of every BlockBackend that had them disabled
+ * (blk->disable_perm) once migration has completed. This lets the users of
+ * those BlockBackends tighten their permissions again, specifically revoke
+ * the shared write permissions that were needed for storage migration.
+ *
+ * Iteration stops at the first failure: the error is propagated to @errp and
+ * the failing BlockBackend keeps disable_perm set.
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+void blk_resume_after_migration(Error **errp)
+{
+    Error *err = NULL;
+    BlockBackend *blk = blk_all_next(NULL);
+
+    while (blk) {
+        if (blk->disable_perm) {
+            /* Clear the flag first, otherwise blk_set_perm() would skip the
+             * root child and only store the masks. */
+            blk->disable_perm = false;
+
+            blk_set_perm(blk, blk->perm, blk->shared_perm, &err);
+            if (err) {
+                error_propagate(errp, err);
+                blk->disable_perm = true;
+                return;
+            }
+        }
+
+        blk = blk_all_next(blk);
+    }
}
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
if (blk->dev) {
return -EBUSY;
}
+
+ /* While migration is still incoming, we don't need to apply the
+ * permissions of guest device BlockBackends. We might still have a block
+ * job or NBD server writing to the image for storage migration. */
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ blk->disable_perm = true;
+ }
+
blk_ref(blk);
blk->dev = dev;
blk->legacy_dev = false;
blk_iostatus_reset(blk);
+
return 0;
}
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
+ blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
void *opaque)
{
/* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
- * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
- * is set. */
+ * it that way, so we can assume blk->dev, if present, is a DeviceState if
+ * blk->dev_ops is set. Non-device users may use dev_ops without device. */
assert(!blk->legacy_dev);
blk->dev_ops = ops;
blk->dev_opaque = opaque;
+
+ /* Are we currently quiesced? Should we enforce this right now? */
+ if (blk->quiesce_counter && ops->drained_begin) {
+ ops->drained_begin(opaque);
+ }
}
/*
* Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded. @errp is set then.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
+ Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
- blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+ blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+ if (local_err) {
+ assert(load == true);
+ error_propagate(errp, local_err);
+ return;
+ }
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
static void blk_root_change_media(BdrvChild *child, bool load)
{
- blk_dev_change_media_cb(child->opaque, load);
+ blk_dev_change_media_cb(child->opaque, load, NULL); /* NULL errp: this void callback cannot report a failure to its caller */
}
/*
BdrvRequestFlags flags)
{
int ret;
+ BlockDriverState *bs = blk_bs(blk);
- trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+ trace_blk_co_preadv(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ bdrv_inc_in_flight(bs);
+
/* throttling disk I/O */
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, false);
}
- return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+ bdrv_dec_in_flight(bs);
+ return ret;
}
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ BlockDriverState *bs = blk_bs(blk);
- trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+ trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ bdrv_inc_in_flight(bs);
+
/* throttling disk I/O */
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, true);
flags |= BDRV_REQ_FUA;
}
- return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+ bdrv_dec_in_flight(bs);
+ return ret;
}
typedef struct BlkRwCo {
int64_t bytes, CoroutineEntry co_entry,
BdrvRequestFlags flags)
{
- AioContext *aio_context;
QEMUIOVector qiov;
struct iovec iov;
- Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
.ret = NOT_DONE,
};
- co = qemu_coroutine_create(co_entry, &rwco);
- qemu_coroutine_enter(co);
-
- aio_context = blk_get_aio_context(blk);
- while (rwco.ret == NOT_DONE) {
- aio_poll(aio_context, true);
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ co_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(blk), co);
+ BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
+
+ bdrv_dec_in_flight(acb->common.bs); /* presumably pairs with the bdrv_inc_in_flight() taken when this acb was set up - confirm */
acb->common.cb(acb->common.opaque, acb->ret); /* deliver the stored result to the completion callback */
qemu_aio_unref(acb); /* drop the reference on the acb after the callback has run */
}
{
struct BlockBackendAIOCB *acb;
+ bdrv_inc_in_flight(blk_bs(blk));
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
+ bdrv_dec_in_flight(acb->common.bs);
acb->common.cb(acb->common.opaque, acb->rwco.ret);
qemu_aio_unref(acb);
}
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
-
assert(acb->has_returned); /* this path must only run once has_returned has been set on the acb */
blk_aio_complete(acb);
}
BlkAioEmAIOCB *acb;
Coroutine *co;
+ bdrv_inc_in_flight(blk_bs(blk));
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
- qemu_coroutine_enter(co);
+ bdrv_coroutine_enter(blk_bs(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
if (bs) {
bdrv_eject(bs, eject_flag);
-
- id = blk_get_attached_dev_id(blk);
- qapi_event_send_device_tray_moved(blk_name(blk), id,
- eject_flag, &error_abort);
- g_free(id);
-
}
+
+ /* Whether or not we ejected on the backend,
+ * the frontend experienced a tray event. */
+ id = blk_get_attached_dev_id(blk);
+ qapi_event_send_device_tray_moved(blk_name(blk), id,
+ eject_flag, &error_abort);
+ g_free(id);
}
int blk_get_flags(BlockBackend *blk)
BDRV_REQ_WRITE_COMPRESSED);
}
-int blk_truncate(BlockBackend *blk, int64_t offset)
+int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
{
if (!blk_is_available(blk)) { /* bail out early: sets @errp and returns -ENOMEDIUM */
+ error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
- return bdrv_truncate(blk_bs(blk), offset);
+ return bdrv_truncate(blk->root, offset, errp); /* operates on the root child; result and @errp come from bdrv_truncate() */
}
static void blk_pdiscard_entry(void *opaque)
{
BlockBackend *blk = child->opaque;
+ if (++blk->quiesce_counter == 1) {
+ if (blk->dev_ops && blk->dev_ops->drained_begin) {
+ blk->dev_ops->drained_begin(blk->dev_opaque);
+ }
+ }
+
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
+ assert(blk->quiesce_counter); /* must be non-zero here, it is decremented below */
assert(blk->public.io_limits_disabled);
--blk->public.io_limits_disabled;
+
+ if (--blk->quiesce_counter == 0) { /* counter reached zero: notify the device ops, if they provide drained_end */
+ if (blk->dev_ops && blk->dev_ops->drained_end) {
+ blk->dev_ops->drained_end(blk->dev_opaque);
+ }
+ }
}