BlockBackendPublic public;
void *dev; /* attached device model, if any */
+ bool legacy_dev; /* true if dev is not a DeviceState */
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
void *dev_opaque;
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ uint64_t perm;
+ uint64_t shared_perm;
+ bool disable_perm;
+
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
+ int quiesce_counter;
};
typedef struct BlockBackendAIOCB {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+ char *dev_id;
+
+ if (blk->name) {
+ return g_strdup(blk->name);
+ }
+
+ dev_id = blk_get_attached_dev_id(blk);
+ if (*dev_id) {
+ return dev_id;
+ } else {
+ /* TODO Callback into the BB owner for something more detailed */
+ g_free(dev_id);
+ return g_strdup("a block device");
+ }
+}
+
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
+ .get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
/*
* Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
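/* Illustrative usage, not part of this patch: a caller that needs
 * read/write access and can tolerate any sharing might create its
 * BlockBackend along these lines (the permission constants are the real
 * BLK_PERM_* flags; the call itself is a sketch):
 *
 *     BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
 *                                 BLK_PERM_ALL);
 */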
{
BlockBackend *blk;
BlockDriverState *bs;
+ uint64_t perm;
+
+ /* blk_new_open() is mainly used in .bdrv_create implementations and the
+ * tools where sharing isn't a concern because the BDS stays private, so we
+ * just request permission according to the flags.
+ *
+ * The exceptions are xen_disk and blockdev_init(); in these cases, the
+ * caller of blk_new_open() doesn't make use of the permissions, but they
+ * shouldn't hurt either. We can still share everything here because the
+ * guest devices will add their own blockers if they can't share. */
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (flags & BDRV_O_RDWR) {
+ perm |= BLK_PERM_WRITE;
+ }
+ if (flags & BDRV_O_RESIZE) {
+ perm |= BLK_PERM_RESIZE;
+ }
- blk = blk_new();
+ blk = blk_new(perm, BLK_PERM_ALL);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ perm, BLK_PERM_ALL, blk, errp);
+ if (!blk->root) {
+ bdrv_unref(bs);
+ blk_unref(blk);
+ return NULL;
+ }
return blk;
}
assert(!blk->refcnt);
assert(!blk->name);
assert(!blk->dev);
+ if (blk->public.throttle_state) {
+ blk_io_limits_disable(blk);
+ }
if (blk->root) {
blk_remove_bs(blk);
}
* Return @blk's name, a non-null string.
* Returns an empty string iff @blk is not referenced by the monitor.
*/
-const char *blk_name(BlockBackend *blk)
+const char *blk_name(const BlockBackend *blk)
{
return blk->name ?: "";
}
/*
* Associates a new BlockDriverState with @blk.
*/
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ blk->perm, blk->shared_perm, blk, errp);
+ if (blk->root == NULL) {
+ return -EPERM;
+ }
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
+
+ return 0;
}
/*
- * Attach device model @dev to @blk.
- * Return 0 on success, -EBUSY when a device model is attached already.
+ * Sets the permission bitmasks that the user of the BlockBackend needs.
*/
-int blk_attach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
+{
+ int ret;
+
+ if (blk->root && !blk->disable_perm) {
+ ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ blk->perm = perm;
+ blk->shared_perm = shared_perm;
+
+ return 0;
+}
+
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+ *perm = blk->perm;
+ *shared_perm = blk->shared_perm;
+}
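/* Illustrative usage, not part of this patch: a user that must stop
 * tolerating concurrent writers could tighten its shared permissions like
 * this (the error handling is a sketch):
 *
 *     uint64_t perm, shared;
 *     Error *local_err = NULL;
 *
 *     blk_get_perm(blk, &perm, &shared);
 *     if (blk_set_perm(blk, perm, shared & ~BLK_PERM_WRITE, &local_err) < 0) {
 *         error_report_err(local_err);
 *     }
 */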
+
+/*
+ * Notifies the users of all BlockBackends that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * the shared write permissions that were needed for storage migration).
+ *
+ * If an error is set through @errp, the VM must not be resumed.
+ */
+void blk_resume_after_migration(Error **errp)
+{
+ BlockBackend *blk;
+ Error *local_err = NULL;
+
+ for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+ if (!blk->disable_perm) {
+ continue;
+ }
+
+ blk->disable_perm = false;
+
+ blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ blk->disable_perm = true;
+ return;
+ }
+ }
+}
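/* Illustrative only: a migration-completion path would be expected to call
 * this and refuse to auto-start the guest on failure, roughly as follows
 * (a sketch, not the actual migration code; 'autostart' stands in for
 * whatever run-state decision the caller makes):
 *
 *     Error *local_err = NULL;
 *
 *     blk_resume_after_migration(&local_err);
 *     if (local_err) {
 *         error_report_err(local_err);
 *         autostart = false;
 *     }
 */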
+
+static int blk_do_attach_dev(BlockBackend *blk, void *dev)
{
if (blk->dev) {
return -EBUSY;
}
+
+ /* While migration is still incoming, we don't need to apply the
+ * permissions of guest device BlockBackends. We might still have a block
+ * job or NBD server writing to the image for storage migration. */
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ blk->disable_perm = true;
+ }
+
blk_ref(blk);
blk->dev = dev;
+ blk->legacy_dev = false;
blk_iostatus_reset(blk);
+
return 0;
}
+/*
+ * Attach device model @dev to @blk.
+ * Return 0 on success, -EBUSY when a device model is attached already.
+ */
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
+{
+ return blk_do_attach_dev(blk, dev);
+}
+
/*
* Attach device model @dev to @blk.
* @blk must not have a device model attached already.
* TODO qdevified devices don't use this, remove when devices are qdevified
*/
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
+void blk_attach_dev_legacy(BlockBackend *blk, void *dev)
{
- if (blk_attach_dev(blk, dev) < 0) {
+ if (blk_do_attach_dev(blk, dev) < 0) {
abort();
}
+ blk->legacy_dev = true;
}
/*
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
blk->guest_block_size = 512;
+ blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
blk_unref(blk);
}
return blk->dev;
}
+/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
+ * device attached to the BlockBackend. */
+static char *blk_get_attached_dev_id(BlockBackend *blk)
+{
+ DeviceState *dev;
+
+ assert(!blk->legacy_dev);
+ dev = blk->dev;
+
+ if (!dev) {
+ return g_strdup("");
+ } else if (dev->id) {
+ return g_strdup(dev->id);
+ }
+ return object_get_canonical_path(OBJECT(dev));
+}
+
/*
* Return the BlockBackend which has the device model @dev attached if it
* exists, else null.
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void *opaque)
{
+ /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
+ * it that way, so we can assume blk->dev, if present, is a DeviceState if
+ * blk->dev_ops is set. Non-device users may use dev_ops without a device. */
+ assert(!blk->legacy_dev);
+
blk->dev_ops = ops;
blk->dev_opaque = opaque;
+
+ /* If the backend is already quiesced, let the new dev_ops know right away. */
+ if (blk->quiesce_counter && ops->drained_begin) {
+ ops->drained_begin(opaque);
+ }
}
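/* Illustrative only: a qdevified device that wants to pause request
 * submission while its backend is drained might register ops like this
 * (MyDevice and the callbacks are hypothetical):
 *
 *     static void my_dev_drained_begin(void *opaque)
 *     {
 *         MyDevice *s = opaque;
 *         s->stopped = true;
 *     }
 *
 *     static void my_dev_drained_end(void *opaque)
 *     {
 *         MyDevice *s = opaque;
 *         s->stopped = false;
 *         my_dev_restart_queued_requests(s);
 *     }
 *
 *     static const BlockDevOps my_dev_block_ops = {
 *         .drained_begin = my_dev_drained_begin,
 *         .drained_end   = my_dev_drained_end,
 *     };
 *
 *     blk_set_dev_ops(blk, &my_dev_block_ops, s);
 */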
/*
* Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded; @errp is set in that case.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
+ Error *local_err = NULL;
+
+ assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
- blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+ blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+ if (local_err) {
+ assert(load == true);
+ error_propagate(errp, local_err);
+ return;
+ }
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
- qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
+ char *id = blk_get_attached_dev_id(blk);
+ qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open,
&error_abort);
+ g_free(id);
}
}
}
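/* Illustrative only: with the new Error parameter, a change_media_cb
 * implementation can reject a load it cannot handle (the device type and
 * helper below are hypothetical):
 *
 *     static void my_cd_change_media_cb(void *opaque, bool load, Error **errp)
 *     {
 *         MyCdromState *s = opaque;
 *
 *         if (load && !my_cd_medium_supported(s)) {
 *             error_setg(errp, "Medium not supported by this drive");
 *             return;
 *         }
 *         s->tray_open = !load;
 *     }
 */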
static void blk_root_change_media(BdrvChild *child, bool load)
{
- blk_dev_change_media_cb(child->opaque, load);
+ blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*
BdrvRequestFlags flags)
{
int ret;
+ BlockDriverState *bs = blk_bs(blk);
- trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+ trace_blk_co_preadv(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ bdrv_inc_in_flight(bs);
+
/* throttling disk I/O */
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, false);
}
- return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+ bdrv_dec_in_flight(bs);
+ return ret;
}
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ BlockDriverState *bs = blk_bs(blk);
- trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+ trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ bdrv_inc_in_flight(bs);
+
/* throttling disk I/O */
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, true);
flags |= BDRV_REQ_FUA;
}
- return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+ bdrv_dec_in_flight(bs);
+ return ret;
}
typedef struct BlkRwCo {
int64_t bytes, CoroutineEntry co_entry,
BdrvRequestFlags flags)
{
- AioContext *aio_context;
QEMUIOVector qiov;
struct iovec iov;
- Coroutine *co;
BlkRwCo rwco;
iov = (struct iovec) {
.ret = NOT_DONE,
};
- co = qemu_coroutine_create(co_entry, &rwco);
- qemu_coroutine_enter(co);
-
- aio_context = blk_get_aio_context(blk);
- while (rwco.ret == NOT_DONE) {
- aio_poll(aio_context, true);
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ co_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(blk), co);
+ BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
return rwco.ret;
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
+
+ bdrv_dec_in_flight(acb->common.bs);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_aio_unref(acb);
}
{
struct BlockBackendAIOCB *acb;
+ bdrv_inc_in_flight(blk_bs(blk));
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
+ bdrv_dec_in_flight(acb->common.bs);
acb->common.cb(acb->common.opaque, acb->rwco.ret);
qemu_aio_unref(acb);
}
static void blk_aio_complete_bh(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
-
assert(acb->has_returned);
blk_aio_complete(acb);
}
BlkAioEmAIOCB *acb;
Coroutine *co;
+ bdrv_inc_in_flight(blk_bs(blk));
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
- qemu_coroutine_enter(co);
+ bdrv_coroutine_enter(blk_bs(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
blk_aio_write_entry, flags, cb, opaque);
}
+static void blk_aio_flush_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_flush(rwco->blk);
+ blk_aio_complete(acb);
+}
+
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
- if (!blk_is_available(blk)) {
- return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
- }
+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+}
- return bdrv_aio_flush(blk_bs(blk), cb, opaque);
+static void blk_aio_pdiscard_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+ blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_byte_request(blk, offset, count);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
- }
-
- return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
+ cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_ioctl(blk_bs(blk), req, buf);
+ return bdrv_co_ioctl(blk_bs(blk), req, buf);
+}
+
+static void blk_ioctl_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
+ rwco->qiov->iov[0].iov_base);
+}
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+{
+ return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+}
+
+static void blk_aio_ioctl_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
+ rwco->qiov->iov[0].iov_base);
+ blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- if (!blk_is_available(blk)) {
- return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
- }
+ QEMUIOVector qiov;
+ struct iovec iov;
+
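+    /* Smuggle the ioctl buffer pointer through the generic BlkRwCo machinery
+     * in a zero-length iovec; the request number itself travels in the offset
+     * field (see blk_aio_ioctl_entry above). */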
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = 0,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
+ return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
return bdrv_co_flush(blk_bs(blk));
}
-int blk_flush(BlockBackend *blk)
+static void blk_flush_entry(void *opaque)
{
- if (!blk_is_available(blk)) {
- return -ENOMEDIUM;
- }
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_flush(rwco->blk);
+}
- return bdrv_flush(blk_bs(blk));
+int blk_flush(BlockBackend *blk)
+{
+ return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
}
void blk_drain(BlockBackend *blk)
void blk_eject(BlockBackend *blk, bool eject_flag)
{
BlockDriverState *bs = blk_bs(blk);
+ char *id;
+
+ /* blk_eject is only called by qdevified devices */
+ assert(!blk->legacy_dev);
if (bs) {
bdrv_eject(bs, eject_flag);
}
+
+ /* Whether or not we ejected on the backend,
+ * the frontend experienced a tray event. */
+ id = blk_get_attached_dev_id(blk);
+ qapi_event_send_device_tray_moved(blk_name(blk), id,
+ eject_flag, &error_abort);
+ g_free(id);
}
int blk_get_flags(BlockBackend *blk)
BDRV_REQ_WRITE_COMPRESSED);
}
-int blk_truncate(BlockBackend *blk, int64_t offset)
+int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
{
if (!blk_is_available(blk)) {
+ error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
- return bdrv_truncate(blk_bs(blk), offset);
+ return bdrv_truncate(blk->root, offset, errp);
}
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
+static void blk_pdiscard_entry(void *opaque)
{
- int ret = blk_check_byte_request(blk, offset, count);
- if (ret < 0) {
- return ret;
- }
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+}
- return bdrv_pdiscard(blk_bs(blk), offset, count);
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
+{
+ return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
{
BlockBackend *blk = child->opaque;
+ if (++blk->quiesce_counter == 1) {
+ if (blk->dev_ops && blk->dev_ops->drained_begin) {
+ blk->dev_ops->drained_begin(blk->dev_opaque);
+ }
+ }
+
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
+ assert(blk->quiesce_counter);
assert(blk->public.io_limits_disabled);
--blk->public.io_limits_disabled;
+
+ if (--blk->quiesce_counter == 0) {
+ if (blk->dev_ops && blk->dev_ops->drained_end) {
+ blk->dev_ops->drained_end(blk->dev_opaque);
+ }
+ }
}