uint64_t perm;
uint64_t shared_perm;
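+ /* Defer applying perm/shared_perm while set (during incoming migration); cleared again by blk_root_activate() */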
+ bool disable_perm;
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
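+ /* Number of nested blk_root_drained_begin() sections that are still active */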
+ int quiesce_counter;
};
typedef struct BlockBackendAIOCB {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+ char *dev_id;
+
+ if (blk->name) {
+ return g_strdup(blk->name);
+ }
+
+ dev_id = blk_get_attached_dev_id(blk);
+ if (*dev_id) {
+ return dev_id;
+ } else {
+ /* TODO Callback into the BB owner for something more detailed */
+ g_free(dev_id);
+ return g_strdup("a block device");
+ }
+}
+
static const char *blk_root_get_name(BdrvChild *child)
{
return blk_name(child->opaque);
}
+/*
+ * Called when migration has completed. Re-applies the permissions of the
+ * BlockBackend that were deferred while migration was incoming; qdev devices
+ * thereby tighten their permissions again (specifically, the shared write
+ * permissions that were needed for storage migration are revoked).
+ *
+ * If an error is returned through @errp, the VM cannot be allowed to resume.
+ */
+static void blk_root_activate(BdrvChild *child, Error **errp)
+{
+ BlockBackend *blk = child->opaque;
+ Error *local_err = NULL;
+
+ if (!blk->disable_perm) {
+ return;
+ }
+
+ blk->disable_perm = false;
+
+ blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ blk->disable_perm = true;
+ return;
+ }
+}
+
+static int blk_root_inactivate(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ if (blk->disable_perm) {
+ return 0;
+ }
+
+ /* Only inactivate BlockBackends for guest devices (which are inactive at
+ * this point because the VM is stopped) and unattached monitor-owned
+ * BlockBackends. If there is still any other user like a block job, then
+ * we simply can't inactivate the image. */
+ if (!blk->dev && !blk_name(blk)[0]) {
+ return -EPERM;
+ }
+
+ blk->disable_perm = true;
+ if (blk->root) {
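+ /* Give up all permissions on the image; blk->perm and blk->shared_perm keep their values so blk_root_activate() can re-apply them later */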
+ bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
+ }
+
+ return 0;
+}
+
static const BdrvChildRole child_root = {
.inherit_options = blk_root_inherit_options,
.change_media = blk_root_change_media,
.resize = blk_root_resize,
.get_name = blk_root_get_name,
+ .get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
.drained_end = blk_root_drained_end,
+
+ .activate = blk_root_activate,
+ .inactivate = blk_root_inactivate,
};
/*
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
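+ /* Lock protecting the throttled request queues initialized below */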
+ qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+ block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
}
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
- perm, BLK_PERM_ALL, blk, &error_abort);
+ perm, BLK_PERM_ALL, blk, errp);
+ if (!blk->root) {
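+ /* Attaching the root child failed; drop the references taken above and fail */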
+ bdrv_unref(bs);
+ blk_unref(blk);
+ return NULL;
+ }
return blk;
}
assert(!blk->refcnt);
assert(!blk->name);
assert(!blk->dev);
+ if (blk->public.throttle_state) {
+ blk_io_limits_disable(blk);
+ }
if (blk->root) {
blk_remove_bs(blk);
}
* Return @blk's name, a non-null string.
* Returns an empty string iff @blk is not referenced by the monitor.
*/
-const char *blk_name(BlockBackend *blk)
+const char *blk_name(const BlockBackend *blk)
{
return blk->name ?: "";
}
{
int ret;
- if (blk->root) {
+ if (blk->root && !blk->disable_perm) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
if (ret < 0) {
return ret;
return 0;
}
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+ *perm = blk->perm;
+ *shared_perm = blk->shared_perm;
+}
+
static int blk_do_attach_dev(BlockBackend *blk, void *dev)
{
if (blk->dev) {
return -EBUSY;
}
+
+ /* While migration is still incoming, we don't need to apply the
+ * permissions of guest device BlockBackends. We might still have a block
+ * job or NBD server writing to the image for storage migration. */
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ blk->disable_perm = true;
+ }
+
blk_ref(blk);
blk->dev = dev;
blk->legacy_dev = false;
blk_iostatus_reset(blk);
+
return 0;
}
void *opaque)
{
/* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
- * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
- * is set. */
+ * it that way, so we can assume blk->dev, if present, is a DeviceState if
+ * blk->dev_ops is set. Non-device users may use dev_ops without a device. */
assert(!blk->legacy_dev);
blk->dev_ops = ops;
blk->dev_opaque = opaque;
+
+ /* If the backend is already quiesced, notify the new dev_ops of the drained section that is already in progress */
+ if (blk->quiesce_counter && ops->drained_begin) {
+ ops->drained_begin(opaque);
+ }
}
/*
* Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail; in that case
+ * the medium is not loaded and @errp is set.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
* Also send DEVICE_TRAY_MOVED events as appropriate.
*/
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
+ Error *local_err = NULL;
assert(!blk->legacy_dev);
tray_was_open = blk_dev_is_tray_open(blk);
- blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+ blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+ if (local_err) {
+ assert(load == true);
+ error_propagate(errp, local_err);
+ return;
+ }
tray_is_open = blk_dev_is_tray_open(blk);
if (tray_was_open != tray_is_open) {
static void blk_root_change_media(BdrvChild *child, bool load)
{
- blk_dev_change_media_cb(child->opaque, load);
+ blk_dev_change_media_cb(child->opaque, load, NULL);
}
/*
co_entry(&rwco);
} else {
Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
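+ /* Run the request coroutine in the AioContext of the BDS rather than in the caller's context */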
- qemu_coroutine_enter(co);
+ bdrv_coroutine_enter(blk_bs(blk), co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
}
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags)
+ int bytes, BdrvRequestFlags flags)
{
- return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
flags | BDRV_REQ_ZERO_WRITE);
}
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
- qemu_coroutine_enter(co);
+ bdrv_coroutine_enter(blk_bs(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
}
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
- int64_t offset, int count,
+ int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
- return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
}
-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_co_pdiscard(blk_bs(blk), offset, count);
+ return bdrv_co_pdiscard(blk_bs(blk), offset, bytes);
}
int blk_co_flush(BlockBackend *blk)
}
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags)
+ int bytes, BdrvRequestFlags flags)
{
- return blk_co_pwritev(blk, offset, count, NULL,
+ return blk_co_pwritev(blk, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
BDRV_REQ_WRITE_COMPRESSED);
}
-int blk_truncate(BlockBackend *blk, int64_t offset)
+int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
+ Error **errp)
{
if (!blk_is_available(blk)) {
+ error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
- return bdrv_truncate(blk->root, offset);
+ return bdrv_truncate(blk->root, offset, prealloc, errp);
}
static void blk_pdiscard_entry(void *opaque)
rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
}
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
{
BlockBackend *blk = child->opaque;
+ if (++blk->quiesce_counter == 1) {
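+ /* Only notify the device model when the first (outermost) drained section begins */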
+ if (blk->dev_ops && blk->dev_ops->drained_begin) {
+ blk->dev_ops->drained_begin(blk->dev_opaque);
+ }
+ }
+
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
- if (blk->public.io_limits_disabled++ == 0) {
+ if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
throttle_group_restart_blk(blk);
}
}
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
+ assert(blk->quiesce_counter);
assert(blk->public.io_limits_disabled);
- --blk->public.io_limits_disabled;
+ atomic_dec(&blk->public.io_limits_disabled);
+
+ if (--blk->quiesce_counter == 0) {
+ if (blk->dev_ops && blk->dev_ops->drained_end) {
+ blk->dev_ops->drained_end(blk->dev_opaque);
+ }
+ }
}