allow_failure: true
variables:
NAME: debian-riscv64-cross
+ QEMU_JOB_OPTIONAL: 1
# We can, however, build TCG tests using a non-sid base
riscv64-debian-test-cross-container:
F: include/hw/mem/memory-device.h
F: include/hw/mem/nvdimm.h
F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
F: docs/nvdimm.txt
QTest
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: system/qtest.c
+F: include/sysemu/qtest.h
F: accel/qtest/
F: tests/qtest/
F: docs/devel/qgraph.rst
PVRDMA
M: Yuval Shaia <yuval.shaia.ml@gmail.com>
M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
-S: Maintained
+S: Odd Fixes
F: hw/rdma/*
F: hw/rdma/vmw/*
F: docs/pvrdma.txt
}
}
-int kvm_get_max_memslots(void)
+unsigned int kvm_get_max_memslots(void)
{
KVMState *s = KVM_STATE(current_accel());
return s->nr_slots;
}
+unsigned int kvm_get_free_memslots(void)
+{
+ unsigned int used_slots = 0;
+ KVMState *s = kvm_state;
+ int i;
+
+ kvm_slots_lock();
+ for (i = 0; i < s->nr_as; i++) {
+ if (!s->as[i].ml) {
+ continue;
+ }
+ used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
+ }
+ kvm_slots_unlock();
+
+ return s->nr_slots - used_slots;
+}
+
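/*
 * Illustrative note (not part of the patch): each KVM address space has its
 * own array of s->nr_slots memslots, so the address space with the most
 * slots in use bounds how many slots are still guaranteed to be free; hence
 * the MAX() over all address spaces above.
 */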
/* Called with KVMMemoryListener.slots_lock held */
static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
{
return NULL;
}
-bool kvm_has_free_slot(MachineState *ms)
-{
- KVMState *s = KVM_STATE(ms->accelerator);
- bool result;
- KVMMemoryListener *kml = &s->memory_listener;
-
- kvm_slots_lock();
- result = !!kvm_get_free_slot(kml);
- kvm_slots_unlock();
-
- return result;
-}
-
/* Called with KVMMemoryListener.slots_lock held */
static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
{
}
start_addr += slot_size;
size -= slot_size;
+ kml->nr_used_slots--;
} while (size);
return;
}
ram_start_offset += slot_size;
ram += slot_size;
size -= slot_size;
+ kml->nr_used_slots++;
} while (size);
}
return -ENOSYS;
}
-bool kvm_has_free_slot(MachineState *ms)
+unsigned int kvm_get_max_memslots(void)
{
- return false;
+ return 0;
+}
+
+unsigned int kvm_get_free_memslots(void)
+{
+ return 0;
}
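/*
 * Illustrative sketch (not part of the patch; example_can_plug_memory_device
 * is hypothetical): with the boolean kvm_has_free_slot() gone, a caller asks
 * for the remaining slot count instead:
 */
static bool example_can_plug_memory_device(void)
{
    /* The stub returns 0 when KVM is not in use, so check kvm_enabled(). */
    return !kvm_enabled() || kvm_get_free_memslots() > 0;
}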
void kvm_init_cpu_signals(CPUState *cpu)
return !(bs->open_flags & BDRV_O_RDWR);
}
-static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+ bool ignore_allow_rdw, Error **errp)
{
IO_CODE();
* setting @errp. In all other cases, NULL will only be returned with
* @errp set.
*/
-static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
- const char *filename, Error **errp)
+static char * GRAPH_RDLOCK
+bdrv_make_absolute_filename(BlockDriverState *relative_to,
+ const char *filename, Error **errp)
{
char *dir, *full_name;
return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
}
-static void bdrv_child_cb_drained_begin(BdrvChild *child)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
bdrv_do_drained_begin_quiesce(bs, NULL);
}
-static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
return bdrv_drain_poll(bs, NULL, false);
}
-static void bdrv_child_cb_drained_end(BdrvChild *child)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
bdrv_drained_end(bs);
*child_flags &= ~BDRV_O_NATIVE_AIO;
}
-static void bdrv_backing_attach(BdrvChild *c)
+static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
BlockDriverState *backing_hd = c->bs;
}
if (file != NULL) {
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(blk_bs(file));
+ bdrv_graph_rdunlock_main_loop();
+
filename = blk_bs(file)->filename;
} else {
/*
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
if (!ro && bdrv_is_whitelisted(drv, true)) {
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
+ bdrv_graph_rdunlock_main_loop();
} else {
ret = -ENOTSUP;
}
{
assert(!child->bs);
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
assert(!child->next.le_prev); /* not in children list */
g_free(child->name);
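/*
 * Illustrative sketch (not part of the patch): the two locking styles this
 * series uses. The scope guard keeps the block graph read-locked until the
 * end of the function, while the explicit pair brackets only the calls that
 * need the lock.
 */
static void example_guard_style(BlockDriverState *bs)
{
    GRAPH_RDLOCK_GUARD_MAINLOOP();   /* held until the function returns */
    bdrv_refresh_filename(bs);       /* a GRAPH_RDLOCK function */
}

static void example_pair_style(BlockDriverState *bs)
{
    bdrv_graph_rdlock_main_loop();
    bdrv_refresh_filename(bs);
    bdrv_graph_rdunlock_main_loop();
}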
implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
}
+ bdrv_graph_rdlock_main_loop();
backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
+ bdrv_graph_rdunlock_main_loop();
+
if (local_err) {
ret = -EINVAL;
error_propagate(errp, local_err);
}
if (implicit_backing) {
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(backing_hd);
+ bdrv_graph_rdunlock_main_loop();
pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
backing_hd->filename);
}
/*
* Returns true if @child can be reached recursively from @bs
*/
-static bool bdrv_recurse_has_child(BlockDriverState *bs,
- BlockDriverState *child)
+static bool GRAPH_RDLOCK
+bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child)
{
BdrvChild *c;
*
* To be called with bs->aio_context locked.
*/
-static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
- BlockDriverState *bs,
- QDict *options,
- const BdrvChildClass *klass,
- BdrvChildRole role,
- bool parent_is_format,
- QDict *parent_options,
- int parent_flags,
- bool keep_old_opts)
+static BlockReopenQueue * GRAPH_RDLOCK
+bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
+ QDict *options, const BdrvChildClass *klass,
+ BdrvChildRole role, bool parent_is_format,
+ QDict *parent_options, int parent_flags,
+ bool keep_old_opts)
{
assert(bs != NULL);
GLOBAL_STATE_CODE();
+ /*
+ * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that
+ * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok
+ * in practice.
+ */
bdrv_drained_begin(bs);
if (bs_queue == NULL) {
QDict *options, bool keep_old_opts)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
NULL, 0, keep_old_opts);
* Callers must make sure that their AioContext locking is still correct after
* this.
*/
-static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
- bool is_backing, Transaction *tran,
- Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
+ bool is_backing, Transaction *tran,
+ Error **errp)
{
BlockDriverState *bs = reopen_state->bs;
BlockDriverState *new_child_bs;
QObject *value;
const char *str;
AioContext *ctx, *old_ctx;
+ bool has_child;
int ret;
GLOBAL_STATE_CODE();
new_child_bs = bdrv_lookup_bs(NULL, str, errp);
if (new_child_bs == NULL) {
return -EINVAL;
- } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
+ }
+
+ bdrv_graph_rdlock_main_loop();
+ has_child = bdrv_recurse_has_child(new_child_bs, bs);
+ bdrv_graph_rdunlock_main_loop();
+
+ if (has_child) {
error_setg(errp, "Making '%s' a %s child of '%s' would create a "
"cycle", str, child_name, bs->node_name);
return -EINVAL;
* After calling this function, the transaction @change_child_tran may only be
* completed while holding a writer lock for the graph.
*/
-static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue,
- Transaction *change_child_tran, Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+ Transaction *change_child_tran, Error **errp)
{
int ret = -1;
int old_flags;
* to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
* not set, or if the BDS still has copy_on_read enabled */
read_only = !(reopen_state->flags & BDRV_O_RDWR);
+
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
+ bdrv_graph_rdunlock_main_loop();
if (local_err) {
error_propagate(errp, local_err);
goto error;
if (local_err != NULL) {
error_propagate(errp, local_err);
} else {
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(reopen_state->bs);
+ bdrv_graph_rdunlock_main_loop();
error_setg(errp, "failed while preparing to reopen image '%s'",
reopen_state->bs->filename);
}
} else {
/* It is currently mandatory to have a bdrv_reopen_prepare()
* handler for each supported drv. */
+ bdrv_graph_rdlock_main_loop();
error_setg(errp, "Block format '%s' used by node '%s' "
"does not support reopening files", drv->format_name,
bdrv_get_device_or_node_name(reopen_state->bs));
+ bdrv_graph_rdunlock_main_loop();
ret = -1;
goto error;
}
if (qdict_size(reopen_state->options)) {
const QDictEntry *entry = qdict_first(reopen_state->options);
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
do {
QObject *new = entry->value;
QObject *old = qdict_get(reopen_state->bs->options, entry->key);
* makes them final by swapping the staging BlockDriverState contents into
* the active BlockDriverState contents.
*/
-static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
BlockDriver *drv;
BlockDriverState *bs;
drv->bdrv_reopen_commit(reopen_state);
}
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
/* set BDS specific flags now */
qobject_unref(bs->explicit_options);
qobject_unref(bs->options);
qdict_del(bs->explicit_options, "backing");
qdict_del(bs->options, "backing");
- bdrv_graph_rdlock_main_loop();
bdrv_refresh_limits(bs, NULL, NULL);
- bdrv_graph_rdunlock_main_loop();
bdrv_refresh_total_sectors(bs, bs->total_sectors);
}
* Abort the reopen, and delete and free the staged changes in
* reopen_state
*/
-static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
BlockDriver *drv;
bdrv_ref(top);
bdrv_drained_begin(base);
+ bdrv_graph_rdlock_main_loop();
if (!top->drv || !base->drv) {
goto exit;
backing_file_str = base->filename;
}
- bdrv_graph_rdlock_main_loop();
QLIST_FOREACH(c, &top->parents, next_parent) {
updated_children = g_slist_prepend(updated_children, c);
}
- bdrv_graph_rdunlock_main_loop();
/*
* It seems correct to pass detach_subchain=true here, but it triggers
ret = 0;
exit:
+ bdrv_graph_rdunlock_main_loop();
bdrv_drained_end(base);
bdrv_unref(top);
return ret;
BlockDriverState *bs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
list = NULL;
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
bs->drv->bdrv_co_debug_event(bs, event);
}
-static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_find_debug_node(BlockDriverState *bs)
{
GLOBAL_STATE_CODE();
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
const char *tag)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
bs = bdrv_primary_bs(bs);
}
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
bs = bdrv_primary_bs(bs);
}
BlockDriverState *bs_below;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!bs || !bs->drv || !backing_file) {
return NULL;
BdrvNextIterator it;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
}
}
-static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
+static bool GRAPH_RDLOCK
+bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
BdrvChild *parent;
GLOBAL_STATE_CODE();
return false;
}
-static int bdrv_inactivate_recurse(BlockDriverState *bs)
+static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs)
{
BdrvChild *child, *parent;
int ret;
uint64_t cumulative_perms, cumulative_shared_perms;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!bs->drv) {
return -ENOMEDIUM;
GSList *aio_ctxs = NULL, *ctx;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
{
BdrvOpBlocker *blocker;
GLOBAL_STATE_CODE();
+
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
blocker = QLIST_FIRST(&bs->op_blockers[op]);
assert(bs);
assert(target);
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
/* QMP interface protects us from these cases */
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
return blk->root ? blk->root->bs : NULL;
}
-static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs)
{
BdrvChild *child;
GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
QLIST_FOREACH(child, &bs->parents, next_parent) {
if (child->klass == &child_root) {
BdrvChild *c;
GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass != &child_root) {
return false;
if (qemu_in_coroutine()) {
bdrv_co_activate(bs, errp);
} else {
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
bdrv_activate(bs, errp);
}
}
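/*
 * Illustrative note (not part of the patch): GRAPH_RDLOCK_GUARD_MAINLOOP()
 * asserts that it is used outside coroutine context, which is why the
 * coroutine branch above is served by the coroutine variant instead.
 */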
{
BlockDriverState *bs = blk_bs(blk);
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!bs) {
return false;
int blk_make_empty(BlockBackend *blk, Error **errp)
{
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
int ret;
/* No write support yet */
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
return ret;
}
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
return ret;
}
Error *local_err = NULL;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!drv)
return -ENOMEDIUM;
return -EACCES;
}
- ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file);
+ ret = bdrv_co_block_status(child->bs, offset, cur_bytes, pnum, map, file);
if (child == s->target) {
/*
* We refer to s->target only for areas that we've written to it.
local_flags = flags;
/* In case of failure, try to copy-on-read anyway */
- ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n);
+ ret = bdrv_co_is_allocated(bs->file->bs, offset, bytes, &n);
if (ret <= 0) {
- ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
- state->bottom_bs, true, offset,
- n, &n);
+ ret = bdrv_co_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+ state->bottom_bs, true, offset,
+ n, &n);
if (ret > 0 || ret < 0) {
local_flags |= BDRV_REQ_COPY_ON_READ;
}
errp);
}
-static int
+static int GRAPH_RDLOCK
block_crypto_amend_options_luks(BlockDriverState *bs,
QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb,
QCryptoBlockAmendOptions *amend_options = NULL;
int ret = -EINVAL;
- assume_graph_lock(); /* FIXME */
-
assert(crypto);
assert(crypto->block);
const char *protocol_delimiter;
int ret;
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, "curl driver does not support writes",
errp);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
return ret;
}
int64_t offset;
int ret;
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
return ret;
}
uint64_t perm;
int ret;
+ GLOBAL_STATE_CODE();
+
if (!id_wellformed(export->id)) {
error_setg(errp, "Invalid block export id");
return NULL;
* access since the export could be available before migration handover.
* ctx was acquired in the caller.
*/
+ bdrv_graph_rdlock_main_loop();
bdrv_activate(bs, NULL);
+ bdrv_graph_rdunlock_main_loop();
perm = BLK_PERM_CONSISTENT_READ;
if (export->writable) {
if (ret == -EACCES || ret == -EROFS) {
/* Try to degrade to read-only, but if it doesn't work, still use the
* normal error message. */
+ bdrv_graph_rdlock_main_loop();
if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
open_flags = (open_flags & ~O_RDWR) | O_RDONLY;
s->fd = glfs_open(s->glfs, gconf->path, open_flags);
ret = s->fd ? 0 : -errno;
}
+ bdrv_graph_rdunlock_main_loop();
}
s->supports_seek_data = qemu_gluster_test_seek(s->fd);
return rd;
}
-void bdrv_graph_wrlock(BlockDriverState *bs)
+void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs)
{
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
assert(!qatomic_read(&has_writer));
+ assert(!qemu_in_coroutine());
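+ /*
+ * Acquiring the writer lock waits, polling the main loop, until all
+ * readers are gone; polling is forbidden in coroutine context, hence
+ * no_coroutine_fn and the assertion above.
+ */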
/*
* Release only non-mainloop AioContext. The mainloop often relies on the
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
-static void bdrv_parent_cb_resize(BlockDriverState *bs);
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs);
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
+ IO_OR_GS_CODE();
+ assert_bdrv_graph_readable();
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
if (c == ignore) {
}
}
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c;
+ IO_OR_GS_CODE();
+ assert_bdrv_graph_readable();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c == ignore) {
bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
+ IO_OR_GS_CODE();
+
if (c->klass->drained_poll) {
return c->klass->drained_poll(c);
}
return false;
}
-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static bool GRAPH_RDLOCK
+bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents)
{
BdrvChild *c, *next;
bool busy = false;
+ IO_OR_GS_CODE();
+ assert_bdrv_graph_readable();
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
c->quiesced_parent = true;
if (c->klass->drained_begin) {
+ /* Called with the rdlock taken, but it doesn't really need it. */
c->klass->drained_begin(c);
}
}
static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
BdrvChild *ignore_parent)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
return bdrv_drain_poll(bs, ignore_parent, false);
}
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
bs->drv->bdrv_drain_begin(bs);
bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
+
+ /* At this point, we should always be running in the main loop. */
+ GLOBAL_STATE_CODE();
assert(bs->quiesce_counter > 0);
GLOBAL_STATE_CODE();
/* Re-enable things in child-to-parent order */
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bs->drv && bs->drv->bdrv_drain_end) {
bs->drv->bdrv_drain_end(bs);
}
static void bdrv_drain_assert_idle(BlockDriverState *bs)
{
BdrvChild *child, *next;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
assert(qatomic_read(&bs->in_flight) == 0);
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
{
BlockDriverState *bs = NULL;
bool result = false;
+
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
ret = 1; /* "already allocated", so nothing will be copied */
pnum = MIN(align_bytes, max_transfer);
} else {
- ret = bdrv_is_allocated(bs, align_offset,
- MIN(align_bytes, max_transfer), &pnum);
+ ret = bdrv_co_is_allocated(bs, align_offset,
+ MIN(align_bytes, max_transfer), &pnum);
if (ret < 0) {
/*
* Safe to treat errors in querying allocation as if
/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
flags &= ~BDRV_REQ_COPY_ON_READ;
- ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
+ ret = bdrv_co_is_allocated(bs, offset, bytes, &pnum);
if (ret < 0) {
goto out;
}
}
}
-static inline void coroutine_fn
+static inline void coroutine_fn GRAPH_RDLOCK
bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvTrackedRequest *req, int ret)
{
int result = 0;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
/*
* bdrv queue is managed by record/replay,
* set to the host mapping and BDS corresponding to the guest offset.
*/
static int coroutine_fn GRAPH_RDLOCK
-bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map, BlockDriverState **file)
+bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int64_t total_size;
int64_t n; /* bytes */
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
- ret = bdrv_co_block_status(local_file, want_zero, local_map,
- *pnum, pnum, &local_map, &local_file);
+ ret = bdrv_co_do_block_status(local_file, want_zero, local_map,
+ *pnum, pnum, &local_map, &local_file);
goto out;
}
int64_t file_pnum;
int ret2;
- ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
- *pnum, &file_pnum, NULL, NULL);
+ ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map,
+ *pnum, &file_pnum, NULL, NULL);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information; it
 * is useful but not necessary.
return 0;
}
- ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+ ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum,
+ map, file);
++*depth;
if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
return ret;
for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
p = bdrv_filter_or_cow_bs(p))
{
- ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
- file);
+ ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum,
+ map, file);
++*depth;
if (ret < 0) {
return ret;
bytes, pnum, map, file, NULL);
}
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file)
-{
- IO_CODE();
- return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
- pnum, map, file, NULL);
-}
-
-int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map, BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
IO_CODE();
- return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
- offset, bytes, pnum, map, file);
+ return bdrv_co_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
+ offset, bytes, pnum, map, file);
}
/*
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum)
-{
- int ret;
- int64_t dummy;
- IO_CODE();
-
- ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
- bytes, pnum ? pnum : &dummy, NULL,
- NULL, NULL);
- if (ret < 0) {
- return ret;
- }
- return !!(ret & BDRV_BLOCK_ALLOCATED);
-}
-
-/* See bdrv_is_allocated_above for documentation */
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- bool include_base, int64_t offset,
- int64_t bytes, int64_t *pnum)
-{
- int depth;
- int ret;
- IO_CODE();
-
- ret = bdrv_co_common_block_status_above(top, base, include_base, false,
- offset, bytes, pnum, NULL, NULL,
- &depth);
- if (ret < 0) {
- return ret;
- }
-
- if (ret & BDRV_BLOCK_ALLOCATED) {
- return depth;
- }
- return 0;
-}
-
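/*
 * Illustrative sketch (assumption, not shown in this hunk): the removed
 * synchronous wrappers are presumably regenerated by
 * scripts/block-coroutine-wrapper.py from header declarations along these
 * lines, so only the coroutine implementations remain hand-written:
 *
 *     int co_wrapper_mixed_bdrv_rdlock
 *     bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
 *                       int64_t *pnum);
 */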
/*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
* words, the result is not necessarily the maximum possible range);
* but 'pnum' will only be 0 when end of file is reached.
*/
-int bdrv_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- bool include_base, int64_t offset,
- int64_t bytes, int64_t *pnum)
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ bool include_base, int64_t offset,
+ int64_t bytes, int64_t *pnum)
{
int depth;
int ret;
IO_CODE();
- ret = bdrv_common_block_status_above(top, base, include_base, false,
- offset, bytes, pnum, NULL, NULL,
- &depth);
+ ret = bdrv_co_common_block_status_above(bs, base, include_base, false,
+ offset, bytes, pnum, NULL, NULL,
+ &depth);
if (ret < 0) {
return ret;
}
bytes, read_flags, write_flags);
}
-static void bdrv_parent_cb_resize(BlockDriverState *bs)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs)
{
BdrvChild *c;
+
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass->resize) {
c->klass->resize(c);
/* Check the write protect flag of the LUN if we want to write */
if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
iscsilun->write_protected) {
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
goto out;
}
assert(!(offset % s->granularity));
WITH_GRAPH_RDLOCK_GUARD() {
- ret = bdrv_block_status_above(source, NULL, offset,
- nb_chunks * s->granularity,
- &io_bytes, NULL, NULL);
+ ret = bdrv_co_block_status_above(source, NULL, offset,
+ nb_chunks * s->granularity,
+ &io_bytes, NULL, NULL);
}
if (ret < 0) {
io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
}
WITH_GRAPH_RDLOCK_GUARD() {
- ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset,
- bytes, &count);
+ ret = bdrv_co_is_allocated_above(bs, s->base_overlay, true, offset,
+ bytes, &count);
}
if (ret < 0) {
return ret;
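/*
 * Illustrative sketch (not part of the patch; example_is_allocated is made
 * up): in coroutine context the scoped WITH_GRAPH_RDLOCK_GUARD() form is
 * used instead of the main-loop guard:
 */
static int coroutine_fn example_is_allocated(BlockDriverState *bs,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum)
{
    int ret;

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = bdrv_co_is_allocated(bs, offset, bytes, pnum);
    }
    return ret;
}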
AioContext *aio_context;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_find_node(id);
if (bs) {
qmp_blockdev_del(id, &local_err);
SnapshotEntry *snapshot_entry;
Error *err = NULL;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, &err);
if (!bs) {
error_report_err(err);
* Return failure if the server's advertised options are incompatible with the
* client's needs.
*/
-static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
{
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
int ret;
}
}
-static char *nfs_dirname(BlockDriverState *bs, Error **errp)
+static char * GRAPH_RDLOCK nfs_dirname(BlockDriverState *bs, Error **errp)
{
NFSClient *client = bs->opaque;
bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
/* Disable migration until bdrv_activate method is added */
+ bdrv_graph_rdlock_main_loop();
error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
+ bdrv_graph_rdunlock_main_loop();
+
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
error_setg(errp, "Migration blocker error");
}
}
-static void blockdev_remove_medium(const char *device, const char *id,
- Error **errp)
+static void GRAPH_UNLOCKED
+blockdev_remove_medium(const char *device, const char *id, Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
AioContext *aio_context;
bool has_attached_device;
+ GLOBAL_STATE_CODE();
+
blk = qmp_get_blk(device, id, errp);
if (!blk) {
return;
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
+ bdrv_graph_rdlock_main_loop();
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
+ bdrv_graph_rdunlock_main_loop();
goto out;
}
+ bdrv_graph_rdunlock_main_loop();
blk_remove_bs(blk);
BlockBackend *blk;
BlockDriverState *bs;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
blk = qmp_get_blk(device, id, errp);
if (!blk) {
return;
* Helper function for other query info functions. Store information about @bs
* in @info, setting @errp on error.
*/
-static void bdrv_do_query_node_info(BlockDriverState *bs,
- BlockNodeInfo *info,
- Error **errp)
+static void GRAPH_RDLOCK
+bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
{
int64_t size;
const char *backing_filename;
}
/* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
- Error **errp)
+static void GRAPH_RDLOCK
+bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp)
{
BlockInfo *info = g_malloc0(sizeof(*info));
BlockDriverState *bs = blk_bs(blk);
BlockBackend *blk;
Error *local_err = NULL;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
BlockInfoList *info;
}
/* Disable migration when qcow images are used */
+ bdrv_graph_rdlock_main_loop();
error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
+ bdrv_graph_rdunlock_main_loop();
+
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
error_free(s->migration_blocker);
return DIV_ROUND_UP(num_bits, 8);
}
-static int check_constraints_on_bitmap(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp)
+static int GRAPH_RDLOCK
+check_constraints_on_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int granularity_bits = ctz32(granularity);
return 0;
}
-static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
- uint32_t bitmap_table_size)
+static void GRAPH_RDLOCK
+clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
+ uint32_t bitmap_table_size)
{
BDRVQcow2State *s = bs->opaque;
int i;
return ret;
}
-static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
+static int GRAPH_RDLOCK
+free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
{
int ret;
uint64_t *bitmap_table;
* Store bitmap list to qcow2 image as a bitmap directory.
* Everything is checked.
*/
-static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
- uint64_t *offset, uint64_t *size, bool in_place)
+static int GRAPH_RDLOCK
+bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
+ uint64_t *offset, uint64_t *size, bool in_place)
{
int ret;
uint8_t *dir;
* Bitmap List end
*/
-static int update_ext_header_and_dir_in_place(BlockDriverState *bs,
- Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir_in_place(BlockDriverState *bs,
+ Qcow2BitmapList *bm_list)
{
BDRVQcow2State *s = bs->opaque;
int ret;
*/
}
-static int update_ext_header_and_dir(BlockDriverState *bs,
- Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir(BlockDriverState *bs, Qcow2BitmapList *bm_list)
{
BDRVQcow2State *s = bs->opaque;
int ret;
/* store_bitmap_data()
* Store bitmap to image, filling bitmap table accordingly.
*/
-static uint64_t *store_bitmap_data(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- uint32_t *bitmap_table_size, Error **errp)
+static uint64_t * GRAPH_RDLOCK
+store_bitmap_data(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ uint32_t *bitmap_table_size, Error **errp)
{
int ret;
BDRVQcow2State *s = bs->opaque;
* Store bm->dirty_bitmap to qcow2.
* Set bm->table_offset and bm->table_size accordingly.
*/
-static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
+static int GRAPH_RDLOCK
+store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
{
int ret;
uint64_t *tb;
return 0;
}
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+static int GRAPH_RDLOCK
+qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
{
int ret;
return 0;
}
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+static int GRAPH_RDLOCK
+qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
BDRVQcow2State *s = bs->opaque;
int ret = 0;
return 0;
}
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset, void **table, bool read_from_disk)
+static int GRAPH_RDLOCK
+qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table, bool read_from_disk)
{
BDRVQcow2State *s = bs->opaque;
int i;
* the cache is used; otherwise the L2 slice is loaded from the image
* file.
*/
-static int l2_load(BlockDriverState *bs, uint64_t offset,
- uint64_t l2_offset, uint64_t **l2_slice)
+static int GRAPH_RDLOCK
+l2_load(BlockDriverState *bs, uint64_t offset,
+ uint64_t l2_offset, uint64_t **l2_slice)
{
BDRVQcow2State *s = bs->opaque;
int start_of_slice = l2_entry_size(s) *
*
*/
-static int l2_allocate(BlockDriverState *bs, int l1_index)
+static int GRAPH_RDLOCK l2_allocate(BlockDriverState *bs, int l1_index)
{
BDRVQcow2State *s = bs->opaque;
uint64_t old_l2_offset;
*
* Returns 0 on success, -errno on failure
*/
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_slice,
- int *new_l2_index)
+static int GRAPH_RDLOCK
+get_cluster_table(BlockDriverState *bs, uint64_t offset,
+ uint64_t **new_l2_slice, int *new_l2_index)
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
*
* Returns 0 on success, -errno on failure.
*/
-static int coroutine_fn calculate_l2_meta(BlockDriverState *bs,
- uint64_t host_cluster_offset,
- uint64_t guest_offset, unsigned bytes,
- uint64_t *l2_slice, QCowL2Meta **m,
- bool keep_old)
+static int coroutine_fn GRAPH_RDLOCK
+calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
+ uint64_t guest_offset, unsigned bytes, uint64_t *l2_slice,
+ QCowL2Meta **m, bool keep_old)
{
BDRVQcow2State *s = bs->opaque;
int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
*
* -errno: in error cases
*/
-static int coroutine_fn handle_copied(BlockDriverState *bs,
- uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
- QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
BDRVQcow2State *s = bs->opaque;
int l2_index;
* function has been waiting for another request and the allocation must be
* restarted, but the whole request should not be failed.
*/
-static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
- uint64_t guest_offset,
- uint64_t *host_offset,
- uint64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
*
* -errno: in error cases
*/
-static int coroutine_fn handle_alloc(BlockDriverState *bs,
- uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
- QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
BDRVQcow2State *s = bs->opaque;
int l2_index;
* all clusters in the same L2 slice) and returns the number of discarded
* clusters.
*/
-static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
- uint64_t nb_clusters,
- enum qcow2_discard_type type, bool full_discard)
+static int GRAPH_RDLOCK
+discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
+ enum qcow2_discard_type type, bool full_discard)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l2_slice;
* all clusters in the same L2 slice) and returns the number of zeroed
* clusters.
*/
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
uint64_t nb_clusters, int flags)
{
return nb_clusters;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
unsigned nb_subclusters)
{
* status_cb(). l1_entries contains the total number of L1 entries and
* *visited_l1_entries counts all visited L1 entries.
*/
-static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
- int l1_size, int64_t *visited_l1_entries,
- int64_t l1_entries,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque)
+static int GRAPH_RDLOCK
+expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
+ int l1_size, int64_t *visited_l1_entries,
+ int64_t l1_entries,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
}
-static int load_refcount_block(BlockDriverState *bs,
- int64_t refcount_block_offset,
- void **refcount_block)
+static int GRAPH_RDLOCK
+load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset,
+ void **refcount_block)
{
BDRVQcow2State *s = bs->opaque;
*
* Returns 0 on success or -errno in error case
*/
-static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, void **refcount_block)
+static int GRAPH_RDLOCK
+alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index,
+ void **refcount_block)
{
BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
/* XXX: cache several refcount block clusters? */
/* @addend is the absolute value of the addend; if @decrease is set, @addend
* will be subtracted from the current refcount, otherwise it will be added */
-static int update_refcount(BlockDriverState *bs,
- int64_t offset,
- int64_t length,
- uint64_t addend,
- bool decrease,
- enum qcow2_discard_type type)
+static int GRAPH_RDLOCK
+update_refcount(BlockDriverState *bs, int64_t offset, int64_t length,
+ uint64_t addend, bool decrease, enum qcow2_discard_type type)
{
BDRVQcow2State *s = bs->opaque;
int64_t start, last, cluster_offset;
/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
- uint64_t max)
+static int64_t GRAPH_RDLOCK
+alloc_clusters_noref(BlockDriverState *bs, uint64_t size, uint64_t max)
{
BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
* Compares the actual reference count for each cluster in the image against the
* refcount as reported by the refcount structures on-disk.
*/
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
int64_t *highest_cluster,
*
* @allocated should be set to true if a new cluster has been allocated.
*/
-typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty,
- bool *allocated, Error **errp);
+typedef int /* GRAPH_RDLOCK_PTR */
+ (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty,
+ bool *allocated, Error **errp);
/**
* This "operation" for walk_over_reftable() allocates the refblock on disk (if
* it is not empty) and inserts its offset into the new reftable. The size of
* this new reftable is increased as required.
*/
-static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty, bool *allocated,
- Error **errp)
+static int GRAPH_RDLOCK
+alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
* offset specified by the new reftable's entry. It does not modify the new
* reftable or change any refcounts.
*/
-static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty, bool *allocated,
- Error **errp)
+static int GRAPH_RDLOCK
+flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
*
* @allocated is set to true if a new cluster has been allocated.
*/
-static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
- uint64_t *new_reftable_index,
- uint64_t *new_reftable_size,
- void *new_refblock, int new_refblock_size,
- int new_refcount_bits,
- RefblockFinishOp *operation, bool *allocated,
- Qcow2SetRefcountFunc *new_set_refcount,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque, int index, int total,
- Error **errp)
+static int GRAPH_RDLOCK
+walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+ uint64_t *new_reftable_index,
+ uint64_t *new_reftable_size,
+ void *new_refblock, int new_refblock_size,
+ int new_refcount_bits,
+ RefblockFinishOp *operation, bool *allocated,
+ Qcow2SetRefcountFunc *new_set_refcount,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, int index, int total,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t reftable_index;
return ret;
}
-static int64_t coroutine_fn get_refblock_offset(BlockDriverState *bs,
- uint64_t offset)
+static int64_t coroutine_fn GRAPH_RDLOCK
+get_refblock_offset(BlockDriverState *bs, uint64_t offset)
{
BDRVQcow2State *s = bs->opaque;
uint32_t index = offset_to_reftable_index(s, offset);
return covering_refblock_offset;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
{
BDRVQcow2State *s = bs->opaque;
* function when there are no pending requests; it does not guard against
* concurrent requests dirtying the image.
*/
-static int qcow2_mark_clean(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_mark_clean(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
* Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
* before if necessary.
*/
-static int coroutine_fn qcow2_mark_consistent(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_mark_consistent(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
} Qcow2ReopenState;
-static int qcow2_update_options_prepare(BlockDriverState *bs,
- Qcow2ReopenState *r,
- QDict *options, int flags,
- Error **errp)
+static int GRAPH_RDLOCK
+qcow2_update_options_prepare(BlockDriverState *bs, Qcow2ReopenState *r,
+ QDict *options, int flags, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
QemuOpts *opts = NULL;
qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->bl.pdiscard_alignment = s->cluster_size;
}
-static int qcow2_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
+static int GRAPH_UNLOCKED
+qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+ Error **errp)
{
BDRVQcow2State *s = state->bs->opaque;
Qcow2ReopenState *r;
int ret;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
r = g_new0(Qcow2ReopenState, 1);
state->opaque = r;
static void qcow2_reopen_commit_post(BDRVReopenState *state)
{
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (state->flags & BDRV_O_RDWR) {
Error *local_err = NULL;
return ret;
}
-static int qcow2_inactivate(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_inactivate(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
int ret, result = 0;
return result;
}
-static void qcow2_do_close(BlockDriverState *bs, bool close_data_file)
+static void coroutine_mixed_fn GRAPH_RDLOCK
+qcow2_do_close(BlockDriverState *bs, bool close_data_file)
{
BDRVQcow2State *s = bs->opaque;
qemu_vfree(s->l1_table);
g_free(s->image_backing_format);
if (close_data_file && has_data_file(bs)) {
+ GLOBAL_STATE_CODE();
+ bdrv_graph_rdunlock_main_loop();
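+ /*
+ * The writer lock below cannot be taken while a reader lock is held, so
+ * drop the main-loop rdlock around the write-locked section and re-take
+ * it afterwards.
+ */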
bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->data_file);
bdrv_graph_wrunlock();
s->data_file = NULL;
+ bdrv_graph_rdlock_main_loop();
}
qcow2_refcount_close(bs);
qcow2_free_snapshots(bs);
}
-static void qcow2_close(BlockDriverState *bs)
+static void GRAPH_UNLOCKED qcow2_close(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
qcow2_do_close(bs, true);
}
}
-static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static bool coroutine_fn GRAPH_RDLOCK
+is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
int64_t nr;
int res;
* backing file. So, we need a loop.
*/
do {
- res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
+ res = bdrv_co_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
offset += nr;
bytes -= nr;
} while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
return ret;
}
-static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
int ret;
BDRVQcow2State *s = bs->opaque;
return ret;
}
-static int make_completely_empty(BlockDriverState *bs)
+static int GRAPH_RDLOCK make_completely_empty(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
Error *local_err = NULL;
return ret;
}
-static int qcow2_make_empty(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_make_empty(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
uint64_t offset, end_offset;
return ret;
}
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK qcow2_co_flush_to_os(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
int ret;
return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
}
-static int qcow2_has_compressed_clusters(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_has_compressed_clusters(BlockDriverState *bs)
{
int64_t offset = 0;
int64_t bytes = bdrv_getlength(bs);
* Downgrades an image's version. To achieve this, any incompatible features
* have to be removed.
*/
-static int qcow2_downgrade(BlockDriverState *bs, int target_version,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- Error **errp)
+static int GRAPH_RDLOCK
+qcow2_downgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int current_version = s->qcow_version;
* features of older versions, some things may have to be presented
* differently.
*/
-static int qcow2_upgrade(BlockDriverState *bs, int target_version,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- Error **errp)
+static int GRAPH_RDLOCK
+qcow2_upgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
bool need_snapshot_update;
info->original_cb_opaque);
}
-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque,
- bool force,
- Error **errp)
+static int GRAPH_RDLOCK
+qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
- int64_t size, const char *message_format, ...)
- G_GNUC_PRINTF(5, 6);
+void GRAPH_RDLOCK
+qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+ int64_t size, const char *message_format, ...)
+ G_GNUC_PRINTF(5, 6);
int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
uint64_t entries, size_t entry_len,
int coroutine_fn GRAPH_RDLOCK qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
- uint64_t *refcount);
+int GRAPH_RDLOCK qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
+ uint64_t *refcount);
-int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
- uint64_t addend, bool decrease,
- enum qcow2_discard_type type);
+int GRAPH_RDLOCK
+qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
+ uint64_t addend, bool decrease,
+ enum qcow2_discard_type type);
-int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
- uint64_t additional_clusters, bool exact_size,
- int new_refblock_index,
- uint64_t new_refblock_offset);
+int64_t GRAPH_RDLOCK
+qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
+ uint64_t additional_clusters, bool exact_size,
+ int new_refblock_index,
+ uint64_t new_refblock_offset);
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int64_t nb_clusters);
-int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type);
-void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
- enum qcow2_discard_type type);
+int64_t GRAPH_RDLOCK
+qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend);
+int64_t coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters);
-int qcow2_flush_caches(BlockDriverState *bs);
-int qcow2_write_caches(BlockDriverState *bs);
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size);
+void GRAPH_RDLOCK qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type);
+void GRAPH_RDLOCK
+qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
+ enum qcow2_discard_type type);
+
+int GRAPH_RDLOCK
+qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset,
+ int l1_size, int addend);
+
+int GRAPH_RDLOCK qcow2_flush_caches(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_write_caches(BlockDriverState *bs);
int coroutine_fn qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
-int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
- int64_t size, bool data_file);
+int GRAPH_RDLOCK
+qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
+ int64_t size, bool data_file);
+
int coroutine_fn qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size,
int64_t offset, int64_t size);
-int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque, Error **errp);
+int GRAPH_RDLOCK
+qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp);
int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
-int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
int coroutine_fn GRAPH_RDLOCK
qcow2_detect_metadata_preallocation(BlockDriverState *bs);
/* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size);
+int GRAPH_RDLOCK
+qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size);
int coroutine_fn GRAPH_RDLOCK
qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
-int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
+int GRAPH_RDLOCK qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *buf, int nb_sectors, bool enc, Error **errp);
-int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
- unsigned int *bytes, uint64_t *host_offset,
- QCow2SubclusterType *subcluster_type);
-int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
- unsigned int *bytes,
- uint64_t *host_offset, QCowL2Meta **m);
+int GRAPH_RDLOCK
+qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
+ unsigned int *bytes, uint64_t *host_offset,
+ QCow2SubclusterType *subcluster_type);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m);
+
int coroutine_fn GRAPH_RDLOCK
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
int compressed_size, uint64_t *host_offset);
int coroutine_fn GRAPH_RDLOCK
qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, enum qcow2_discard_type type,
- bool full_discard);
+void coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
+
+int GRAPH_RDLOCK
+qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ enum qcow2_discard_type type, bool full_discard);
int coroutine_fn GRAPH_RDLOCK
qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
int flags);
-int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque);
+int GRAPH_RDLOCK
+qcow2_expand_zero_clusters(BlockDriverState *bs,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque);
/* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
+int GRAPH_RDLOCK
+qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id,
+ const char *name, Error **errp);
+
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
void qcow2_free_snapshots(BlockDriverState *bs);
int coroutine_fn GRAPH_RDLOCK
qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
-int qcow2_write_snapshots(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_write_snapshots(BlockDriverState *bs);
int coroutine_fn GRAPH_RDLOCK
qcow2_check_read_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
BdrvCheckMode fix);
-int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_fix_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
/* qcow2-cache.c functions */
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
int qcow2_cache_destroy(Qcow2Cache *c);
void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency);
+int GRAPH_RDLOCK qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
void qcow2_cache_clean_unused(Qcow2Cache *c);
-int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
+
+int GRAPH_RDLOCK
+qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+
+int GRAPH_RDLOCK
+qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
void qcow2_cache_put(Qcow2Cache *c, void **table);
void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
void qcow2_cache_discard(Qcow2Cache *c, void *table);
qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated, Error **errp);
bool qcow2_get_bitmap_info_list(BlockDriverState *bs,
Qcow2BitmapInfoList **info_list, Error **errp);
-int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+int GRAPH_RDLOCK qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+int GRAPH_RDLOCK qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
-bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
- bool release_stored, Error **errp);
-int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
-bool coroutine_fn qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
-int coroutine_fn qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- Error **errp);
+
+bool GRAPH_RDLOCK
+qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, bool release_stored,
+ Error **errp);
+
+bool coroutine_fn GRAPH_RDLOCK
+qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+ Error **errp);
+
bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs);
uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs,
uint32_t cluster_size);
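The GRAPH_RDLOCK markers added to these prototypes are clang Thread Safety Analysis annotations: a caller must hold the block-graph read lock when invoking the annotated function, otherwise the build fails with TSA enabled. A minimal sketch of the resulting calling pattern, using the guard macro that appears throughout this series (my_query_qcow2() is a hypothetical caller, not part of the patch):

    /* Hypothetical caller, shown only to illustrate the locking contract. */
    static void my_query_qcow2(BlockDriverState *bs)
    {
        /* Take the graph read lock for the rest of this scope (main loop). */
        GRAPH_RDLOCK_GUARD_MAINLOOP();

        /* GRAPH_RDLOCK-annotated functions may now be called safely. */
        qcow2_write_snapshots(bs);
    }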
end_sector - start_sector);
}
-static void quorum_report_failure(QuorumAIOCB *acb)
+static void GRAPH_RDLOCK quorum_report_failure(QuorumAIOCB *acb)
{
const char *reference = bdrv_get_device_or_node_name(acb->bs);
int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
static int quorum_vote_error(QuorumAIOCB *acb);
-static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
+static bool GRAPH_RDLOCK quorum_has_too_much_io_failed(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->bs->opaque;
BDRV_REQ_ZERO_WRITE;
if (bs->probed && !bdrv_is_read_only(bs)) {
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(bs->file->bs);
+ bdrv_graph_rdunlock_main_loop();
fprintf(stderr,
"WARNING: Image format was not specified for '%s' and probing "
"guessed raw.\n"
/* If we are using an rbd snapshot, we must be r/o, otherwise
* leave as-is */
if (s->snap != NULL) {
+ bdrv_graph_rdlock_main_loop();
r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp);
+ bdrv_graph_rdunlock_main_loop();
if (r < 0) {
goto failed_post_open;
}
BDRVRBDState *s = state->bs->opaque;
int ret = 0;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (s->snap && state->flags & BDRV_O_RDWR) {
error_setg(errp,
"Cannot change node '%s' to r/w when using RBD snapshot",
while (remaining_sectors > 0) {
int64_t count;
- ret = bdrv_is_allocated_above(top->bs, base->bs, false,
- sector_num * BDRV_SECTOR_SIZE,
- remaining_sectors * BDRV_SECTOR_SIZE,
- &count);
+ ret = bdrv_co_is_allocated_above(top->bs, base->bs, false,
+ sector_num * BDRV_SECTOR_SIZE,
+ remaining_sectors * BDRV_SECTOR_SIZE,
+ &count);
if (ret < 0) {
goto out1;
}
return ret;
}
-static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
+static void GRAPH_UNLOCKED
+secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
{
BDRVReplicationState *s = bs->opaque;
BdrvChild *active_disk = bs->file;
Error *local_err = NULL;
int ret;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (!s->backup_job) {
error_setg(errp, "Backup job was cancelled unexpectedly");
return;
backup_job_cleanup(bs);
}
-static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
+static bool GRAPH_RDLOCK
+check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
{
BdrvChild *child;
Error *local_err = NULL;
BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
+ GLOBAL_STATE_CODE();
+
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
s = bs->opaque;
return;
}
+ bdrv_graph_rdlock_main_loop();
secondary_disk = hidden_disk->bs->backing;
if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
error_setg(errp, "The secondary disk doesn't have block backend");
+ bdrv_graph_rdunlock_main_loop();
aio_context_release(aio_context);
return;
}
+ bdrv_graph_rdunlock_main_loop();
/* verify the length */
active_length = bdrv_getlength(active_disk->bs);
/* Must be true, or the bdrv_getlength() calls would have failed */
assert(active_disk->bs->drv && hidden_disk->bs->drv);
+ bdrv_graph_rdlock_main_loop();
if (!active_disk->bs->drv->bdrv_make_empty ||
!hidden_disk->bs->drv->bdrv_make_empty) {
error_setg(errp,
"Active disk or hidden disk doesn't support make_empty");
aio_context_release(aio_context);
+ bdrv_graph_rdunlock_main_loop();
return;
}
+ bdrv_graph_rdunlock_main_loop();
/* reopen the backing file in r/w mode */
reopen_backing_file(bs, true, &local_err);
return;
}
- bdrv_graph_wrunlock();
-
/* start backup job now */
error_setg(&s->blocker,
"Block device is in use by internal backup job");
if (!top_bs || !bdrv_is_root_node(top_bs) ||
!check_top_bs(top_bs, bs)) {
error_setg(errp, "No top_bs or it is invalid");
+ bdrv_graph_wrunlock();
reopen_backing_file(bs, false, NULL);
aio_context_release(aio_context);
return;
bdrv_op_block_all(top_bs, s->blocker);
bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
+ bdrv_graph_wrunlock();
+
s->backup_job = backup_job_create(
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
* back if the given BDS does not support snapshots.
* Return NULL if there is no BDS to (safely) fall back to.
*/
-static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs)
+static BdrvChild * GRAPH_RDLOCK
+bdrv_snapshot_fallback_child(BlockDriverState *bs)
{
BdrvChild *fallback = bdrv_primary_child(bs);
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
/* We allow fallback only to primary child */
if (!fallback) {
return NULL;
return fallback;
}
-static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_snapshot_fallback(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return child_bs(bdrv_snapshot_fallback_child(bs));
}
return ret;
}
+ bdrv_graph_rdlock_main_loop();
fallback = bdrv_snapshot_fallback_child(bs);
+ bdrv_graph_rdunlock_main_loop();
+
if (fallback) {
QDict *options;
QDict *file_options;
* respective option (with the qdict_put_str() call above).
* Assert that .bdrv_open() has attached the right BDS as primary child.
*/
+ bdrv_graph_rdlock_main_loop();
assert(bdrv_primary_bs(bs) == fallback_bs);
+ bdrv_graph_rdunlock_main_loop();
+
bdrv_unref(fallback_bs);
return ret;
}
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
- GLOBAL_STATE_CODE();
if (!drv) {
return -ENOMEDIUM;
}
BlockDriver *drv = bs->drv;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
}
-static int bdrv_all_get_snapshot_devices(bool has_devices, strList *devices,
- GList **all_bdrvs,
- Error **errp)
+static int GRAPH_RDLOCK
+bdrv_all_get_snapshot_devices(bool has_devices, strList *devices,
+ GList **all_bdrvs, Error **errp)
{
g_autoptr(GList) bdrvs = NULL;
}
-static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
+static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return false;
}
GList *iterbdrvs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return false;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
{
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ int ret;
GLOBAL_STATE_CODE();
- if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+ bdrv_graph_rdlock_main_loop();
+ ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
+ bdrv_graph_rdunlock_main_loop();
+
+ if (ret < 0) {
return -1;
}
BlockDriverState *bs = iterbdrvs->data;
AioContext *ctx = bdrv_get_aio_context(bs);
int ret = 0;
+ bool all_snapshots_includes_bs;
aio_context_acquire(ctx);
- if (devices || bdrv_all_snapshots_includes_bs(bs)) {
+ bdrv_graph_rdlock_main_loop();
+ all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs);
+ bdrv_graph_rdunlock_main_loop();
+
+ if (devices || all_snapshots_includes_bs) {
ret = bdrv_snapshot_goto(bs, name, errp);
}
aio_context_release(ctx);
if (ret < 0) {
+ bdrv_graph_rdlock_main_loop();
error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
name, bdrv_get_device_or_node_name(bs));
+ bdrv_graph_rdunlock_main_loop();
return -1;
}
GList *iterbdrvs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
{
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return NULL;
copy = false;
WITH_GRAPH_RDLOCK_GUARD() {
- ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
+ ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
if (ret == 1) {
/* Allocated in the top, no need to copy. */
} else if (ret >= 0) {
* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).
*/
- ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
- s->base_overlay, true,
- offset, n, &n);
+ ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+ s->base_overlay, true,
+ offset, n, &n);
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
n = len - offset;
}
/* Disable migration when vdi images are used */
+ bdrv_graph_rdlock_main_loop();
error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
+ bdrv_graph_rdunlock_main_loop();
+
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
error_free(s->migration_blocker);
uint64_t signature;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
return ret;
}
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
s->bat = NULL;
s->first_visible_write = true;
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
- Error **errp);
+int GRAPH_RDLOCK
+vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+ Error **errp);
int coroutine_fn GRAPH_RDLOCK
vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
return 0;
}
-static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
- Error **errp)
+static int GRAPH_RDLOCK
+vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, Error **errp)
{
int ret;
size_t l1_size;
return ret;
}
-static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
- BdrvChild *file,
- int flags, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_vmfs_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+ Error **errp)
{
int ret;
uint32_t magic;
return 0;
}
-static int vmdk_open_se_sparse(BlockDriverState *bs,
- BdrvChild *file,
- int flags, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_se_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+ Error **errp)
{
int ret;
VMDKSESparseConstHeader const_header;
return buf;
}
-static int vmdk_open_vmdk4(BlockDriverState *bs,
- BdrvChild *file,
- int flags, QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_vmdk4(BlockDriverState *bs, BdrvChild *file, int flags,
+ QDict *options, Error **errp)
{
int ret;
uint32_t magic;
}
/* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
- char *buf, QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+ char *buf, QDict *options, Error **errp)
{
uint32_t magic;
return s;
}
-static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
- QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
+ Error **errp)
{
int ret;
int matches;
char extent_opt_prefix[32];
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
for (p = desc; *p; p = next_line(p)) {
/* parse extent line in one of below formats:
*
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
+ bdrv_graph_rdunlock_main_loop();
bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_graph_rdlock_main_loop();
goto out;
}
extent->flat_start_offset = flat_offset << 9;
}
g_free(buf);
if (ret) {
+ bdrv_graph_rdunlock_main_loop();
bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_graph_rdlock_main_loop();
goto out;
}
extent = &s->extents[s->num_extents - 1];
} else if (!strcmp(type, "SESPARSE")) {
ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
if (ret) {
+ bdrv_graph_rdunlock_main_loop();
bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_graph_rdlock_main_loop();
goto out;
}
extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
+ bdrv_graph_rdunlock_main_loop();
bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_graph_rdlock_main_loop();
ret = -ENOTSUP;
goto out;
}
return ret;
}
-static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
- QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options,
+ Error **errp)
{
int ret;
char ct[128];
return 1;
}
-static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
+static VmdkExtentInfo * GRAPH_RDLOCK vmdk_get_extent_info(VmdkExtent *extent)
{
VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
return ret;
}
-static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
- Error **errp)
+static ImageInfoSpecific * GRAPH_RDLOCK
+vmdk_get_specific_info(BlockDriverState *bs, Error **errp)
{
int i;
BDRVVmdkState *s = bs->opaque;
}
/* Disable migration when VHD images are used */
+ bdrv_graph_rdlock_main_loop();
error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
+ bdrv_graph_rdunlock_main_loop();
+
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
error_free(s->migration_blocker);
QemuOpts *opts;
int ret;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
#ifdef DEBUG
vvv = s;
#endif
if (s->qcow) {
int64_t n;
int ret;
- ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE,
- (nb_sectors - i) * BDRV_SECTOR_SIZE, &n);
+ ret = bdrv_co_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE,
+ (nb_sectors - i) * BDRV_SECTOR_SIZE, &n);
if (ret < 0) {
return ret;
}
}
for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
- was_modified = bdrv_is_allocated(s->qcow->bs,
- (cluster2sector(s, cluster_num) +
- i) * BDRV_SECTOR_SIZE,
- BDRV_SECTOR_SIZE, NULL);
+ was_modified = bdrv_co_is_allocated(s->qcow->bs,
+ (cluster2sector(s, cluster_num) +
+ i) * BDRV_SECTOR_SIZE,
+ BDRV_SECTOR_SIZE, NULL);
}
/*
for (i = 0; i < s->sectors_per_cluster; i++) {
int res;
- res = bdrv_is_allocated(s->qcow->bs,
- (offs + i) * BDRV_SECTOR_SIZE,
- BDRV_SECTOR_SIZE, NULL);
+ res = bdrv_co_is_allocated(s->qcow->bs,
+ (offs + i) * BDRV_SECTOR_SIZE,
+ BDRV_SECTOR_SIZE, NULL);
if (res < 0) {
return -1;
}
BlockDriverState *bs;
AioContext *aio_context;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_lookup_bs(name, name, errp);
if (bs == NULL) {
return NULL;
SnapshotInfo *info = NULL;
int ret;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return NULL;
AioContext *aio_context;
int ret1;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
tran_add(tran, &internal_snapshot_drv, state);
device = internal->device;
AioContext *aio_context;
Error *local_error = NULL;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (!state->created) {
return;
}
bool set_backing_hd = false;
int ret;
+ GLOBAL_STATE_CODE();
+
tran_add(tran, &drive_backup_drv, state);
if (!backup->has_mode) {
}
/* Early check to avoid creating target */
+ bdrv_graph_rdlock_main_loop();
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+ bdrv_graph_rdunlock_main_loop();
goto out;
}
+ bdrv_graph_rdunlock_main_loop();
flags = bs->open_flags | BDRV_O_RDWR;
BlockDriverState *explicit_backing =
bdrv_skip_implicit_filters(source);
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(explicit_backing);
+ bdrv_graph_rdunlock_main_loop();
+
bdrv_img_create(backup->target, format,
explicit_backing->filename,
explicit_backing->drv->format_name, NULL,
return;
}
+ bdrv_graph_co_rdlock();
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
error_setg(errp, QERR_DEVICE_IN_USE, device);
+ bdrv_graph_co_rdunlock();
return;
}
+ bdrv_graph_co_rdunlock();
blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
if (!blk) {
Error *local_err = NULL;
int job_flags = JOB_DEFAULT;
+ GLOBAL_STATE_CODE();
+
if (base && base_node) {
error_setg(errp, "'base' and 'base-node' cannot be specified "
"at the same time");
goto out;
}
assert(bdrv_get_aio_context(base_bs) == aio_context);
+
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(base_bs);
+ bdrv_graph_rdunlock_main_loop();
}
if (bottom) {
* Check for op blockers in the whole chain between bs and base (or bottom)
*/
iter_end = bottom ? bdrv_filter_or_cow_bs(bottom_bs) : base_bs;
+ bdrv_graph_rdlock_main_loop();
for (iter = bs; iter && iter != iter_end;
iter = bdrv_filter_or_cow_bs(iter))
{
if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
+ bdrv_graph_rdunlock_main_loop();
goto out;
}
}
+ bdrv_graph_rdunlock_main_loop();
/* if we are streaming the entire chain, the result will have no backing
* file, and specifying one is therefore an error */
XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp)
{
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
return bdrv_get_xdbg_block_graph(errp);
}
}
/* Early check to avoid creating target */
+ bdrv_graph_rdlock_main_loop();
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
+ bdrv_graph_rdunlock_main_loop();
return;
}
+ bdrv_graph_rdunlock_main_loop();
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
break;
case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
/* create new image with backing file */
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(explicit_backing);
+ bdrv_graph_rdunlock_main_loop();
+
bdrv_img_create(arg->target, format,
explicit_backing->filename,
explicit_backing->drv->format_name,
/* even though we are not necessarily operating on bs, we need it to
* determine if block ops are currently prohibited on the chain */
+ bdrv_graph_rdlock_main_loop();
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
+ bdrv_graph_rdunlock_main_loop();
goto out;
}
+ bdrv_graph_rdunlock_main_loop();
/* final sanity check */
if (!bdrv_chain_contains(bs, image_bs)) {
BlockDriverState *bs;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
bs = bdrv_find_node(node_name);
if (!bs) {
AioContext *new_context;
BlockDriverState *bs;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
bs = bdrv_find_node(node_name);
if (!bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
BlockJob *job;
int ret;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
job_id = bdrv_get_device_name(bs);
.width = width,
.height = height,
};
- pixman_image_t *i =
+ pixman_image_t *img =
pixman_image_create_bits(pixman_image_get_format(res->image),
msg->payload.update.width,
msg->payload.update.height,
payload.update.data),
width * bpp);
pixman_image_composite(PIXMAN_OP_SRC,
- res->image, NULL, i,
+ res->image, NULL, img,
extents->x1, extents->y1,
0, 0, 0, 0,
width, height);
- pixman_image_unref(i);
+ pixman_image_unref(img);
vg_send_msg(g, msg, -1);
g_free(msg);
}
};
#define VUGPU_FILL_CMD(out) do { \
- size_t s; \
- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
+ size_t vugpufillcmd_s_ = \
+ iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
&out, sizeof(out)); \
- if (s != sizeof(out)) { \
+ if (vugpufillcmd_s_ != sizeof(out)) { \
g_critical("%s: command size incorrect %zu vs %zu", \
- __func__, s, sizeof(out)); \
+ __func__, vugpufillcmd_s_, sizeof(out)); \
return; \
} \
} while (0)
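The point of the rename above is macro hygiene: the old macro-local `s` could collide with an identifier named `s` at the expansion site. A self-contained demonstration of the hazard, with invented names:

    #include <stdio.h>

    /* The inner 's' is already in scope in its own initializer, so
     * sizeof(out) with out == 's' measures the macro-local size_t. */
    #define FILL_BAD(out) do {                         \
            size_t s = sizeof(out);                    \
            printf("copied %zu bytes\n", s);           \
        } while (0)

    int main(void)
    {
        int s = 42;  /* the caller's variable happens to be named 's' */
        FILL_BAD(s); /* prints 8 on LP64 instead of the expected 4 */
        printf("caller's s = %d\n", s);
        return 0;
    }

Giving the local an unlikely name such as vugpufillcmd_s_ makes this class of collision practically impossible.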
- "Zve64f" should be replaced with "zve64f"
- "Zve64d" should be replaced with "zve64d"
+``-device pvrdma`` and the rdma subsystem (since 8.2)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The pvrdma device and the whole rdma subsystem are in bad shape and
+without active maintenance. The QEMU project intends to remove this
+device and subsystem from the code base in a future release without
+replacement unless somebody steps up and improves the situation.
+
+
Block device options
''''''''''''''''''''
return 0;
}
-static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
- Error **errp)
+static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
{
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+
+ if (mdc->get_memslots) {
+ return mdc->get_memslots(md);
+ }
+ return 1;
+}
+
+/*
+ * Memslots that are reserved by memory devices (required but still reported
+ * as free from KVM / vhost).
+ */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+ if (ms->device_memory->used_memslots >
+ ms->device_memory->required_memslots) {
+ /* This is unexpected, and we warned already in the memory notifier. */
+ return 0;
+ }
+ return ms->device_memory->required_memslots -
+ ms->device_memory->used_memslots;
+}
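For example, a memory device that decided to use eight memslots but currently has only two of them mapped still reserves six slots that KVM and vhost report as free. A self-contained sketch of the rule above, with invented numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned required_memslots = 8; /* device decided on 8 memslots */
        unsigned used_memslots = 2;     /* only 2 are currently mapped */
        unsigned reserved;

        if (used_memslots > required_memslots) {
            reserved = 0; /* unexpected; a notifier warns in this case */
        } else {
            reserved = required_memslots - used_memslots;
        }
        printf("reserved = %u\n", reserved); /* -> 6 */
        return 0;
    }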
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+ if (!current_machine->device_memory) {
+ return 0;
+ }
+ return get_reserved_memslots(current_machine);
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+ if (!current_machine->device_memory) {
+ return false;
+ }
+
+ return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+ MemoryRegion *mr)
+{
+ const unsigned int reserved = get_reserved_memslots(ms);
+ const uint64_t size = memory_region_size(mr);
+ unsigned int max = vhost_get_max_memslots();
+ unsigned int free = vhost_get_free_memslots();
+ uint64_t available_space;
+ unsigned int memslots;
+
+ if (kvm_enabled()) {
+ max = MIN(max, kvm_get_max_memslots());
+ free = MIN(free, kvm_get_free_memslots());
+ }
+
+ /*
+ * If we have fewer memslots overall than what we consider reasonable,
+ * just keep it to a minimum.
+ */
+ if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+ return 1;
+ }
+
+ /*
+ * Consider our soft-limit across all memory devices. We don't really
+ * expect to exceed this limit in reasonable configurations.
+ */
+ if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+ ms->device_memory->required_memslots) {
+ return 1;
+ }
+ memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+ ms->device_memory->required_memslots;
+
+ /*
+ * Consider the memslots that are actually still free. This is only
+ * relevant if other memslot consumers would consume *significantly* more
+ * memslots than what we prepared for (> 253). Unlikely, but let's just
+ * handle it cleanly.
+ */
+ memslots = MIN(memslots, free - reserved);
+ if (memslots < 1 || unlikely(free < reserved)) {
+ return 1;
+ }
+
+ /* If no other memory device can fit, give all memslots to this device. */
+ if (size == ms->maxram_size - ms->ram_size) {
+ return memslots;
+ }
+
+ /*
+ * Simple heuristic: equally distribute the memslots over the space
+ * still available for memory devices.
+ */
+ available_space = ms->maxram_size - ms->ram_size -
+ ms->device_memory->used_region_size;
+ memslots = (double)memslots * size / available_space;
+ return memslots < 1 ? 1 : memslots;
+}
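A worked example of the heuristic, assuming a soft limit of 256 for illustration (the actual MEMORY_DEVICES_* constants are defined elsewhere in the series) and invented machine sizes:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const unsigned soft_limit = 256;           /* assumed value */
        const unsigned required = 16;              /* already required */
        unsigned memslots = soft_limit - required; /* 240 distributable */

        uint64_t maxram = 64ULL << 30;             /* maxmem=64G */
        uint64_t ram = 16ULL << 30;                /* boot memory */
        uint64_t used_region = 8ULL << 30;         /* used by other devices */
        uint64_t available_space = maxram - ram - used_region; /* 40 GiB */
        uint64_t size = 10ULL << 30;               /* the new device */

        /* Proportional share: 240 * 10 GiB / 40 GiB = 60 memslots. */
        memslots = (double)memslots * size / available_space;
        printf("limit for this device: %u\n", memslots < 1 ? 1 : memslots);
        return 0;
    }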
+
+static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
+ MemoryRegion *mr, Error **errp)
+{
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const uint64_t used_region_size = ms->device_memory->used_region_size;
const uint64_t size = memory_region_size(mr);
+ const unsigned int reserved_memslots = get_reserved_memslots(ms);
+ unsigned int required_memslots, memslot_limit;
+
+ /*
+ * Instruct the device to decide how many memslots to use, if applicable,
+ * before we query the number of required memslots the first time.
+ */
+ if (mdc->decide_memslots) {
+ memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+ mdc->decide_memslots(md, memslot_limit);
+ }
+ required_memslots = memory_device_get_memslots(md);
- /* we will need a new memory slot for kvm and vhost */
- if (kvm_enabled() && !kvm_has_free_slot(ms)) {
- error_setg(errp, "hypervisor has no free memory slots left");
+ /* we will need memory slots for kvm and vhost */
+ if (kvm_enabled() &&
+ kvm_get_free_memslots() < required_memslots + reserved_memslots) {
+ error_setg(errp, "hypervisor has not enough free memory slots left");
return;
}
- if (!vhost_has_free_slot()) {
- error_setg(errp, "a used vhost backend has no free memory slots left");
+ if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
+ error_setg(errp, "a used vhost backend has not enough free memory slots left");
return;
}
goto out;
}
- memory_device_check_addable(ms, mr, &local_err);
+ memory_device_check_addable(ms, md, mr, &local_err);
if (local_err) {
goto out;
}
void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
const uint64_t addr = mdc->get_addr(md);
MemoryRegion *mr;
g_assert(ms->device_memory);
ms->device_memory->used_region_size += memory_region_size(mr);
+ ms->device_memory->required_memslots += memslots;
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active++;
+ }
+
memory_region_add_subregion(&ms->device_memory->mr,
addr - ms->device_memory->base, mr);
trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
MemoryRegion *mr;
/*
g_assert(ms->device_memory);
memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active--;
+ }
ms->device_memory->used_region_size -= memory_region_size(mr);
+ ms->device_memory->required_memslots -= memslots;
trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
mdc->get_addr(md));
}
return memory_region_size(mr);
}
+static void memory_devices_region_mod(MemoryListener *listener,
+ MemoryRegionSection *mrs, bool add)
+{
+ DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
+ listener);
+
+ if (!memory_region_is_ram(mrs->mr)) {
+ warn_report("Unexpected memory region mapped into device memory region.");
+ return;
+ }
+
+ /*
+ * The expectation is that each distinct RAM memory region section in
+ * our region for memory devices consumes exactly one memslot in KVM
+ * and in vhost. For vhost, this is true, except:
+ * * ROM memory regions don't consume a memslot. These get used very
+ * rarely for memory devices (R/O NVDIMMs).
+ * * Memslots without a fd (memory-backend-ram) don't necessarily
+ * consume a memslot. Such setups are quite rare and possibly bogus:
+ * the memory would be inaccessible by such vhost devices.
+ *
+ * So for vhost, in corner cases we might over-estimate the number of
+ * memslots that are currently used or that might still be reserved
+ * (required - used).
+ */
+ dms->used_memslots += add ? 1 : -1;
+
+ if (dms->used_memslots > dms->required_memslots) {
+ warn_report("Memory devices use more memory slots than indicated as required.");
+ }
+}
+
+static void memory_devices_region_add(MemoryListener *listener,
+ MemoryRegionSection *mrs)
+{
+ return memory_devices_region_mod(listener, mrs, true);
+}
+
+static void memory_devices_region_del(MemoryListener *listener,
+ MemoryRegionSection *mrs)
+{
+ return memory_devices_region_mod(listener, mrs, false);
+}
+
void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
{
g_assert(size);
memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
size);
+ address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
+ "device-memory");
memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
&ms->device_memory->mr);
+
+ /* Track the number of memslots used by memory devices. */
+ ms->device_memory->listener.region_add = memory_devices_region_add;
+ ms->device_memory->listener.region_del = memory_devices_region_del;
+ memory_listener_register(&ms->device_memory->listener,
+ &ms->device_memory->as);
}
static const TypeInfo memory_device_info = {
bool
depends on I2C
+config I2C_ECHO
+ bool
+ default y if TEST_DEVICES
+ depends on I2C
+
config PL310
bool
+/*
+ * Example I2C device using asynchronous I2C send.
+ *
+ * Copyright (C) 2023 Samsung Electronics Co., Ltd. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
#include "qemu/osdep.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
system_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
-system_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
+system_ss.add(when: 'CONFIG_I2C_ECHO', if_true: files('i2c-echo.c'))
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
dma_addr_t dir_addr, uint32_t num_pages)
{
uint64_t *dir, *tbl;
- int rc = 0;
+ int max_pages, rc = 0;
if (!num_pages) {
rdma_error_report("Ring pages count must be strictly positive");
return -EINVAL;
}
+ /*
+ * Make sure we can satisfy the requested number of pages in a single
+ * TARGET_PAGE_SIZE-sized page table (taking into account that the first
+ * entry is reserved for ring state)
+ */
+ max_pages = TARGET_PAGE_SIZE / sizeof(dma_addr_t) - 1;
+ if (num_pages > max_pages) {
+ rdma_error_report("Maximum pages on a single directory must not exceed %d\n",
+ max_pages);
+ return -EINVAL;
+ }
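With a 4 KiB TARGET_PAGE_SIZE and an 8-byte dma_addr_t (typical values; both are target dependent), the single directory page holds 512 entries, one of which is reserved for ring state:

    #include <stdio.h>

    int main(void)
    {
        const int target_page_size = 4096; /* assumed */
        const int dma_addr_size = 8;       /* assumed sizeof(dma_addr_t) */

        int max_pages = target_page_size / dma_addr_size - 1; /* 511 */
        printf("max ring pages: %d\n", max_pages);
        return 0;
    }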
+
dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE);
if (!dir) {
rdma_error_report("Failed to map to page directory (ring %s)", name);
rc = -ENOMEM;
goto out;
}
+
+ /* We support only one page table for a ring */
tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
if (!tbl) {
rdma_error_report("Failed to map to page table (ring %s)", name);
bool ram_shared = false;
PCIDevice *func0;
+ warn_report_once("pvrdma is deprecated and will be removed in a future release");
+
rdma_info_report("Initializing device %s %x.%x", pdev->name,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
{
- return true;
+ return UINT_MAX;
+}
+
+unsigned int vhost_get_free_memslots(void)
+{
+ return UINT_MAX;
}
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
return -ENOTSUP;
}
-static bool vhost_user_can_merge(struct vhost_dev *dev,
- uint64_t start1, uint64_t size1,
- uint64_t start2, uint64_t size2)
-{
- ram_addr_t offset;
- int mfd, rfd;
-
- (void)vhost_user_get_mr_data(start1, &offset, &mfd);
- (void)vhost_user_get_mr_data(start2, &offset, &rfd);
-
- return mfd == rfd;
-}
-
static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
VhostUserMsg msg;
return 0;
}
-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
- MemoryRegionSection *section)
+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
{
- return memory_region_get_fd(section->mr) >= 0;
+ return true;
}
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
.vhost_backend_init = vhost_user_backend_init,
.vhost_backend_cleanup = vhost_user_backend_cleanup,
.vhost_backend_memslots_limit = vhost_user_memslots_limit,
+ .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
.vhost_set_log_base = vhost_user_set_log_base,
.vhost_set_mem_table = vhost_user_set_mem_table,
.vhost_set_vring_addr = vhost_user_set_vring_addr,
.vhost_set_vring_enable = vhost_user_set_vring_enable,
.vhost_requires_shm_log = vhost_user_requires_shm_log,
.vhost_migration_done = vhost_user_migration_done,
- .vhost_backend_can_merge = vhost_user_can_merge,
.vhost_net_set_mtu = vhost_user_net_set_mtu,
.vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
.vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
.vhost_set_config = vhost_user_set_config,
.vhost_crypto_create_session = vhost_user_crypto_create_session,
.vhost_crypto_close_session = vhost_user_crypto_close_session,
- .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
.vhost_get_inflight_fd = vhost_user_get_inflight_fd,
.vhost_set_inflight_fd = vhost_user_set_inflight_fd,
.vhost_dev_start = vhost_user_dev_start,
.vhost_set_config = vhost_vdpa_set_config,
.vhost_requires_shm_log = NULL,
.vhost_migration_done = NULL,
- .vhost_backend_can_merge = NULL,
.vhost_net_set_mtu = NULL,
.vhost_set_iotlb_callback = NULL,
.vhost_send_device_iotlb_msg = NULL,
#include "qemu/log.h"
#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-bus.h"
+#include "hw/mem/memory-device.h"
#include "migration/blocker.h"
#include "migration/qemu-file-types.h"
#include "sysemu/dma.h"
static struct vhost_log *vhost_log;
static struct vhost_log *vhost_log_shm;
+/* Memslots used by backends that support private memslots (without an fd). */
static unsigned int used_memslots;
+
+/* Memslots used by backends that only support shared memslots (with an fd). */
+static unsigned int used_shared_memslots;
+
static QLIST_HEAD(, vhost_dev) vhost_devices =
QLIST_HEAD_INITIALIZER(vhost_devices);
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
+{
+ unsigned int max = UINT_MAX;
+ struct vhost_dev *hdev;
+
+ QLIST_FOREACH(hdev, &vhost_devices, entry) {
+ max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
+ }
+ return max;
+}
+
+unsigned int vhost_get_free_memslots(void)
{
- unsigned int slots_limit = ~0U;
+ unsigned int free = UINT_MAX;
struct vhost_dev *hdev;
QLIST_FOREACH(hdev, &vhost_devices, entry) {
unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
- slots_limit = MIN(slots_limit, r);
+ unsigned int cur_free;
+
+ if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+ hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+ cur_free = r - used_shared_memslots;
+ } else {
+ cur_free = r - used_memslots;
+ }
+ free = MIN(free, cur_free);
}
- return slots_limit > used_memslots;
+ return free;
}
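Since every registered vhost device must be able to handle the resulting memory layout, the free count is the minimum across backends, each measured against the slot type it supports. A sketch with invented numbers:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        /* Backend A only supports shared (fd-based) memslots; backend B
         * also supports private ones. All numbers are invented. */
        unsigned limit_a = 509, used_shared = 3;
        unsigned limit_b = 64, used_private = 5;

        unsigned free = MIN(limit_a - used_shared, limit_b - used_private);
        printf("free vhost memslots: %u\n", free); /* -> 59 */
        return 0;
    }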
static void vhost_dev_sync_region(struct vhost_dev *dev,
* vhost_section: identify sections needed for vhost access
*
* We only care about RAM sections here (where virtqueue and guest
- * internals accessed by virtio might live). If we find one we still
- * allow the backend to potentially filter it out of our list.
+ * internals accessed by virtio might live).
*/
static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
{
return false;
}
- if (dev->vhost_ops->vhost_backend_mem_section_filter &&
- !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
+ /*
+ * Some backends (like vhost-user) can only handle memory regions
+ * that have an fd (can be mapped into a different process). Filter out
+ * the ones without an fd, if requested.
+ *
+ * TODO: we might have to limit to MAP_SHARED as well.
+ */
+ if (memory_region_get_fd(section->mr) < 0 &&
+ dev->vhost_ops->vhost_backend_no_private_memslots &&
+ dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
trace_vhost_reject_section(mr->name, 2);
return false;
}
dev->n_mem_sections * sizeof dev->mem->regions[0];
dev->mem = g_realloc(dev->mem, regions_size);
dev->mem->nregions = dev->n_mem_sections;
- used_memslots = dev->mem->nregions;
+
+ if (dev->vhost_ops->vhost_backend_no_private_memslots &&
+ dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
+ used_shared_memslots = dev->mem->nregions;
+ } else {
+ used_memslots = dev->mem->nregions;
+ }
+
for (i = 0; i < dev->n_mem_sections; i++) {
struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
struct MemoryRegionSection *mrs = dev->mem_sections + i;
mrs_size, mrs_host);
}
- if (dev->n_tmp_sections) {
+ if (dev->n_tmp_sections && !section->unmergeable) {
/* Since we already have at least one section, lets see if
* this extends it; since we're scanning in order, we only
* have to look at the last one, and the FlatView that calls
size_t offset = mrs_gpa - prev_gpa_start;
if (prev_host_start + offset == mrs_host &&
- section->mr == prev_sec->mr &&
- (!dev->vhost_ops->vhost_backend_can_merge ||
- dev->vhost_ops->vhost_backend_can_merge(dev,
- mrs_host, mrs_size,
- prev_host_start, prev_size))) {
+ section->mr == prev_sec->mr && !prev_sec->unmergeable) {
uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
need_add = false;
prev_sec->offset_within_address_space =
VhostBackendType backend_type, uint32_t busyloop_timeout,
Error **errp)
{
+ unsigned int used, reserved, limit;
uint64_t features;
int i, r, n_initialized_vqs = 0;
goto fail;
}
+ limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+ if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
+ memory_devices_memslot_auto_decision_active()) {
+ error_setg(errp, "some memory device (like virtio-mem)"
+ " decided how many memory slots to use based on the overall"
+ " number of memory slots; this vhost backend would further"
+ " restricts the overall number of memory slots");
+ error_append_hint(errp, "Try plugging this vhost backend before"
+ " plugging such memory devices.\n");
+ r = -EINVAL;
+ goto fail;
+ }
+
for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
if (r < 0) {
memory_listener_register(&hdev->memory_listener, &address_space_memory);
QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
- if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
- error_setg(errp, "vhost backend memory slots limit is less"
- " than current number of present memory slots");
+ /*
+ * The listener we registered properly updated the corresponding counter.
+ * So we can trust that these values are accurate.
+ */
+ if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+ hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+ used = used_shared_memslots;
+ } else {
+ used = used_memslots;
+ }
+ /*
+ * We assume that all reserved memslots actually require a real memslot
+ * in our vhost backend. This might not be true, for example, if the
+ * memslot would be ROM. If ever relevant, we can optimize for that --
+ * but we'll need additional information about the reservations.
+ */
+ reserved = memory_devices_get_reserved_memslots();
+ if (used + reserved > limit) {
+ error_setg(errp, "vhost backend memory slots limit (%d) is less"
+ " than current number of used (%d) and reserved (%d)"
+ " memory slots for memory devices.", limit, used, reserved);
r = -EINVAL;
goto fail_busyloop;
}
return vmc->get_memory_region(vmem, errp);
}
+static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md,
+ unsigned int limit)
+{
+ VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+ VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+ VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+ vmc->decide_memslots(vmem, limit);
+}
+
+static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md)
+{
+ VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+ VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+ VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+ return vmc->get_memslots(vmem);
+}
+
static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,
Error **errp)
{
mdc->set_addr = virtio_mem_pci_set_addr;
mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
mdc->get_memory_region = virtio_mem_pci_get_memory_region;
+ mdc->decide_memslots = virtio_mem_pci_decide_memslots;
+ mdc->get_memslots = virtio_mem_pci_get_memslots;
mdc->fill_device_info = virtio_mem_pci_fill_device_info;
mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;
return default_thp_size;
}
+/*
+ * The minimum memslot size depends on this setting ("sane default"), the
+ * device block size, and the memory backend page size. The last (or single)
+ * memslot might be smaller than this constant.
+ */
+#define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB)
+
/*
* We want to have a reasonable default block size such that
* 1. We avoid splitting THPs when unplugging memory, which degrades
return migration_in_incoming_postcopy() || !migration_is_idle();
}
-typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg,
uint64_t offset, uint64_t size);
-static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_unplugged_range(VirtIOMEM *vmem, void *arg,
virtio_mem_range_cb cb)
{
unsigned long first_zero_bit, last_zero_bit;
return ret;
}
-static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
virtio_mem_range_cb cb)
{
unsigned long first_bit, last_bit;
return true;
}
+static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+ const uint64_t memslot_offset = idx * vmem->memslot_size;
+
+ assert(vmem->memslots);
+
+ /*
+ * Instead of enabling/disabling memslots, we add/remove them. This should
+ * make address space updates faster, because we don't have to loop over
+ * many disabled subregions.
+ */
+ if (memory_region_is_mapped(&vmem->memslots[idx])) {
+ return;
+ }
+ memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+ assert(vmem->memslots);
+
+ if (!memory_region_is_mapped(&vmem->memslots[idx])) {
+ return;
+ }
+ memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,
+ uint64_t offset, uint64_t size)
+{
+ const unsigned int start_idx = offset / vmem->memslot_size;
+ const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+ vmem->memslot_size;
+ unsigned int idx;
+
+ if (!vmem->dynamic_memslots) {
+ return;
+ }
+
+ /* Activate all involved memslots in a single transaction. */
+ memory_region_transaction_begin();
+ for (idx = start_idx; idx < end_idx; idx++) {
+ virtio_mem_activate_memslot(vmem, idx);
+ }
+ memory_region_transaction_commit();
+}
+
+static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,
+ uint64_t offset,
+ uint64_t size)
+{
+ const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+ const unsigned int start_idx = offset / vmem->memslot_size;
+ const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+ vmem->memslot_size;
+ unsigned int idx;
+
+ if (!vmem->dynamic_memslots) {
+ return;
+ }
+
+ /* Deactivate all memslots with unplugged blocks in a single transaction. */
+ memory_region_transaction_begin();
+ for (idx = start_idx; idx < end_idx; idx++) {
+ const uint64_t memslot_offset = idx * vmem->memslot_size;
+ uint64_t memslot_size = vmem->memslot_size;
+
+ /* The size of the last memslot might be smaller. */
+ if (idx == vmem->nb_memslots - 1) {
+ memslot_size = region_size - memslot_offset;
+ }
+
+ /*
+ * Partially covered memslots might still have some blocks plugged and
+ * have to remain active if that's the case.
+ */
+ if (offset > memslot_offset ||
+ offset + size < memslot_offset + memslot_size) {
+ const uint64_t gpa = vmem->addr + memslot_offset;
+
+ if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) {
+ continue;
+ }
+ }
+
+ virtio_mem_deactivate_memslot(vmem, idx);
+ }
+ memory_region_transaction_commit();
+}
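For instance, with a 2 GiB memslot size, unplugging the range [1 GiB, 3 GiB) touches memslots 0 and 1; memslot 0 is only partially covered, so it is deactivated only if its remaining blocks are unplugged as well. The index math, as a self-contained sketch with invented sizes:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t memslot_size = 2ULL << 30;        /* 2 GiB */
        uint64_t offset = 1ULL << 30;                    /* unplug start */
        uint64_t size = 2ULL << 30;                      /* unplug length */

        unsigned start_idx = offset / memslot_size;                 /* 0 */
        unsigned end_idx = (offset + size + memslot_size - 1)
                           / memslot_size;                          /* 2 */

        printf("candidate memslots: [%u, %u)\n", start_idx, end_idx);
        /* Memslot 0 keeps [0, 1 GiB) untouched, so it stays active
         * unless that remainder is itself fully unplugged. */
        return 0;
    }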
+
static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
uint64_t size, bool plug)
{
}
virtio_mem_notify_unplug(vmem, offset, size);
virtio_mem_set_range_unplugged(vmem, start_gpa, size);
+ /* Deactivate completely unplugged memslots after updating the state. */
+ virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
return 0;
}
}
if (!ret) {
+ /*
+ * Activate before notifying and roll back in case of any errors.
+ *
+ * When activating a yet inactive memslot, memory notifiers will get
+ * notified about the added memory region and can register with the
+ * RamDiscardManager; this will traverse all plugged blocks and skip the
+ * blocks we are plugging here. The following notification will inform
+ * registered listeners about the blocks we're plugging.
+ */
+ virtio_mem_activate_memslots_to_plug(vmem, offset, size);
ret = virtio_mem_notify_plug(vmem, offset, size);
+ if (ret) {
+ virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
+ }
}
if (ret) {
/* Could be preallocation or a notifier populated memory. */
static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
+ const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
RAMBlock *rb = vmem->memdev->mr.ram_block;
if (vmem->size) {
bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
vmem->size = 0;
notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
+
+ /* Deactivate all memslots after updating the state. */
+ virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
}
trace_virtio_mem_unplugged_all();
virtio_mem_unplug_all(vmem);
}
+static void virtio_mem_prepare_mr(VirtIOMEM *vmem)
+{
+ const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+
+ assert(!vmem->mr && vmem->dynamic_memslots);
+ vmem->mr = g_new0(MemoryRegion, 1);
+ memory_region_init(vmem->mr, OBJECT(vmem), "virtio-mem",
+ region_size);
+ vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr);
+}
+
+static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)
+{
+ const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+ unsigned int idx;
+
+ g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots);
+ vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots);
+
+ /* Initialize our memslots, but don't map them yet. */
+ for (idx = 0; idx < vmem->nb_memslots; idx++) {
+ const uint64_t memslot_offset = idx * vmem->memslot_size;
+ uint64_t memslot_size = vmem->memslot_size;
+ char name[20];
+
+ /* The size of the last memslot might be smaller. */
+ if (idx == vmem->nb_memslots - 1) {
+ memslot_size = region_size - memslot_offset;
+ }
+
+ snprintf(name, sizeof(name), "memslot-%u", idx);
+ memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name,
+ &vmem->memdev->mr, memslot_offset,
+ memslot_size);
+ /*
+ * We want to be able to atomically and efficiently activate/deactivate
+ * individual memslots without affecting adjacent memslots in memory
+ * notifiers.
+ */
+ memory_region_set_unmergeable(&vmem->memslots[idx], true);
+ }
+}
+
static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
+ if (vmem->dynamic_memslots &&
+ vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) {
+ error_setg(errp, "'%s' property set to 'on' requires '%s' to be 'on'",
+ VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP,
+ VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
+ return;
+ }
+
/*
* If the block size wasn't configured by the user, use a sane default. This
* allows using hugetlbfs backends of any page size without manual
virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
+ /*
+ * With "dynamic-memslots=off" (old behavior) we always map the whole
+ * RAM memory region directly.
+ */
+ if (vmem->dynamic_memslots) {
+ if (!vmem->mr) {
+ virtio_mem_prepare_mr(vmem);
+ }
+ if (vmem->nb_memslots <= 1) {
+ vmem->nb_memslots = 1;
+ vmem->memslot_size = memory_region_size(&vmem->memdev->mr);
+ }
+ if (!vmem->memslots) {
+ virtio_mem_prepare_memslots(vmem);
+ }
+ } else {
+ assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots);
+ }
+
host_memory_backend_set_mapped(vmem->memdev, true);
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
if (vmem->early_migration) {
ram_block_coordinated_discard_require(false);
}
-static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_discard_range_cb(VirtIOMEM *vmem, void *arg,
uint64_t offset, uint64_t size)
{
RAMBlock *rb = vmem->memdev->mr.ram_block;
virtio_mem_discard_range_cb);
}
-static int virtio_mem_post_load(void *opaque, int version_id)
+static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size)
+{
+ virtio_mem_activate_memslots_to_plug(vmem, offset, size);
+ return 0;
+}
+
+static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)
{
- VirtIOMEM *vmem = VIRTIO_MEM(opaque);
RamDiscardListener *rdl;
int ret;
+ /*
+ * We restored the bitmap and updated the requested size; activate all
+ * memslots (so listeners register) before notifying about plugged blocks.
+ */
+ if (vmem->dynamic_memslots) {
+ /*
+ * We don't expect any active memslots at this point to deactivate: no
+ * memory was plugged on the migration destination.
+ */
+ virtio_mem_for_each_plugged_range(vmem, NULL,
+ virtio_mem_activate_memslot_range_cb);
+ }
+
/*
* We started out with all memory discarded and our memory region is mapped
* into an address space. Replay, now that we updated the bitmap.
return ret;
}
}
+ return 0;
+}
+
+static int virtio_mem_post_load(void *opaque, int version_id)
+{
+ VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+ int ret;
+
+ if (!vmem->early_migration) {
+ ret = virtio_mem_post_load_bitmap(vmem);
+ if (ret) {
+ return ret;
+ }
+ }
/*
* If shared RAM is migrated using the file content and not using QEMU,
return virtio_mem_restore_unplugged(vmem);
}
-static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
uint64_t offset, uint64_t size)
{
void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
int ret;
if (!vmem->prealloc) {
- return 0;
+ goto post_load_bitmap;
}
/*
* don't mess with preallocation and postcopy.
*/
if (migrate_ram_is_ignored(rb)) {
- return 0;
+ goto post_load_bitmap;
}
/*
return -EBUSY;
}
}
- return 0;
+
+post_load_bitmap:
+ /* Finally, update any other state to be consistent with the new bitmap. */
+ return virtio_mem_post_load_bitmap(vmem);
}
typedef struct VirtIOMEMMigSanityChecks {
if (!vmem->memdev) {
error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
return NULL;
+ } else if (vmem->dynamic_memslots) {
+ if (!vmem->mr) {
+ virtio_mem_prepare_mr(vmem);
+ }
+ return vmem->mr;
}
return &vmem->memdev->mr;
}
+static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit)
+{
+ uint64_t region_size, memslot_size, min_memslot_size;
+ unsigned int memslots;
+ RAMBlock *rb;
+
+ if (!vmem->dynamic_memslots) {
+ return;
+ }
+
+ /* We're called exactly once, before realizing the device. */
+ assert(!vmem->nb_memslots);
+
+ /* If realizing the device will fail, just assume a single memslot. */
+ if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) {
+ vmem->nb_memslots = 1;
+ return;
+ }
+
+ rb = vmem->memdev->mr.ram_block;
+ region_size = memory_region_size(&vmem->memdev->mr);
+
+ /*
+ * Determine the default block size now, to determine the minimum memslot
+ * size. We want the minimum slot size to be at least the device block size.
+ */
+ if (!vmem->block_size) {
+ vmem->block_size = virtio_mem_default_block_size(rb);
+ }
+ /* If realizing the device will fail, just assume a single memslot. */
+ if (vmem->block_size < qemu_ram_pagesize(rb) ||
+ !QEMU_IS_ALIGNED(region_size, vmem->block_size)) {
+ vmem->nb_memslots = 1;
+ return;
+ }
+
+ /*
+ * All memslots except the last one have a reasonable minimum size, and
+ * all memslot sizes are aligned to the device block size.
+ */
+ memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size);
+ min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE);
+ memslot_size = MAX(memslot_size, min_memslot_size);
+
+ memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
+ if (memslots != 1) {
+ vmem->memslot_size = memslot_size;
+ }
+ vmem->nb_memslots = memslots;
+}
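A worked example of the sizing, with invented inputs: a 32 GiB region, a limit of 16 memslots, and a 2 MiB device block size yield 16 memslots of 2 GiB each:

    #include <stdio.h>
    #include <stdint.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        const uint64_t gib = 1ULL << 30;
        uint64_t region_size = 32 * gib;       /* invented */
        uint64_t block_size = 2ULL << 20;      /* 2 MiB block size */
        uint64_t min_slot = 1 * gib;           /* minimum memslot size */
        unsigned limit = 16;

        uint64_t memslot_size = ALIGN_UP(region_size / limit, block_size);
        if (memslot_size < min_slot) {
            memslot_size = min_slot;
        }
        unsigned memslots = ALIGN_UP(region_size, memslot_size)
                            / memslot_size;

        printf("%u memslots of %llu MiB\n", memslots,
               (unsigned long long)(memslot_size >> 20)); /* 16 x 2048 MiB */
        return 0;
    }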
+
+static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem)
+{
+ if (!vmem->dynamic_memslots) {
+ /* Exactly one static RAM memory region. */
+ return 1;
+ }
+
+ /* We're only called after we were instructed to make a decision. */
+ g_assert(vmem->nb_memslots);
+ return vmem->nb_memslots;
+}
+
static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
Notifier *notifier)
{
NULL, NULL);
}
+static void virtio_mem_instance_finalize(Object *obj)
+{
+ VirtIOMEM *vmem = VIRTIO_MEM(obj);
+
+ /*
+ * Note: the core already dropped the references on all memory regions
+ * (it's passed as the owner to memory_region_init_*()) and finalized
+ * these objects. We can simply free the memory.
+ */
+ g_free(vmem->memslots);
+ vmem->memslots = NULL;
+ g_free(vmem->mr);
+ vmem->mr = NULL;
+}
+
static Property virtio_mem_properties[] = {
DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
#endif
DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
early_migration, true),
+ DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,
+ dynamic_memslots, false),
DEFINE_PROP_END_OF_LIST(),
};
vmc->fill_device_info = virtio_mem_fill_device_info;
vmc->get_memory_region = virtio_mem_get_memory_region;
+ vmc->decide_memslots = virtio_mem_decide_memslots;
+ vmc->get_memslots = virtio_mem_get_memslots;
vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
vmc->unplug_request_check = virtio_mem_unplug_request_check;
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VirtIOMEM),
.instance_init = virtio_mem_instance_init,
+ .instance_finalize = virtio_mem_instance_finalize,
.class_init = virtio_mem_class_init,
.class_size = sizeof(VirtIOMEMClass),
.interfaces = (InterfaceInfo[]) {
* function. The coroutine yields after scheduling the BH and is reentered when
* the wrapped function returns.
*
- * A no_co_wrapper_bdrv_wrlock function is a no_co_wrapper function that
- * automatically takes the graph wrlock when calling the wrapped function.
+ * A no_co_wrapper_bdrv_rdlock function is a no_co_wrapper function that
+ * automatically takes the graph rdlock when calling the wrapped function. In
+ * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
+ * graph wrlock.
*
* If the first parameter of the function is a BlockDriverState, BdrvChild or
* BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
*/
#define no_co_wrapper
+#define no_co_wrapper_bdrv_rdlock
#define no_co_wrapper_bdrv_wrlock
#include "block/blockjob.h"
Error **errp);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
-void bdrv_refresh_filename(BlockDriverState *bs);
+void GRAPH_RDLOCK bdrv_refresh_filename(BlockDriverState *bs);
void GRAPH_RDLOCK
bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
int bdrv_commit(BlockDriverState *bs);
-int bdrv_make_empty(BdrvChild *c, Error **errp);
+int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp);
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
const char *backing_fmt, bool warn);
void bdrv_register(BlockDriver *bdrv);
*/
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
int64_t total_work_size, void *opaque);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- bool force,
- Error **errp);
+int GRAPH_RDLOCK
+bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force, Error **errp);
/* check if a named node can be replaced when doing drive-mirror */
BlockDriverState * GRAPH_RDLOCK
check_to_replace_node(BlockDriverState *parent_bs, const char *node_name,
Error **errp);
-int no_coroutine_fn bdrv_activate(BlockDriverState *bs, Error **errp);
+int no_coroutine_fn GRAPH_RDLOCK
+bdrv_activate(BlockDriverState *bs, Error **errp);
-int coroutine_fn no_co_wrapper
+int coroutine_fn no_co_wrapper_bdrv_rdlock
bdrv_co_activate(BlockDriverState *bs, Error **errp);
void bdrv_activate_all(Error **errp);
int bdrv_has_zero_init(BlockDriverState *bs);
BlockDriverState *bdrv_find_node(const char *node_name);
BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
-XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
+XDbgBlockGraph * GRAPH_RDLOCK bdrv_get_xdbg_block_graph(Error **errp);
BlockDriverState *bdrv_lookup_bs(const char *device,
const char *node_name,
Error **errp);
BlockDriverState *bs;
} BdrvNextIterator;
-BlockDriverState *bdrv_first(BdrvNextIterator *it);
-BlockDriverState *bdrv_next(BdrvNextIterator *it);
+BlockDriverState * GRAPH_RDLOCK bdrv_first(BdrvNextIterator *it);
+BlockDriverState * GRAPH_RDLOCK bdrv_next(BdrvNextIterator *it);
void bdrv_next_cleanup(BdrvNextIterator *it);
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
void *opaque, bool read_only);
-char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
-char *bdrv_dirname(BlockDriverState *bs, Error **errp);
+
+char * GRAPH_RDLOCK
+bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+
+char * GRAPH_RDLOCK bdrv_dirname(BlockDriverState *bs, Error **errp);
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
BdrvChildRole child_role,
Error **errp);
-bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+bool GRAPH_RDLOCK
+bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
BdrvRequestFlags flags);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int bdrv_block_status(BlockDriverState *bs, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- bool include_base, int64_t offset, int64_t bytes,
- int64_t *pnum);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base,
+ bool include_base, int64_t offset,
+ int64_t bytes, int64_t *pnum);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes);
-int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
- Error **errp);
+int GRAPH_RDLOCK
+bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+ Error **errp);
+
bool bdrv_is_read_only(BlockDriverState *bs);
bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_supports_compressed_writes(BlockDriverState *bs);
const char *bdrv_get_node_name(const BlockDriverState *bs);
-const char *bdrv_get_device_name(const BlockDriverState *bs);
-const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
+
+const char * GRAPH_RDLOCK
+bdrv_get_device_name(const BlockDriverState *bs);
+
+const char * GRAPH_RDLOCK
+bdrv_get_device_or_node_name(const BlockDriverState *bs);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
int co_wrapper_mixed_bdrv_rdlock
bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
- Error **errp);
+ImageInfoSpecific * GRAPH_RDLOCK
+bdrv_get_specific_info(BlockDriverState *bs, Error **errp);
+
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
void bdrv_round_to_subclusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
*
* Begin a quiesced section for the parent of @c.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c);
+void GRAPH_RDLOCK bdrv_parent_drained_begin_single(BdrvChild *c);
/**
* bdrv_parent_drained_poll_single:
* Returns true if there is any pending activity to cease before @c can be
* called quiesced, false otherwise.
*/
-bool bdrv_parent_drained_poll_single(BdrvChild *c);
+bool GRAPH_RDLOCK bdrv_parent_drained_poll_single(BdrvChild *c);
/**
* bdrv_parent_drained_end_single:
*
* End a quiesced section for the parent of @c.
*/
-void bdrv_parent_drained_end_single(BdrvChild *c);
+void GRAPH_RDLOCK bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
* This is part of bdrv_drained_begin.
*/
-bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
- bool ignore_bds_parents);
+bool GRAPH_RDLOCK
+bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents);
/**
* bdrv_drained_begin:
* Begin a quiesced section for exclusive access to the BDS, by disabling
* external request sources including NBD server, block jobs, and device model.
*
+ * This function can only be invoked from the main loop or from a coroutine
+ * (regardless of the AioContext the coroutine is running in).
+ * If the coroutine is running in an iothread AioContext, this function will
+ * just schedule a BH to run in the main loop.
+ * However, it cannot be called directly from an iothread.
+ *
* This function can be recursive.
*/
void bdrv_drained_begin(BlockDriverState *bs);
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
+ *
+ * This function can only be invoked from the main loop or from a coroutine
+ * (regardless of the AioContext the coroutine is running in).
+ * If the coroutine is running in an iothread AioContext, this function will
+ * just schedule a BH to run in the main loop.
+ * However, it cannot be called directly from an iothread.
*/
void bdrv_drained_end(BlockDriverState *bs);
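For illustration, the two calls are typically paired around a graph or state modification; a minimal sketch (reconfigure_node() is a made-up placeholder):

    bdrv_drained_begin(bs);   /* quiesce; waits for in-flight requests */
    reconfigure_node(bs);     /* hypothetical step; no new I/O arrives here */
    bdrv_drained_end(bs);     /* resume external request sources */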
Error **errp);
/* For handling image reopen for split or non-split files. */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+ int GRAPH_UNLOCKED_PTR (*bdrv_reopen_prepare)(
+ BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp);
+ void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit)(
+ BDRVReopenState *reopen_state);
+ void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit_post)(
+ BDRVReopenState *reopen_state);
+ void GRAPH_UNLOCKED_PTR (*bdrv_reopen_abort)(
+ BDRVReopenState *reopen_state);
void (*bdrv_join_options)(QDict *options, QDict *old_options);
int GRAPH_UNLOCKED_PTR (*bdrv_open)(
int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)(
BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
- int (*bdrv_amend_options)(BlockDriverState *bs,
- QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque,
- bool force,
- Error **errp);
+ int GRAPH_RDLOCK_PTR (*bdrv_amend_options)(
+ BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force, Error **errp);
- int (*bdrv_make_empty)(BlockDriverState *bs);
+ int GRAPH_RDLOCK_PTR (*bdrv_make_empty)(BlockDriverState *bs);
/*
* Refreshes the bs->exact_filename field. If that is impossible,
* bs->exact_filename has to be left empty.
*/
- void (*bdrv_refresh_filename)(BlockDriverState *bs);
+ void GRAPH_RDLOCK_PTR (*bdrv_refresh_filename)(BlockDriverState *bs);
/*
* Gathers the open options for all children into @target.
* block driver which implements it is probably doing something
* shady regarding its runtime option structure.
*/
- void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
- bool backing_overridden);
+ void GRAPH_RDLOCK_PTR (*bdrv_gather_child_options)(
+ BlockDriverState *bs, QDict *target, bool backing_overridden);
/*
* Returns an allocated string which is the directory name of this BDS: It
* will be used to make relative filenames absolute by prepending this
* function's return value to them.
*/
- char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
+ char * GRAPH_RDLOCK_PTR (*bdrv_dirname)(BlockDriverState *bs, Error **errp);
/*
* This informs the driver that we are no longer interested in the result
int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs);
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
+ int GRAPH_RDLOCK_PTR (*bdrv_snapshot_create)(
+ BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+ int GRAPH_UNLOCKED_PTR (*bdrv_snapshot_goto)(
+ BlockDriverState *bs, const char *snapshot_id);
+
+ int GRAPH_RDLOCK_PTR (*bdrv_snapshot_delete)(
+ BlockDriverState *bs, const char *snapshot_id, const char *name,
+ Error **errp);
+
int (*bdrv_snapshot_list)(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info);
int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)(
BlockDriverState *bs, BlockDriverInfo *bdi);
- ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
- Error **errp);
+ ImageInfoSpecific * GRAPH_RDLOCK_PTR (*bdrv_get_specific_info)(
+ BlockDriverState *bs, Error **errp);
BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_save_vmstate)(
* Note that this can be nested. If drained_begin() was called twice, new
* I/O is allowed only after drained_end() was called twice, too.
*/
- void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child);
+ void GRAPH_RDLOCK_PTR (*drained_begin)(BdrvChild *child);
+ void GRAPH_RDLOCK_PTR (*drained_end)(BdrvChild *child);
/*
* Returns whether the parent has pending requests for the child. This
* callback is polled after .drained_begin() has been called until all
* activity on the child has stopped.
*/
- bool (*drained_poll)(BdrvChild *child);
+ bool GRAPH_RDLOCK_PTR (*drained_poll)(BdrvChild *child);
/*
* Notifies the parent that the filename of its child has changed (e.g.
*/
bool quiesced_parent;
- QLIST_ENTRY(BdrvChild) next;
- QLIST_ENTRY(BdrvChild) next_parent;
+ QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next;
+ QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next_parent;
};
/*
* See also comment in include/block/block.h, to learn how backing and file
* are connected with BdrvChildRole.
*/
- QLIST_HEAD(, BdrvChild) children;
+ QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) children;
BdrvChild *backing;
BdrvChild *file;
- QLIST_HEAD(, BdrvChild) parents;
+ QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) parents;
QDict *options;
QDict *explicit_options;
*/
void bdrv_wakeup(BlockDriverState *bs);
-const char *bdrv_get_parent_name(const BlockDriverState *bs);
+const char * GRAPH_RDLOCK bdrv_get_parent_name(const BlockDriverState *bs);
bool blk_dev_has_tray(BlockBackend *blk);
bool blk_dev_is_tray_open(BlockBackend *blk);
BdrvChild *bdrv_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+BdrvChild * GRAPH_RDLOCK bdrv_primary_child(BlockDriverState *bs);
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
return child_bs(bdrv_filter_or_cow_child(bs));
}
-static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
+static inline BlockDriverState * GRAPH_RDLOCK
+bdrv_primary_bs(BlockDriverState *bs)
{
IO_CODE();
return child_bs(bdrv_primary_child(bs));
* This function polls. Callers must not hold the lock of any AioContext other
* than the current one and the one of @bs.
*/
-void bdrv_graph_wrlock(BlockDriverState *bs) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
+void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA
+bdrv_graph_wrlock(BlockDriverState *bs);
/*
* bdrv_graph_wrunlock:
#include "block/snapshot.h"
#include "qapi/qapi-types-block-core.h"
-BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
- BlockDriverState *bs,
- bool flat,
- Error **errp);
-int bdrv_query_snapshot_info_list(BlockDriverState *bs,
- SnapshotInfoList **p_list,
- Error **errp);
-void bdrv_query_image_info(BlockDriverState *bs,
- ImageInfo **p_info,
- bool flat,
- bool skip_implicit_filters,
- Error **errp);
+BlockDeviceInfo * GRAPH_RDLOCK
+bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs,
+ bool flat, Error **errp);
+
+int GRAPH_RDLOCK
+bdrv_query_snapshot_info_list(BlockDriverState *bs,
+ SnapshotInfoList **p_list,
+ Error **errp);
+void GRAPH_RDLOCK
+bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat,
+ bool skip_implicit_filters, Error **errp);
void GRAPH_RDLOCK
bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info,
Error **errp);
#ifndef SNAPSHOT_H
#define SNAPSHOT_H
+#include "block/graph-lock.h"
#include "qapi/qapi-builtin-types.h"
#define SNAPSHOT_OPT_BASE "snapshot."
const char *name,
QEMUSnapshotInfo *sn_info,
Error **errp);
-int bdrv_can_snapshot(BlockDriverState *bs);
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id,
- Error **errp);
-int bdrv_snapshot_delete(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
+
+int GRAPH_RDLOCK bdrv_can_snapshot(BlockDriverState *bs);
+
+int GRAPH_RDLOCK
+bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+int GRAPH_UNLOCKED
+bdrv_snapshot_goto(BlockDriverState *bs, const char *snapshot_id, Error **errp);
+
+int GRAPH_RDLOCK
+bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id,
+ const char *name, Error **errp);
+
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info);
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
ram_addr_t qemu_ram_addr_from_host(void *ptr);
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ *                boundary
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset);
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
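A hedged usage sketch for the documented RCU rules above, using QEMU's WITH_RCU_READ_LOCK_GUARD() (host_ptr is a placeholder):

    ram_addr_t offset;
    RAMBlock *rb;

    WITH_RCU_READ_LOCK_GUARD() {
        rb = qemu_ram_block_from_host(host_ptr, false, &offset);
        if (rb) {
            /*
             * Use rb/offset here, or take a stronger reference (e.g., on
             * the owning memory region) before leaving the RCU section.
             */
        }
    }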
* relative to the region's address space
* @readonly: writes to this section are ignored
* @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
*/
struct MemoryRegionSection {
Int128 size;
hwaddr offset_within_address_space;
bool readonly;
bool nonvolatile;
+ bool unmergeable;
};
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
* populated (consuming memory), to be used/accessed by the VM.
*
* A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
- * mapped.
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
*
* The #RamDiscardManager is intended to be used by technologies that are
* incompatible with discarding of RAM (e.g., VFIO, which may pin all
bool nonvolatile;
bool rom_device;
bool flush_coalesced_mmio;
+ bool unmergeable;
uint8_t dirty_log_mask;
bool is_iommu;
RAMBlock *ram_block;
void memory_region_set_alias_offset(MemoryRegion *mr,
hwaddr offset);
+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything contained in a memory region container) not getting merged when
+ * simplifying the address space and notifying memory listeners. Consequently,
+ * memory listeners will never get notified about ranges that are larger than
+ * the original memory regions.
+ *
+ * This is primarily useful when multiple aliases to a RAM memory region are
+ * mapped into a memory region container, and updates (e.g., enable/disable or
+ * map/unmap) of individual memory region aliases are not supposed to affect
+ * other memory regions in the same container.
+ *
+ * @mr: the #MemoryRegion to be updated
+ * @unmergeable: whether to mark the #MemoryRegion unmergeable
+ */
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable);
+
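A minimal sketch of the usage pattern described in the comment: a container marked unmergeable, populated with aliases into a backing RAM region. All variables (container, alias, backend_mr, dev, size, alias_size) are placeholders:

    memory_region_init(container, OBJECT(dev), "sketch-container", size);
    memory_region_set_unmergeable(container, true);

    /*
     * (Un)mapping this alias later will not cause neighbouring ranges in
     * the container to be merged and re-split for memory listeners.
     */
    memory_region_init_alias(alias, OBJECT(dev), "sketch-alias",
                             backend_mr, 0, alias_size);
    memory_region_add_subregion(container, 0, alias);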
/**
* memory_region_present: checks if an address relative to a @container
* translates into #MemoryRegion within @container
* DeviceMemoryState:
* @base: address in guest physical address space where the memory
* address space for memory devices starts
- * @mr: address space container for memory devices
+ * @mr: memory region container for memory devices
+ * @as: address space for memory devices
+ * @listener: memory listener used to track used memslots in the address space
* @dimm_size: the sum of plugged DIMMs' sizes
* @used_region_size: the part of @mr already used by memory devices
+ * @required_memslots: the number of memslots required by memory devices
+ * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ * automatically decided to use more than
+ * one memslot
*/
typedef struct DeviceMemoryState {
hwaddr base;
MemoryRegion mr;
+ AddressSpace as;
+ MemoryListener listener;
uint64_t dimm_size;
uint64_t used_region_size;
+ unsigned int required_memslots;
+ unsigned int used_memslots;
+ unsigned int memslot_auto_decision_active;
} DeviceMemoryState;
/**
#define MEMORY_DEVICE_H
#include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
#include "qapi/qapi-types-machine.h"
#include "qom/object.h"
* successive memory regions are used, a covering memory region has to
* be provided. Scattered memory regions are not supported for single
* devices.
+ *
+ * The device memory region returned via @get_memory_region may either be a
+ * single RAM memory region or a memory region container with subregions
+ * that are RAM memory regions or aliases to RAM memory regions. Other
+ * memory regions or subregions are not supported.
+ *
+ * If the device memory region returned via @get_memory_region is a
+ * memory region container, it's supported to dynamically (un)map subregions
+ * as long as the number of memslots returned by @get_memslots() won't
+ * be exceeded and as long as all memory regions are of the same kind (e.g.,
+ * all RAM or all ROM).
*/
struct MemoryDeviceClass {
/* private */
*/
MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
+ /*
+ * Optional: Instruct the memory device to decide how many memory slots
+ * it requires, not exceeding the given limit.
+ *
+ * Called exactly once when pre-plugging the memory device, before
+ * querying the number of memslots using @get_memslots the first time.
+ */
+ void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
+ /*
+ * Optional for memory devices that require only a single memslot,
+ * required for all other memory devices: Return the number of memslots
+ * (distinct RAM memory regions in the device memory region) that are
+ * required by the device.
+ *
+ * If this function is not implemented, the assumption is "1".
+ *
+ * Called when (un)plugging the memory device, to check if the requirements
+ * can be satisfied, and to do proper accounting.
+ */
+ unsigned int (*get_memslots)(MemoryDeviceState *md);
+
/*
* Optional: Return the desired minimum alignment of the device in guest
* physical address space. The final alignment is computed based on this
MemoryDeviceInfo *info);
};
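A sketch of the pre-plug accounting the two callbacks imply; "limit" is assumed to be computed by the core from free and reserved memslots:

    MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
    unsigned int memslots;

    if (mdc->decide_memslots) {
        mdc->decide_memslots(md, limit);
    }
    /* Without a get_memslots() implementation, one memslot is assumed. */
    memslots = mdc->get_memslots ? mdc->get_memslots(md) : 1;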
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have fewer than 509 memslots, we will instruct memory devices that
+ * support deciding their memslot count automatically to use only a single
+ * one.
+ *
+ * Hotplugging vhost devices that support at least 509 memslots is not
+ * expected to cause problems, not even when memory devices automatically
+ * decided how many memslots to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
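The policy in the comment could be sketched as follows (simplified; the real limit computation presumably also accounts for currently free and reserved memslots):

    static unsigned int sketch_auto_memslot_limit(unsigned int max_memslots)
    {
        if (max_memslots < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
            return 1; /* be conservative: one memslot per device */
        }
        return MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT;
    }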
MemoryDeviceInfoList *qmp_memory_device_list(void);
uint64_t get_plugged_memory_size(void);
+unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
const uint64_t *legacy_align, Error **errp);
void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
char *mac_addr);
-typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
- uint64_t start1, uint64_t size1,
- uint64_t start2, uint64_t size2);
typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
uint64_t guest_cid);
typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
uint64_t session_id);
-typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,
- MemoryRegionSection *section);
+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);
typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
uint16_t queue_size,
vhost_backend_init vhost_backend_init;
vhost_backend_cleanup vhost_backend_cleanup;
vhost_backend_memslots_limit vhost_backend_memslots_limit;
+ vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;
vhost_net_set_backend_op vhost_net_set_backend;
vhost_net_set_mtu_op vhost_net_set_mtu;
vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
vhost_set_vring_enable_op vhost_set_vring_enable;
vhost_requires_shm_log_op vhost_requires_shm_log;
vhost_migration_done_op vhost_migration_done;
- vhost_backend_can_merge_op vhost_backend_can_merge;
vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
vhost_vsock_set_running_op vhost_vsock_set_running;
vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
vhost_set_config_op vhost_set_config;
vhost_crypto_create_session_op vhost_crypto_create_session;
vhost_crypto_close_session_op vhost_crypto_close_session;
- vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
vhost_get_inflight_fd_op vhost_get_inflight_fd;
vhost_set_inflight_fd_op vhost_set_inflight_fd;
vhost_dev_start_op vhost_dev_start;
*/
void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
uint64_t features);
-bool vhost_has_free_slot(void);
+unsigned int vhost_get_max_memslots(void);
+unsigned int vhost_get_free_memslots(void);
int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file);
};
#define VIRTIO_GPU_FILL_CMD(out) do { \
- size_t s; \
- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
+ size_t virtiogpufillcmd_s_ = \
+ iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
&out, sizeof(out)); \
- if (s != sizeof(out)) { \
+ if (virtiogpufillcmd_s_ != sizeof(out)) { \
qemu_log_mask(LOG_GUEST_ERROR, \
"%s: command size incorrect %zu vs %zu\n", \
- __func__, s, sizeof(out)); \
+ __func__, virtiogpufillcmd_s_, sizeof(out)); \
return; \
} \
} while (0)
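The unwieldy virtiogpufillcmd_s_ name is presumably chosen so the macro-local variable cannot collide with, or shadow, an identifier in the expanding function; with the old name, a caller such as the following hypothetical handler would have tripped -Wshadow=local:

    void sketch_handler(struct virtio_gpu_ctrl_command *cmd)
    {
        struct virtio_gpu_transfer_to_host_2d t2d;
        size_t s = 0;               /* the caller's own "s" */

        VIRTIO_GPU_FILL_CMD(t2d);   /* used to declare another "s" inside */
        (void)s;
    }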
#define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
#define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
#define VIRTIO_MEM_PREALLOC_PROP "prealloc"
+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots"
struct VirtIOMEM {
VirtIODevice parent_obj;
int32_t bitmap_size;
unsigned long *bitmap;
- /* assigned memory backend and memory region */
+ /*
+ * With "dynamic-memslots=on": Device memory region in which we dynamically
+ * map the memslots.
+ */
+ MemoryRegion *mr;
+
+ /*
+ * With "dynamic-memslots=on": The individual memslots (aliases into the
+ * memory backend).
+ */
+ MemoryRegion *memslots;
+
+ /* With "dynamic-memslots=on": The total number of memslots. */
+ uint16_t nb_memslots;
+
+ /*
+ * With "dynamic-memslots=on": Size of one memslot (the size of the
+ * last one can differ).
+ */
+ uint64_t memslot_size;
+
+ /* Assigned memory backend with the RAM memory region. */
HostMemoryBackend *memdev;
/* NUMA node */
*/
bool early_migration;
+ /*
+ * Whether we dynamically map (multiple, if possible) memslots instead of
+ * statically mapping the whole RAM memory region.
+ */
+ bool dynamic_memslots;
+
/* notifiers to notify when "size" changes */
NotifierList size_change_notifiers;
/* public */
void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);
MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);
+ void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);
+ unsigned int (*get_memslots)(VirtIOMEM *vmem);
void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);
void blk_remove_bs(BlockBackend *blk);
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
-bool bdrv_has_blk(BlockDriverState *bs);
-bool bdrv_is_root_node(BlockDriverState *bs);
+bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs);
+bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs);
int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm,
uint64_t shared_perm, Error **errp);
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
/* external API */
-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_max_memslots(void);
+unsigned int kvm_get_free_memslots(void);
bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_has_robust_singlestep(void);
*/
int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);
/* Notify resamplefd for EOI of specific interrupts. */
void kvm_resample_fd_notify(int gsi);
typedef struct KVMMemoryListener {
MemoryListener listener;
KVMSlot *slots;
+ unsigned int nr_used_slots;
int as_id;
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
Error *local_err = NULL;
int ret;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
block_mig_state.submitted = 0;
block_mig_state.read_done = 0;
block_mig_state.transferred = 0;
BlockDriverState *bs;
BdrvNextIterator it;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
len = strlen(str);
readline_set_completion_index(rs, len);
has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
if (file && has_offset) {
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(file);
+ bdrv_graph_rdunlock_main_loop();
filename = file->filename;
}
sn.date_sec = rt / G_USEC_PER_SEC;
sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_snapshot_create(bs, &sn);
+ bdrv_graph_rdunlock_main_loop();
+
if (ret) {
error_report("Could not create snapshot '%s': %s",
snapshot_name, strerror(-ret));
break;
case SNAPSHOT_DELETE:
+ bdrv_graph_rdlock_main_loop();
ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
if (ret < 0) {
error_report("Could not delete snapshot '%s': snapshot not "
ret = 1;
}
}
+ bdrv_graph_rdunlock_main_loop();
break;
}
qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
}
+ bdrv_graph_rdlock_main_loop();
bdrv_refresh_filename(bs);
+ bdrv_graph_rdunlock_main_loop();
overlay_filename = bs->exact_filename[0] ? bs->exact_filename
: bs->filename;
out_real_path =
{
BlockDriver *drv;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
/* Find driver and parse its options */
drv = bdrv_find_format(format);
if (!drv) {
goto out;
}
+ bdrv_graph_rdlock_main_loop();
if (!bs->drv->bdrv_amend_options) {
error_report("Format driver '%s' does not support option amendment",
fmt);
+ bdrv_graph_rdunlock_main_loop();
ret = -1;
goto out;
}
"This option is only supported for image creation\n");
}
+ bdrv_graph_rdunlock_main_loop();
error_report_err(err);
ret = -1;
goto out;
qemu_progress_print(0.f, 0);
ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
qemu_progress_print(100.f, 0);
+ bdrv_graph_rdunlock_main_loop();
+
if (ret < 0) {
error_report_err(err);
goto out;
char s1[64], s2[64];
int ret;
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (bs->drv && bs->drv->format_name) {
printf("format name: %s\n", bs->drv->format_name);
}
raise ValueError(f"Invalid no_co function name: {self.name}")
if not self.create_only_co:
raise ValueError(f"no_co function can't be mixed: {self.name}")
- if self.graph_rdlock:
- raise ValueError(f"no_co function can't be rdlock: {self.name}")
+ if self.graph_rdlock and self.graph_wrlock:
+ raise ValueError("function can't be both rdlock and wrlock: "
+ f"{self.name}")
self.target_name = f'{subsystem}_{subname}'
self.ctx = self.gen_ctx()
graph_lock=''
graph_unlock=''
- if func.graph_wrlock:
+ if func.graph_rdlock:
+ graph_lock=' bdrv_graph_rdlock_main_loop();'
+ graph_unlock=' bdrv_graph_rdunlock_main_loop();'
+ elif func.graph_wrlock:
graph_lock=' bdrv_graph_wrlock(NULL);'
graph_unlock=' bdrv_graph_wrunlock();'
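For orientation, the body of an emitted no_co wrapper with the new rdlock support might look roughly like this (a simplified sketch with hypothetical names; the generator also emits the BH scheduling and the state struct around it):

    static void bdrv_sketch_activate_bh(void *opaque)
    {
        BdrvSketchActivateState *s = opaque;   /* hypothetical state struct */

        bdrv_graph_rdlock_main_loop();
        s->ret = bdrv_activate(s->bs, s->errp);
        bdrv_graph_rdunlock_main_loop();

        aio_co_wake(s->co);   /* reenter the coroutine that yielded */
    }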
--- /dev/null
+#include "qemu/osdep.h"
+#include "hw/mem/memory-device.h"
+
+MemoryDeviceInfoList *qmp_memory_device_list(void)
+{
+ return NULL;
+}
+
+uint64_t get_plugged_memory_size(void)
+{
+ return (uint64_t)-1;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+ return 0;
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+ return false;
+}
stub_ss.add(files('monitor-core.c'))
stub_ss.add(files('physmem.c'))
stub_ss.add(files('qemu-timer-notify-cb.c'))
-stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('memory_device.c'))
stub_ss.add(files('qmp-command-available.c'))
stub_ss.add(files('qmp-quit.c'))
stub_ss.add(files('qtest.c'))
+++ /dev/null
-#include "qemu/osdep.h"
-#include "hw/mem/memory-device.h"
-
-MemoryDeviceInfoList *qmp_memory_device_list(void)
-{
- return NULL;
-}
-
-uint64_t get_plugged_memory_size(void)
-{
- return (uint64_t)-1;
-}
/* The iova will be updated by iova_to_va() later, so just remove it */
vduse_iova_remove_region(dev, req.iova.start, req.iova.last);
for (i = 0; i < dev->num_queues; i++) {
- VduseVirtq *vq = &dev->vqs[i];
+ vq = &dev->vqs[i];
if (vq->ready) {
if (vduse_queue_update_vring(vq, vq->vring.desc_addr,
vq->vring.avail_addr,
for (i = 0; i < dev->nregions; i++) {
if (reg_equal(&dev->regions[i], msg_region)) {
VuDevRegion *r = &dev->regions[i];
- void *m = (void *) (uintptr_t) r->mmap_addr;
+ void *ma = (void *) (uintptr_t) r->mmap_addr;
- if (m) {
- munmap(m, r->size + r->mmap_offset);
+ if (ma) {
+ munmap(ma, r->size + r->mmap_offset);
}
/*
for (i = 0; i < dev->nregions; i++) {
VuDevRegion *r = &dev->regions[i];
- void *m = (void *) (uintptr_t) r->mmap_addr;
+ void *ma = (void *) (uintptr_t) r->mmap_addr;
- if (m) {
- munmap(m, r->size + r->mmap_offset);
+ if (ma) {
+ munmap(ma, r->size + r->mmap_offset);
}
}
dev->nregions = memory->nregions;
bool romd_mode;
bool readonly;
bool nonvolatile;
+ bool unmergeable;
};
#define FOR_EACH_FLAT_RANGE(var, view) \
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
.nonvolatile = fr->nonvolatile,
+ .unmergeable = fr->unmergeable,
};
}
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
&& a->readonly == b->readonly
- && a->nonvolatile == b->nonvolatile;
+ && a->nonvolatile == b->nonvolatile
+ && a->unmergeable == b->unmergeable;
}
static FlatView *flatview_new(MemoryRegion *mr_root)
&& r1->dirty_log_mask == r2->dirty_log_mask
&& r1->romd_mode == r2->romd_mode
&& r1->readonly == r2->readonly
- && r1->nonvolatile == r2->nonvolatile;
+ && r1->nonvolatile == r2->nonvolatile
+ && !r1->unmergeable && !r2->unmergeable;
}
/* Attempt to simplify a view by merging adjacent ranges */
Int128 base,
AddrRange clip,
bool readonly,
- bool nonvolatile)
+ bool nonvolatile,
+ bool unmergeable)
{
MemoryRegion *subregion;
unsigned i;
int128_addto(&base, int128_make64(mr->addr));
readonly |= mr->readonly;
nonvolatile |= mr->nonvolatile;
+ unmergeable |= mr->unmergeable;
tmp = addrrange_make(base, mr->size);
int128_subfrom(&base, int128_make64(mr->alias->addr));
int128_subfrom(&base, int128_make64(mr->alias_offset));
render_memory_region(view, mr->alias, base, clip,
- readonly, nonvolatile);
+ readonly, nonvolatile, unmergeable);
return;
}
/* Render subregions in priority order. */
QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
render_memory_region(view, subregion, base, clip,
- readonly, nonvolatile);
+ readonly, nonvolatile, unmergeable);
}
if (!mr->terminates) {
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
fr.nonvolatile = nonvolatile;
+ fr.unmergeable = unmergeable;
/* Render the region itself into any gaps left by the current view. */
for (i = 0; i < view->nr && int128_nz(remain); ++i) {
if (mr) {
render_memory_region(view, mr, int128_zero(),
addrrange_make(int128_zero(), int128_2_64()),
- false, false);
+ false, false, false);
}
flatview_simplify(view);
RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
{
- if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+ if (!memory_region_is_ram(mr)) {
return NULL;
}
return mr->rdm;
void memory_region_set_ram_discard_manager(MemoryRegion *mr,
RamDiscardManager *rdm)
{
- g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+ g_assert(memory_region_is_ram(mr));
g_assert(!rdm || !mr->rdm);
mr->rdm = rdm;
}
memory_region_transaction_commit();
}
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable)
+{
+ if (unmergeable == mr->unmergeable) {
+ return;
+ }
+
+ memory_region_transaction_begin();
+ mr->unmergeable = unmergeable;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+}
+
uint64_t memory_region_get_alignment(const MemoryRegion *mr)
{
return mr->align;
return res;
}
-/*
- * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
- * in that RAMBlock.
- *
- * ptr: Host pointer to look up
- * round_offset: If true round the result offset down to a page boundary
- * *ram_addr: set to result ram_addr
- * *offset: set to result offset within the RAMBlock
- *
- * Returns: RAMBlock (or NULL if not found)
- *
- * By the time this function returns, the returned pointer is not protected
- * by RCU anymore. If the caller is not within an RCU critical section and
- * does not hold the iothread lock, it must have other means of protecting the
- * pointer, such as a reference to the region that includes the incoming
- * ram_addr_t.
- */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset)
{
static void tss_set_busy(CPUX86State *env, int tss_selector, bool value,
uintptr_t retaddr)
{
- target_ulong ptr = env->gdt.base + (env->tr.selector & ~7);
+ target_ulong ptr = env->gdt.base + (tss_selector & ~7);
uint32_t e2 = cpu_ldl_kernel_ra(env, ptr + 4, retaddr);
if (value) {
int i;
tim_reset(td);
+ clock_step_next();
tim_write_ticr(td, count);
tim_write_tcsr(td, CEN | IE | MODE_PERIODIC | PRESCALE(ps));
blk_co_unref(blk);
} else {
BdrvChild *c, *next_c;
+ bdrv_graph_co_rdlock();
QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
+ bdrv_graph_co_rdunlock();
bdrv_co_unref_child(bs, c);
+ bdrv_graph_co_rdlock();
}
+ bdrv_graph_co_rdunlock();
}
dbdd->done = true;
};
static struct detach_by_parent_data detach_by_parent_data;
-static void detach_indirect_bh(void *opaque)
+static void no_coroutine_fn detach_indirect_bh(void *opaque)
{
struct detach_by_parent_data *data = opaque;
bdrv_graph_wrunlock();
}
-static void detach_by_parent_aio_cb(void *opaque, int ret)
+static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret)
{
struct detach_by_parent_data *data = &detach_by_parent_data;
g_assert_cmpint(ret, ==, 0);
if (data->by_parent_cb) {
bdrv_inc_in_flight(data->child_b->bs);
- detach_indirect_bh(data);
+ aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+ detach_indirect_bh, &detach_by_parent_data);
}
}
-static void detach_by_driver_cb_drained_begin(BdrvChild *child)
+static void GRAPH_RDLOCK detach_by_driver_cb_drained_begin(BdrvChild *child)
{
struct detach_by_parent_data *data = &detach_by_parent_data;
* state is messed up, but if it is only polled in the single
* BDRV_POLL_WHILE() at the end of the drain, this should work fine.
*/
-static void test_detach_indirect(bool by_parent_cb)
+static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb)
{
BlockBackend *blk;
BlockDriverState *parent_a, *parent_b, *a, *b, *c;
static void test_sync_op_activate(BdrvChild *c)
{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
/* Early success: Image is not inactive */
bdrv_activate(c->bs, NULL);
}
BlockDriverState *bs;
BdrvChild *c;
+ GLOBAL_STATE_CODE();
+
blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
blk_insert_bs(blk, bs, &error_abort);
+
+ bdrv_graph_rdlock_main_loop();
c = QLIST_FIRST(&bs->parents);
+ bdrv_graph_rdunlock_main_loop();
blk_set_aio_context(blk, ctx, &error_abort);
aio_context_acquire(ctx);