Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <arikalo@wavecomp.com>
Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <aleksandar.rikalo@rt-rk.com>
Alexander Graf <agraf@csgraf.de> <agraf@suse.de>
+Ani Sinha <anisinha@redhat.com> <ani@anisinha.ca>
Anthony Liguori <anthony@codemonkey.ws> Anthony Liguori <aliguori@us.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
Damien Hedde <damien.hedde@dahe.fr> <damien.hedde@greensocs.com>
ACPI/SMBIOS
M: Michael S. Tsirkin <mst@redhat.com>
M: Igor Mammedov <imammedo@redhat.com>
-R: Ani Sinha <ani@anisinha.ca>
+R: Ani Sinha <anisinha@redhat.com>
S: Supported
F: include/hw/acpi/*
F: include/hw/firmware/smbios.h
F: hw/acpi/viot.h
ACPI/AVOCADO/BIOSBITS
-M: Ani Sinha <ani@anisinha.ca>
+M: Ani Sinha <anisinha@redhat.com>
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: tests/avocado/acpi-bits/*
F: include/sysemu/vhost-user-backend.h
F: subprojects/libvhost-user/
+vhost-shadow-virtqueue
+R: Eugenio Pérez <eperezma@redhat.com>
+F: hw/virtio/vhost-shadow-virtqueue.*
+
virtio
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/i386/intel_iommu_internal.h
F: include/hw/i386/intel_iommu.h
+AMD-Vi Emulation
+S: Orphan
+F: hw/i386/amd_iommu.?
+
OpenSBI Firmware
M: Bin Meng <bmeng.cn@gmail.com>
S: Supported
void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
{
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
-
if (s->cmd != NULL) {
error_report("There is a TPM request pending");
return;
s->cmd = cmd;
object_ref(OBJECT(s));
- thread_pool_submit_aio(pool, tpm_backend_worker_thread, s,
+ thread_pool_submit_aio(tpm_backend_worker_thread, s,
tpm_backend_request_completed, s);
}
* sums the size of all data-bearing children. (This excludes backing
* children.)
*/
-static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
BdrvChild *child;
int64_t child_size, sum = 0;
return ret;
}
-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
- BlkdebugIOType iotype)
+static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, BlkdebugIOType iotype)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
- int quiesce_counter;
+ int quiesce_counter; /* atomic: written under BQL, read by other threads */
+ QemuMutex queued_requests_lock; /* protects queued_requests */
CoQueue queued_requests;
- bool disable_request_queuing;
+ bool disable_request_queuing; /* atomic */
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
block_acct_init(&blk->stats);
+ qemu_mutex_init(&blk->queued_requests_lock);
qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->aio_notifiers));
+ assert(qemu_co_queue_empty(&blk->queued_requests));
+ qemu_mutex_destroy(&blk->queued_requests_lock);
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
blk->dev_opaque = opaque;
/* Are we currently quiesced? Should we enforce this right now? */
- if (blk->quiesce_counter && ops && ops->drained_begin) {
+ if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
ops->drained_begin(opaque);
}
}
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
IO_CODE();
- blk->disable_request_queuing = disable;
+ qatomic_set(&blk->disable_request_queuing, disable);
}
static int coroutine_fn GRAPH_RDLOCK
{
assert(blk->in_flight > 0);
- if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ if (qatomic_read(&blk->quiesce_counter) &&
+ !qatomic_read(&blk->disable_request_queuing)) {
+ /*
+ * Take lock before decrementing in flight counter so main loop thread
+ * waits for us to enqueue ourselves before it can leave the drained
+ * section.
+ */
+ qemu_mutex_lock(&blk->queued_requests_lock);
blk_dec_in_flight(blk);
- qemu_co_queue_wait(&blk->queued_requests, NULL);
+ qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
blk_inc_in_flight(blk);
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
- AioContext *ctx = blk_get_aio_context(blk);
-
- aio_context_acquire(ctx);
-
/* We may have -ENOMEDIUM completions in flight */
- AIO_WAIT_WHILE(ctx, qatomic_read(&blk->in_flight) > 0);
-
- aio_context_release(ctx);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
}
bdrv_drain_all_end();
BlockBackend *blk = child->opaque;
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- if (++blk->quiesce_counter == 1) {
+ if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
if (blk->dev_ops && blk->dev_ops->drained_begin) {
blk->dev_ops->drained_begin(blk->dev_opaque);
}
{
BlockBackend *blk = child->opaque;
bool busy = false;
- assert(blk->quiesce_counter);
+ assert(qatomic_read(&blk->quiesce_counter));
if (blk->dev_ops && blk->dev_ops->drained_poll) {
busy = blk->dev_ops->drained_poll(blk->dev_opaque);
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
- assert(blk->quiesce_counter);
+ assert(qatomic_read(&blk->quiesce_counter));
assert(blk->public.throttle_group_member.io_limits_disabled);
qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
- if (--blk->quiesce_counter == 0) {
+ if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
- while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ qemu_mutex_lock(&blk->queued_requests_lock);
+ while (qemu_co_enter_next(&blk->queued_requests,
+ &blk->queued_requests_lock)) {
/* Resume all queued requests */
}
+ qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
#include "qemu/memalign.h"
#include "dmg.h"
-int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
- char *next_out, unsigned int avail_out);
-
-int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
- char *next_out, unsigned int avail_out);
+BdrvDmgUncompressFunc *dmg_uncompress_bz2;
+BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
z_stream zstream;
} BDRVDMGState;
-extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
- char *next_out, unsigned int avail_out);
+typedef int BdrvDmgUncompressFunc(char *next_in, unsigned int avail_in,
+ char *next_out, unsigned int avail_out);
-extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
- char *next_out, unsigned int avail_out);
+extern BdrvDmgUncompressFunc *dmg_uncompress_bz2;
+extern BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
#endif
blk_exp_request_shutdown(exp);
}
- AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
+ AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type));
}
void blk_exp_close_all(void)
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "block/block.h"
#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
#include "standard-headers/linux/virtio_blk.h"
vhost_user_server_stop(&vexp->vu_server);
}
+/*
+ * BlockDevOps resize callback (.resize_cb) for the vhost-user-blk export.
+ *
+ * @opaque: the VuBlkExport passed to blk_set_dev_ops() at export creation.
+ *
+ * Re-reads the length of the exported block node, stores it in the
+ * virtio-blk config space (as a little-endian count of sectors, i.e. the
+ * byte length shifted right by VIRTIO_BLK_SECTOR_BITS) and then calls
+ * vu_config_change_msg() on the export's vhost-user device.
+ *
+ * If the length cannot be obtained, the error is reported and the function
+ * returns without touching the advertised capacity.
+ */
+static void vu_blk_exp_resize(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+    BlockDriverState *bs = blk_bs(vexp->handler.blk);
+    int64_t new_size = bdrv_getlength(bs);
+
+    if (new_size < 0) {
+        /*
+         * Use error_report() rather than error_printf(): the latter does
+         * not terminate the line, so the message would run into whatever
+         * is printed next.
+         */
+        error_report("Failed to get length of block node '%s'",
+                     bdrv_get_node_name(bs));
+        return;
+    }
+
+    vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);
+
+    vu_config_change_msg(&vexp->vu_server.vu_dev);
+}
+
+static const BlockDevOps vu_blk_dev_ops = {
+ .resize_cb = vu_blk_exp_resize,
+};
+
static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
Error **errp)
{
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
vexp);
+ blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);
+
if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
num_queues, &vu_blk_iface, errp)) {
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
return result;
}
-static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
- ThreadPoolFunc func, void *arg)
+static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg)
{
- /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
- ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_co(pool, func, arg);
+ return thread_pool_submit_co(func, arg);
}
/*
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_IO_URING
} else if (s->use_linux_io_uring) {
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
- return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
+ return luring_co_submit(bs, s->fd, offset, qiov, type);
#endif
#ifdef CONFIG_LINUX_AIO
} else if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
- return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
- s->aio_max_batch);
+ return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
#endif
}
};
assert(qiov->size == bytes);
- return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
+ return raw_thread_pool_submit(handle_aiocb_rw, &acb);
}
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
- laio_io_plug(bs, aio);
+ laio_io_plug();
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
- luring_io_plug(bs, aio);
+ luring_io_plug();
}
#endif
}
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
- laio_io_unplug(bs, aio, s->aio_max_batch);
+ laio_io_unplug(s->aio_max_batch);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
- luring_io_unplug(bs, aio);
+ luring_io_unplug();
}
#endif
}
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
- return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+ return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
}
#endif
- return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
+ return raw_thread_pool_submit(handle_aiocb_flush, &acb);
}
static void raw_aio_attach_aio_context(BlockDriverState *bs,
},
};
- return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb);
+ return raw_thread_pool_submit(handle_aiocb_truncate, &acb);
}
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
acb.aio_type |= QEMU_AIO_BLKDEV;
}
- ret = raw_thread_pool_submit(bs, handle_aiocb_discard, &acb);
+ ret = raw_thread_pool_submit(handle_aiocb_discard, &acb);
raw_account_discard(s, bytes, ret);
return ret;
}
handler = handle_aiocb_write_zeroes;
}
- return raw_thread_pool_submit(bs, handler, &acb);
+ return raw_thread_pool_submit(handler, &acb);
}
static int coroutine_fn raw_co_pwrite_zeroes(
},
};
- return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb);
+ return raw_thread_pool_submit(handle_aiocb_copy_range, &acb);
}
BlockDriver bdrv_file = {
struct sg_io_hdr *io_hdr = buf;
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
- return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
+ return pr_manager_execute(s->pr_mgr, qemu_get_current_aio_context(),
s->fd, io_hdr);
}
}
},
};
- return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb);
+ return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
}
#endif /* linux */
BlockCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
- ThreadPool *pool;
acb->bs = bs;
acb->hfile = hfile;
acb->aio_offset = offset;
trace_file_paio_submit(acb, opaque, offset, count, type);
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
+ return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
int qemu_ftruncate64(int fd, int64_t length)
* reader lock.
*/
qatomic_set(&has_writer, 0);
- AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
qatomic_set(&has_writer, 1);
/*
bdrv_drain_all_begin_nopoll();
/* Now poll the in-flight requests */
- AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
+ AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
#include "qapi/error.h"
#include "trace.h"
+/* Only used for assertions. */
+#include "qemu/coroutine_int.h"
+
/* io_uring ring size */
#define MAX_ENTRIES 128
struct io_uring ring;
- /* io queue for submit at batch. Protected by AioContext lock. */
+ /* No locking required, only accessed from AioContext home thread */
LuringQueue io_q;
- /* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
} LuringState;
* eventually runs later. Coroutines cannot be entered recursively
* so avoid doing that!
*/
+ assert(luringcb->co->ctx == s->aio_context);
if (!qemu_coroutine_entered(luringcb->co)) {
aio_co_wake(luringcb->co);
}
static void luring_process_completions_and_submit(LuringState *s)
{
- aio_context_acquire(s->aio_context);
luring_process_completions(s);
if (!s->io_q.plugged && s->io_q.in_queue > 0) {
ioq_submit(s);
}
- aio_context_release(s->aio_context);
}
static void qemu_luring_completion_bh(void *opaque)
io_q->blocked = false;
}
-void luring_io_plug(BlockDriverState *bs, LuringState *s)
+void luring_io_plug(void)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LuringState *s = aio_get_linux_io_uring(ctx);
trace_luring_io_plug(s);
s->io_q.plugged++;
}
-void luring_io_unplug(BlockDriverState *bs, LuringState *s)
+void luring_io_unplug(void)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LuringState *s = aio_get_linux_io_uring(ctx);
assert(s->io_q.plugged);
trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
s->io_q.in_queue, s->io_q.in_flight);
return 0;
}
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type)
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
+ QEMUIOVector *qiov, int type)
{
int ret;
+ AioContext *ctx = qemu_get_current_aio_context();
+ LuringState *s = aio_get_linux_io_uring(ctx);
LuringAIOCB luringcb = {
.co = qemu_coroutine_self(),
.ret = -EINPROGRESS,
#include "qemu/coroutine.h"
#include "qapi/error.h"
+/* Only used for assertions. */
+#include "qemu/coroutine_int.h"
+
#include <libaio.h>
/*
io_context_t ctx;
EventNotifier e;
- /* io queue for submit at batch. Protected by AioContext lock. */
+ /* No locking required, only accessed from AioContext home thread */
LaioQueue io_q;
-
- /* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
* later. Coroutines cannot be entered recursively so avoid doing
* that!
*/
+ assert(laiocb->co->ctx == laiocb->ctx->aio_context);
if (!qemu_coroutine_entered(laiocb->co)) {
aio_co_wake(laiocb->co);
}
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
- aio_context_acquire(s->aio_context);
qemu_laio_process_completions(s);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
- aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
return max_batch;
}
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
+void laio_io_plug(void)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LinuxAioState *s = aio_get_linux_aio(ctx);
+
s->io_q.plugged++;
}
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
- uint64_t dev_max_batch)
+void laio_io_unplug(uint64_t dev_max_batch)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LinuxAioState *s = aio_get_linux_aio(ctx);
+
assert(s->io_q.plugged);
s->io_q.plugged--;
return 0;
}
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type,
- uint64_t dev_max_batch)
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+ int type, uint64_t dev_max_batch)
{
int ret;
+ AioContext *ctx = qemu_get_current_aio_context();
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
.nbytes = qiov->size,
- .ctx = s,
+ .ctx = aio_get_linux_aio(ctx),
.ret = -EINPROGRESS,
.is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
/* Called when going out of the streaming phase to flush the bulk of the
* data to the medium, or just before completing.
*/
-static int mirror_flush(MirrorBlockJob *s)
+static int coroutine_fn mirror_flush(MirrorBlockJob *s)
{
- int ret = blk_flush(s->target);
+ int ret = blk_co_flush(s->target);
if (ret < 0) {
if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
s->ret = ret;
error_report("Device '%s' not found", device);
return;
}
- if (!blk_is_available(blk)) {
- error_report("Device '%s' has no medium", device);
- return;
- }
bs = bdrv_skip_implicit_filters(blk_bs(blk));
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
+ if (!blk_is_available(blk)) {
+ error_report("Device '%s' has no medium", device);
+ aio_context_release(aio_context);
+ return;
+ }
+
ret = bdrv_commit(bs);
aio_context_release(aio_context);
}
/* Checks to see if it's safe to resize bitmaps */
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
Qcow2BitmapList *bm_list;
* Frees the allocated clusters because the request failed and they won't
* actually be linked.
*/
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcow2State *s = bs->opaque;
if (!has_data_file(bs) && !m->keep_old_clusters) {
*
* Returns 0 on success, -errno on failure.
*/
-static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
- uint64_t guest_offset, unsigned bytes,
- uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
+static int coroutine_fn calculate_l2_meta(BlockDriverState *bs,
+ uint64_t host_cluster_offset,
+ uint64_t guest_offset, unsigned bytes,
+ uint64_t *l2_slice, QCowL2Meta **m,
+ bool keep_old)
{
BDRVQcow2State *s = bs->opaque;
int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
* function has been waiting for another request and the allocation must be
* restarted, but the whole request should not be failed.
*/
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *nb_clusters)
+static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
+ uint64_t guest_offset,
+ uint64_t *host_offset,
+ uint64_t *nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
return nb_clusters;
}
-static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
- unsigned nb_subclusters)
+static int coroutine_fn
+zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
+ unsigned nb_subclusters)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l2_slice;
return offset;
}
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int64_t nb_clusters)
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_index, refcount;
/* only used to allocate compressed sectors. We try to allocate
contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
return ret;
}
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
{
BDRVQcow2State *s = bs->opaque;
int64_t i;
* qcow2_check_refcounts() does not do anything with snapshots'
* extra data.)
*/
-static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
- int *nb_clusters_reduced,
- int *extra_data_dropped,
- Error **errp)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
+ int *nb_clusters_reduced,
+ int *extra_data_dropped,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
QCowSnapshotHeader h;
/* Read statically sized part of the snapshot header */
offset = ROUND_UP(offset, 8);
- ret = bdrv_pread(bs->file, offset, sizeof(h), &h, 0);
+ ret = bdrv_co_pread(bs->file, offset, sizeof(h), &h, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read snapshot table");
goto fail;
}
/* Read known extra data */
- ret = bdrv_pread(bs->file, offset,
- MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
+ ret = bdrv_co_pread(bs->file, offset,
+ MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read snapshot table");
goto fail;
/* Store unknown extra data */
unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
- ret = bdrv_pread(bs->file, offset, unknown_extra_data_size,
- sn->unknown_extra_data, 0);
+ ret = bdrv_co_pread(bs->file, offset, unknown_extra_data_size,
+ sn->unknown_extra_data, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to read snapshot table");
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, id_str_size, sn->id_str, 0);
+ ret = bdrv_co_pread(bs->file, offset, id_str_size, sn->id_str, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read snapshot table");
goto fail;
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, name_size, sn->name, 0);
+ ret = bdrv_co_pread(bs->file, offset, name_size, sn->name, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read snapshot table");
goto fail;
return ret;
}
-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
{
return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
}
{
int ret;
BDRVQcow2State *s = bs->opaque;
- ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
qemu_co_mutex_lock(&s->lock);
while (s->nb_threads >= QCOW2_MAX_THREADS) {
s->nb_threads++;
qemu_co_mutex_unlock(&s->lock);
- ret = thread_pool_submit_co(pool, func, arg);
+ ret = thread_pool_submit_co(func, arg);
qemu_co_mutex_lock(&s->lock);
s->nb_threads--;
* unknown magic is skipped (future extension this version knows nothing about)
* return 0 upon success, non-0 otherwise
*/
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
- uint64_t end_offset, void **p_feature_table,
- int flags, bool *need_update_header,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+ uint64_t end_offset, void **p_feature_table,
+ int flags, bool *need_update_header, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
QCowExtension ext;
printf("attempting to read extended header in offset %lu\n", offset);
#endif
- ret = bdrv_pread(bs->file, offset, sizeof(ext), &ext, 0);
+ ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
"pread fail from offset %" PRIu64, offset);
sizeof(bs->backing_format));
return 2;
}
- ret = bdrv_pread(bs->file, offset, ext.len, bs->backing_format, 0);
+ ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
"Could not read format name");
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
if (p_feature_table != NULL) {
void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset, ext.len, feature_table, 0);
+ ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
"Could not read table");
return -EINVAL;
}
- ret = bdrv_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
+ ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Unable to read CRYPTO header extension");
break;
}
- ret = bdrv_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
+ ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "bitmaps_ext: "
"Could not read ext header");
case QCOW2_EXT_MAGIC_DATA_FILE:
{
s->image_data_file = g_malloc0(ext.len + 1);
- ret = bdrv_pread(bs->file, offset, ext.len, s->image_data_file, 0);
+ ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"ERROR: Could not read data file name");
uext->len = ext.len;
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
- ret = bdrv_pread(bs->file, offset, uext->len, uext->data, 0);
+ ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
"Could not read data");
qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
}
-static int qcow2_update_options(BlockDriverState *bs, QDict *options,
- int flags, Error **errp)
+static int coroutine_fn
+qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
{
Qcow2ReopenState r = {};
int ret;
uint64_t new_refblock_offset);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int64_t nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters);
+int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size,
enum qcow2_discard_type type);
BlockDriverAmendStatusCB *status_cb,
void *cb_opaque, Error **errp);
int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs);
/* qcow2-cluster.c functions */
int coroutine_fn GRAPH_RDLOCK
qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, enum qcow2_discard_type type,
bool full_discard);
Error **errp);
void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
int qcow2_write_snapshots(BlockDriverState *bs);
int coroutine_fn GRAPH_RDLOCK
bool qcow2_get_bitmap_info_list(BlockDriverState *bs,
Qcow2BitmapInfoList **info_list, Error **errp);
int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
bool release_stored, Error **errp);
int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
return ret;
}
-static int vmdk_is_cid_valid(BlockDriverState *bs)
+static int coroutine_fn vmdk_is_cid_valid(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
uint32_t cur_pcid;
#endif
static int enable_write_target(BlockDriverState *bs, Error **errp);
-static int is_consistent(BDRVVVFATState *s);
+static int coroutine_fn is_consistent(BDRVVVFATState *s);
static QemuOptsList runtime_opts = {
.name = "vvfat",
}
#endif
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors)
{
BDRVVVFATState *s = bs->opaque;
int i;
DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
" allocated\n", sector_num,
n >> BDRV_SECTOR_BITS));
- if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
- buf + i * 0x200, 0) < 0) {
+ if (bdrv_co_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
+ buf + i * 0x200, 0) < 0) {
return -1;
}
i += (n >> BDRV_SECTOR_BITS) - 1;
return 0;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
}
}
-static inline bool cluster_was_modified(BDRVVVFATState *s,
- uint32_t cluster_num)
+static inline bool coroutine_fn GRAPH_RDLOCK
+cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num)
{
int was_modified = 0;
int i;
* Further, the files/directories handled by this function are
* assumed to be *not* deleted (and *only* those).
*/
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
- direntry_t* direntry, const char* path)
+static uint32_t coroutine_fn GRAPH_RDLOCK
+get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const char* path)
{
/*
* This is a little bit tricky:
if (res) {
return -1;
}
- res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
- BDRV_SECTOR_SIZE, s->cluster_buffer,
- 0);
+ res = bdrv_co_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
+ BDRV_SECTOR_SIZE, s->cluster_buffer,
+ 0);
if (res < 0) {
return -2;
}
* It returns 0 upon inconsistency or error, and the number of clusters
* used by the directory, its subdirectories and their files.
*/
-static int check_directory_consistency(BDRVVVFATState *s,
- int cluster_num, const char* path)
+static int coroutine_fn GRAPH_RDLOCK
+check_directory_consistency(BDRVVVFATState *s, int cluster_num, const char* path)
{
int ret = 0;
unsigned char* cluster = g_malloc(s->cluster_size);
}
/* returns 1 on success */
-static int is_consistent(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK
+is_consistent(BDRVVVFATState* s)
{
int i, check;
int used_clusters_count = 0;
return 0;
}
-static int commit_direntries(BDRVVVFATState* s,
- int dir_index, int parent_mapping_index)
+static int coroutine_fn GRAPH_RDLOCK
+commit_direntries(BDRVVVFATState* s, int dir_index, int parent_mapping_index)
{
direntry_t* direntry = array_get(&(s->directory), dir_index);
uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
/* commit one file (adjust contents, adjust mapping),
return first_mapping_index */
-static int commit_one_file(BDRVVVFATState* s,
- int dir_index, uint32_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+commit_one_file(BDRVVVFATState* s, int dir_index, uint32_t offset)
{
direntry_t* direntry = array_get(&(s->directory), dir_index);
uint32_t c = begin_of_direntry(direntry);
/*
* TODO: make sure that the short name is not matching *another* file
*/
-static int handle_commits(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK handle_commits(BDRVVVFATState* s)
{
int i, fail = 0;
* - recurse direntries from root (using bs->bdrv_pread)
* - delete files corresponding to mappings marked as deleted
*/
-static int do_commit(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK do_commit(BDRVVVFATState* s)
{
int ret = 0;
return 0;
}
-static int try_commit(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK try_commit(BDRVVVFATState* s)
{
vvfat_close_current_file(s);
DLOG(checkpoint());
return do_commit(s);
}
-static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
{
BDRVVVFATState *s = bs->opaque;
int i, ret;
* Use qcow backend. Commit later.
*/
DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
- ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
+ ret = bdrv_co_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
if (ret < 0) {
fprintf(stderr, "Error writing to qcow backend\n");
return ret;
return 0;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
(c) They need not be loaded by avocado framework when running tests.
-Author: Ani Sinha <ani@anisinha.ca>
+Author: Ani Sinha <anisinha@redhat.com>
References:
-----------
Note that a ring address is an IOVA if ``VIRTIO_F_IOMMU_PLATFORM`` has
been negotiated. Otherwise it is a user address.
-Memory regions description
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-+-------------+---------+---------+-----+---------+
-| num regions | padding | region0 | ... | region7 |
-+-------------+---------+---------+-----+---------+
-
-:num regions: a 32-bit number of regions
-
-:padding: 32-bit
-
-A region is:
+Memory region description
+^^^^^^^^^^^^^^^^^^^^^^^^^
+---------------+------+--------------+-------------+
| guest address | size | user address | mmap offset |
:mmap offset: 64-bit offset where region starts in the mapped memory
+When the ``VHOST_USER_PROTOCOL_F_XEN_MMAP`` protocol feature has been
+successfully negotiated, the memory region description contains two extra
+fields at the end.
+
++---------------+------+--------------+-------------+----------------+-------+
+| guest address | size | user address | mmap offset | xen mmap flags | domid |
++---------------+------+--------------+-------------+----------------+-------+
+
+:xen mmap flags: 32-bit bit field
+
+- Bit 0 is set for Xen foreign memory mapping.
+- Bit 1 is set for Xen grant memory mapping.
+- Bit 8 is set if the memory region cannot be mapped in advance, and memory
+ areas within this region must be mapped / unmapped only when required by the
+ back-end. The back-end shouldn't try to map the entire region at once, as the
+ front-end may not allow it. The back-end should rather map only the required
+ amount of memory at once and unmap it after it is used.
+
+:domid: a 32-bit Xen hypervisor specific domain id.
+
Single memory region description
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-+---------+---------------+------+--------------+-------------+
-| padding | guest address | size | user address | mmap offset |
-+---------+---------------+------+--------------+-------------+
++---------+--------+
+| padding | region |
++---------+--------+
:padding: 64-bit
-:guest address: a 64-bit guest address of the region
+A region is represented by the Memory region description.
-:size: a 64-bit size
+Multiple Memory regions description
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-:user address: a 64-bit user address
++-------------+---------+---------+-----+---------+
+| num regions | padding | region0 | ... | region7 |
++-------------+---------+---------+-----+---------+
-:mmap offset: 64-bit offset where region starts in the mapped memory
+:num regions: a 32-bit number of regions
+
+:padding: 32-bit
+
+A region is represented by the Memory region description.
Log description
^^^^^^^^^^^^^^^
#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
#define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS 15
#define VHOST_USER_PROTOCOL_F_STATUS 16
+ #define VHOST_USER_PROTOCOL_F_XEN_MMAP 17
Front-end message types
-----------------------
``VHOST_USER_SET_MEM_TABLE``
:id: 5
:equivalent ioctl: ``VHOST_SET_MEM_TABLE``
- :request payload: memory regions description
- :reply payload: (postcopy only) memory regions description
+ :request payload: multiple memory regions description
+ :reply payload: (postcopy only) multiple memory regions description
Sets the memory map regions on the back-end so it can translate the
vring addresses. In the ancillary data there is an array of file
overhead in I/O from virtual machines.
QEMU now implements the basic common functionality to enable an emulated device
-to support SR/IOV. Yet no fully implemented devices exists in QEMU, but a
-proof-of-concept hack of the Intel igb can be found here:
-
-git://github.com/knuto/qemu.git sriov_patches_v5
+to support SR/IOV.
Implementation
==============
.. toctree::
:maxdepth: 2
+ pci-ids
+ pci-serial
+ pci-testdev
ppc-xive
ppc-spapr-xive
ppc-spapr-numa
--- /dev/null
+================
+PCI IDs for QEMU
+================
+
+Red Hat, Inc. donates a part of its device ID range to QEMU, to be used for
+virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
+
+Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
+for your devices.
+
+1af4 vendor ID
+--------------
+
+The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
+Note that this allocation is separate from the virtio device IDs, which are
+maintained as part of the virtio specification.
+
+1af4:1000
+ network device (legacy)
+1af4:1001
+ block device (legacy)
+1af4:1002
+ balloon device (legacy)
+1af4:1003
+ console device (legacy)
+1af4:1004
+ SCSI host bus adapter device (legacy)
+1af4:1005
+ entropy generator device (legacy)
+1af4:1009
+ 9p filesystem device (legacy)
+1af4:1012
+ vsock device (bug compatibility)
+
+1af4:1040 to 1af4:10ef
+ ID range for modern virtio devices. The PCI device
+ ID is calculated from the virtio device ID by adding the
+ 0x1040 offset. The virtio IDs are defined in the virtio
+ specification. The Linux kernel has a header file with
+ defines for all virtio IDs (``linux/virtio_ids.h``); QEMU has a
+ copy in ``include/standard-headers/``.
+
+1af4:10f0 to 1af4:10ff
+ Available for experimental usage without registration. Must get
+ official ID when the code leaves the test lab (i.e. when seeking
+ upstream merge or shipping a distro/product) to avoid conflicts.
+
+1af4:1100
+ Used as PCI Subsystem ID for existing hardware devices emulated
+ by QEMU.
+
+1af4:1110
+ ivshmem device (shared memory, ``docs/specs/ivshmem-spec.txt``)
+
+All other device IDs are reserved.
+
+1b36 vendor ID
+--------------
+
+The 0000 -> 00ff device ID range is used as follows for QEMU-specific
+PCI devices (other than virtio):
+
+1b36:0001
+ PCI-PCI bridge
+1b36:0002
+ PCI serial port (16550A) adapter (:doc:`pci-serial`)
+1b36:0003
+ PCI Dual-port 16550A adapter (:doc:`pci-serial`)
+1b36:0004
+ PCI Quad-port 16550A adapter (:doc:`pci-serial`)
+1b36:0005
+ PCI test device (:doc:`pci-testdev`)
+1b36:0006
+ PCI Rocker Ethernet switch device
+1b36:0007
+ PCI SD Card Host Controller Interface (SDHCI)
+1b36:0008
+ PCIe host bridge
+1b36:0009
+ PCI Expander Bridge (-device pxb)
+1b36:000a
+ PCI-PCI bridge (multiseat)
+1b36:000b
+ PCIe Expander Bridge (-device pxb-pcie)
+1b36:000d
+ PCI xhci usb host adapter
+1b36:000f
+ mdpy (mdev sample device), ``linux/samples/vfio-mdev/mdpy.c``
+1b36:0010
+ PCIe NVMe device (``-device nvme``)
+1b36:0011
+ PCI PVPanic device (``-device pvpanic-pci``)
+1b36:0012
+ PCI ACPI ERST device (``-device acpi-erst``)
+
+All these devices are documented in :doc:`index`.
+
+The 0100 device ID is used for the QXL video card device.
+++ /dev/null
-
-PCI IDs for qemu
-================
-
-Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
-
-Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
-for your devices.
-
-1af4 vendor ID
---------------
-
-The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
-Note that this allocation separate from the virtio device IDs, which are
-maintained as part of the virtio specification.
-
-1af4:1000 network device (legacy)
-1af4:1001 block device (legacy)
-1af4:1002 balloon device (legacy)
-1af4:1003 console device (legacy)
-1af4:1004 SCSI host bus adapter device (legacy)
-1af4:1005 entropy generator device (legacy)
-1af4:1009 9p filesystem device (legacy)
-1af4:1012 vsock device (bug compatibility)
-
-1af4:1040 Start of ID range for modern virtio devices. The PCI device
- to ID is calculated from the virtio device ID by adding the
-1af4:10ef 0x1040 offset. The virtio IDs are defined in the virtio
- specification. The Linux kernel has a header file with
- defines for all virtio IDs (linux/virtio_ids.h), qemu has a
- copy in include/standard-headers/.
-
-1af4:10f0 Available for experimental usage without registration. Must get
- to official ID when the code leaves the test lab (i.e. when seeking
-1af4:10ff upstream merge or shipping a distro/product) to avoid conflicts.
-
-1af4:1100 Used as PCI Subsystem ID for existing hardware devices emulated
- by qemu.
-
-1af4:1110 ivshmem device (shared memory, docs/specs/ivshmem-spec.txt)
-
-All other device IDs are reserved.
-
-1b36 vendor ID
---------------
-
-The 0000 -> 00ff device ID range is used as follows for QEMU-specific
-PCI devices (other than virtio):
-
-1b36:0001 PCI-PCI bridge
-1b36:0002 PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
-1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
-1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
-1b36:0005 PCI test device (docs/specs/pci-testdev.txt)
-1b36:0006 PCI Rocker Ethernet switch device
-1b36:0007 PCI SD Card Host Controller Interface (SDHCI)
-1b36:0008 PCIe host bridge
-1b36:0009 PCI Expander Bridge (-device pxb)
-1b36:000a PCI-PCI bridge (multiseat)
-1b36:000b PCIe Expander Bridge (-device pxb-pcie)
-1b36:000d PCI xhci usb host adapter
-1b36:000f mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
-1b36:0010 PCIe NVMe device (-device nvme)
-1b36:0011 PCI PVPanic device (-device pvpanic-pci)
-1b36:0012 PCI ACPI ERST device (-device acpi-erst)
-
-All these devices are documented in docs/specs.
-
-The 0100 device ID is used for the QXL video card device.
--- /dev/null
+=======================
+QEMU PCI serial devices
+=======================
+
+QEMU implements some PCI serial devices which are simple PCI
+wrappers around one or more 16550 UARTs.
+
+There is one single-port variant and two multiport variants. Linux
+guests work out of the box with all cards. There is a Windows inf file
+(``docs/qemupciserial.inf``) to set up the cards in Windows guests.
+
+
+Single-port card
+----------------
+
+Name:
+ ``pci-serial``
+PCI ID:
+ 1b36:0002
+PCI Region 0:
+ IO bar, 8 bytes long, with the 16550 UART mapped to it.
+Interrupt:
+ Wired to pin A.
+
+
+Multiport cards
+---------------
+
+Name:
+ ``pci-serial-2x``, ``pci-serial-4x``
+PCI ID:
+ 1b36:0003 (``-2x``) and 1b36:0004 (``-4x``)
+PCI Region 0:
+ IO bar, with two or four 16550 UARTs mapped after each other.
+ The first is at offset 0, the second at offset 8, and so on.
+Interrupt:
+ Wired to pin A.
+++ /dev/null
-
-QEMU pci serial devices
-=======================
-
-There is one single-port variant and two muliport-variants. Linux
-guests out-of-the box with all cards. There is a Windows inf file
-(docs/qemupciserial.inf) to setup the single-port card in Windows
-guests.
-
-
-single-port card
-----------------
-
-Name: pci-serial
-PCI ID: 1b36:0002
-
-PCI Region 0:
- IO bar, 8 bytes long, with the 16550 uart mapped to it.
- Interrupt is wired to pin A.
-
-
-multiport cards
----------------
-
-Name: pci-serial-2x
-PCI ID: 1b36:0003
-
-Name: pci-serial-4x
-PCI ID: 1b36:0004
-
-PCI Region 0:
- IO bar, with two/four 16550 uart mapped after each other.
- The first is at offset 0, second at offset 8, ...
- Interrupt is wired to pin A.
--- /dev/null
+====================
+QEMU PCI test device
+====================
+
+``pci-testdev`` is a device used for testing low level IO.
+
+The device implements up to three BARs: BAR0, BAR1 and BAR2.
+Each of BAR 0+1 can be memory or IO. Guests must detect
+BAR types and act accordingly.
+
+BAR 0+1 size is up to 4K bytes each.
+BAR 0+1 starts with the following header:
+
+.. code-block:: c
+
+ typedef struct PCITestDevHdr {
+ uint8_t test; /* write-only, starts a given test number */
+ uint8_t width_type; /*
+ * read-only, type and width of access for a given test.
+ * 1,2,4 for byte,word or long write.
+ * any other value if test not supported on this BAR
+ */
+ uint8_t pad0[2];
+ uint32_t offset; /* read-only, offset in this BAR for a given test */
+ uint32_t data; /* read-only, data to use for a given test */
+ uint32_t count; /* for debugging. number of writes detected. */
+ uint8_t name[]; /* for debugging. 0-terminated ASCII string. */
+ } PCITestDevHdr;
+
+All registers are little endian.
+
+The device is expected to always implement tests 0 to N on each BAR, and to add new
+tests with higher numbers. In this way a guest can scan test numbers until it
+detects an access type that it does not support on this BAR, then stop.
+
+BAR2 is a 64-bit memory BAR, without backing storage. It is disabled
+by default and can be enabled using the ``membar=<size>`` property. This
+can be used to test whether guests handle PCI BARs of a specific
+(possibly quite large) size correctly.
+++ /dev/null
-pci-test is a device used for testing low level IO
-
-device implements up to three BARs: BAR0, BAR1 and BAR2.
-Each of BAR 0+1 can be memory or IO. Guests must detect
-BAR types and act accordingly.
-
-BAR 0+1 size is up to 4K bytes each.
-BAR 0+1 starts with the following header:
-
-typedef struct PCITestDevHdr {
- uint8_t test; <- write-only, starts a given test number
- uint8_t width_type; <- read-only, type and width of access for a given test.
- 1,2,4 for byte,word or long write.
- any other value if test not supported on this BAR
- uint8_t pad0[2];
- uint32_t offset; <- read-only, offset in this BAR for a given test
- uint32_t data; <- read-only, data to use for a given test
- uint32_t count; <- for debugging. number of writes detected.
- uint8_t name[]; <- for debugging. 0-terminated ASCII string.
-} PCITestDevHdr;
-
-All registers are little endian.
-
-device is expected to always implement tests 0 to N on each BAR, and to add new
-tests with higher numbers. In this way a guest can scan test numbers until it
-detects an access type that it does not support on this BAR, then stop.
-
-BAR2 is a 64bit memory bar, without backing storage. It is disabled
-by default and can be enabled using the membar=<size> property. This
-can be used to test whether guests handle pci bars of a specific
-(possibly quite large) size correctly.
QemuMutex readdir_mutex_L;
} V9fsDir;
-static inline void v9fs_readdir_lock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_lock(V9fsDir *dir)
{
if (dir->proto_version == V9FS_PROTO_2000U) {
qemu_co_mutex_lock(&dir->readdir_mutex_u);
}
}
-static inline void v9fs_readdir_unlock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_unlock(V9fsDir *dir)
{
if (dir->proto_version == V9FS_PROTO_2000U) {
qemu_co_mutex_unlock(&dir->readdir_mutex_u);
*
* See v9fs_co_readdir_many() (as its only user) below for details.
*/
-static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
- struct V9fsDirEnt **entries, off_t offset,
- int32_t maxsize, bool dostat)
+static int coroutine_fn
+do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, struct V9fsDirEnt **entries,
+ off_t offset, int32_t maxsize, bool dostat)
{
V9fsState *s = pdu->s;
V9fsString name;
void co_run_in_worker_bh(void *opaque)
{
Coroutine *co = opaque;
- thread_pool_submit_aio(aio_get_thread_pool(qemu_get_aio_context()),
- coroutine_enter_func, co, coroutine_enter_cb, co);
+ thread_pool_submit_aio(coroutine_enter_func, co, coroutine_enter_cb, co);
}
#include "qapi/error.h"
#include "qemu/uuid.h"
-static void cedt_build_chbs(GArray *table_data, PXBDev *cxl)
+static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl)
{
- SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge);
+ PXBDev *pxb = PXB_DEV(cxl);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl_host_bridge);
struct MemoryRegion *mr = sbd->mmio[0].memory;
/* Type */
build_append_int_noprefix(table_data, 32, 2);
/* UID - currently equal to bus number */
- build_append_int_noprefix(table_data, cxl->bus_nr, 4);
+ build_append_int_noprefix(table_data, pxb->bus_nr, 4);
/* Version */
build_append_int_noprefix(table_data, 1, 4);
/* Host Bridge List (list of UIDs - currently bus_nr) */
for (i = 0; i < fw->num_targets; i++) {
g_assert(fw->target_hbs[i]);
- build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4);
+ build_append_int_noprefix(table_data, PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
}
}
}
{
Aml *cedt = opaque;
- if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) {
+ if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEV)) {
cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj));
}
* acpi_pcihp_eject_slot() when the operation is completed.
*/
pdev->qdev.pending_deleted_event = true;
+ /* If unplug was requested before OSPM is initialized, the
+ * Linux kernel will clear GPE0.sts[] bits during boot, which effectively
+ * hides the unplug event. Follow-up qmp_device_del() calls then remain
+ * blocked by the above flag permanently.
+ * Unblock qmp_device_del() by setting an expire limit, so the user can
+ * repeat the unplug request later when OSPM has been booted.
+ */
+ pdev->qdev.pending_deleted_expires_ms =
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
+
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
}
* THE SOFTWARE.
*/
-/* see docs/specs/pci-serial.txt */
+/* see docs/specs/pci-serial.rst */
#include "qemu/osdep.h"
#include "qapi/error.h"
* THE SOFTWARE.
*/
-/* see docs/specs/pci-serial.txt */
+/* see docs/specs/pci-serial.rst */
#include "qemu/osdep.h"
#include "qapi/error.h"
bool ambig;
o = object_resolve_path_type(fw->targets[i],
- TYPE_PXB_CXL_DEVICE,
+ TYPE_PXB_CXL_DEV,
&ambig);
if (!o) {
error_setg(errp, "Could not resolve CXLFM target %s",
addr += fw->base;
rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets;
- hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge);
+ hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl_host_bridge);
if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) {
return NULL;
}
/* IVHD length */
build_append_int_noprefix(table_data, ivhd_table_len, 2);
/* DeviceID */
- build_append_int_noprefix(table_data, s->devid, 2);
+ build_append_int_noprefix(table_data,
+ object_property_get_int(OBJECT(&s->pci), "addr",
+ &error_abort), 2);
/* Capability offset */
- build_append_int_noprefix(table_data, s->capab_offset, 2);
+ build_append_int_noprefix(table_data, s->pci.capab_offset, 2);
/* IOMMU base address */
build_append_int_noprefix(table_data, s->mmio.addr, 8);
/* PCI Segment Group */
int legacy_table_size =
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
ACPI_BUILD_ALIGN_SIZE);
- if (tables_blob->len > legacy_table_size) {
+ if ((tables_blob->len > legacy_table_size) &&
+ !pcmc->resizable_acpi_blob) {
/* Should happen only with PCI bridges and -M pc-i440fx-2.0. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
g_array_set_size(tables_blob, legacy_table_size);
} else {
/* Make sure we have a buffer in case we need to resize the tables. */
- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) {
+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) &&
+ !pcmc->resizable_acpi_blob) {
/* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */
warn_report("ACPI table size %u exceeds %d bytes,"
" migration may not work",
amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
0xffffffffffffffef, 0);
amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
+}
+
+static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
+{
+ AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
+ int ret;
+
+ ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE, errp);
+ if (ret < 0) {
+ return;
+ }
+ s->capab_offset = ret;
+
+ ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
+ AMDVI_CAPAB_REG_SIZE, errp);
+ if (ret < 0) {
+ return;
+ }
+ ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
+ AMDVI_CAPAB_REG_SIZE, errp);
+ if (ret < 0) {
+ return;
+ }
+
+ if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
+ return;
+ }
/* reset device ident */
- pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
- pci_config_set_prog_interface(s->pci.dev.config, 00);
- pci_config_set_device_id(s->pci.dev.config, s->devid);
- pci_config_set_class(s->pci.dev.config, 0x0806);
+ pci_config_set_prog_interface(pdev->config, 0);
/* reset AMDVI specific capabilities, all r/o */
- pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
- pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
- s->mmio.addr & ~(0xffff0000));
- pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
- (s->mmio.addr & ~(0xffff)) >> 16);
- pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
+ pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
+ pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
+ AMDVI_BASE_ADDR & ~(0xffff0000));
+ pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
+ (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
+ pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
0xff000000);
- pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
- pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
+ pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
+ pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
}
static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
- int ret = 0;
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
MachineState *ms = MACHINE(qdev_get_machine());
PCMachineState *pcms = PC_MACHINE(ms);
if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
return;
}
- ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
- AMDVI_CAPAB_SIZE, errp);
- if (ret < 0) {
- return;
- }
- s->capab_offset = ret;
-
- ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
- if (ret < 0) {
- return;
- }
- ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
- AMDVI_CAPAB_REG_SIZE, errp);
- if (ret < 0) {
- return;
- }
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
- s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort);
- msi_init(&s->pci.dev, 0, 1, true, false, errp);
amdvi_init(s);
}
static void amdvi_pci_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+ k->vendor_id = PCI_VENDOR_ID_AMD;
+ k->class_id = 0x0806;
+ k->realize = amdvi_pci_realize;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
OBJECT_DECLARE_SIMPLE_TYPE(AMDVIState, AMD_IOMMU_DEVICE)
#define TYPE_AMD_IOMMU_PCI "AMDVI-PCI"
+OBJECT_DECLARE_SIMPLE_TYPE(AMDVIPCIState, AMD_IOMMU_PCI)
#define TYPE_AMD_IOMMU_MEMORY_REGION "amd-iommu-iommu-memory-region"
typedef struct AMDVIAddressSpace AMDVIAddressSpace;
/* functions to steal PCI config space */
-typedef struct AMDVIPCIState {
+struct AMDVIPCIState {
PCIDevice dev; /* The PCI device itself */
-} AMDVIPCIState;
+ uint32_t capab_offset; /* capability offset pointer */
+};
struct AMDVIState {
X86IOMMUState iommu; /* IOMMU bus device */
AMDVIPCIState pci; /* IOMMU PCI device */
uint32_t version;
- uint32_t capab_offset; /* capability offset pointer */
uint64_t mmio_addr;
- uint32_t devid; /* auto-assigned devid */
-
bool enabled; /* IOMMU enabled */
bool ats_enabled; /* address translation enabled */
bool cmdbuf_enabled; /* command buffer enabled */
struct vtd_iotlb_key {
uint64_t gfn;
uint32_t pasid;
- uint32_t level;
uint16_t sid;
+ uint8_t level;
};
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static guint vtd_iotlb_hash(gconstpointer v)
{
const struct vtd_iotlb_key *key = v;
+ uint64_t hash64 = key->gfn | ((uint64_t)(key->sid) << VTD_IOTLB_SID_SHIFT) |
+ (uint64_t)(key->level - 1) << VTD_IOTLB_LVL_SHIFT |
+ (uint64_t)(key->pasid) << VTD_IOTLB_PASID_SHIFT;
- return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) |
- (key->level) << VTD_IOTLB_LVL_SHIFT |
- (key->pasid) << VTD_IOTLB_PASID_SHIFT;
+ return (guint)((hash64 >> 32) ^ (hash64 & 0xffffffffU));
}
static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
VTD_INTERRUPT_ADDR_FIRST + 1)
/* The shift of source_id in the key of IOTLB hash table */
-#define VTD_IOTLB_SID_SHIFT 20
-#define VTD_IOTLB_LVL_SHIFT 28
-#define VTD_IOTLB_PASID_SHIFT 30
+#define VTD_IOTLB_SID_SHIFT 26
+#define VTD_IOTLB_LVL_SHIFT 42
+#define VTD_IOTLB_PASID_SHIFT 44
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
+ pcmc->resizable_acpi_blob = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len);
compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len);
pcmc->rsdp_in_ram = false;
+ pcmc->resizable_acpi_blob = false;
}
DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn,
memory_device_plug(MEMORY_DEVICE(dimm), machine);
vmstate_register_ram(vmstate_mr, DEVICE(dimm));
+ /* count only "real" DIMMs, not NVDIMMs */
+ if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) {
+ machine->device_memory->dimm_size += memory_region_size(vmstate_mr);
+ }
}
void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
memory_device_unplug(MEMORY_DEVICE(dimm), machine);
vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
+ if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) {
+ machine->device_memory->dimm_size -= memory_region_size(vmstate_mr);
+ }
}
static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
char bus_path[8];
};
-#define TYPE_PXB_DEVICE "pxb"
-DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV,
- TYPE_PXB_DEVICE)
-
-#define TYPE_PXB_PCIE_DEVICE "pxb-pcie"
-DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
- TYPE_PXB_PCIE_DEVICE)
-
-static PXBDev *convert_to_pxb(PCIDevice *dev)
-{
- /* A CXL PXB's parent bus is PCIe, so the normal check won't work */
- if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) {
- return PXB_CXL_DEV(dev);
- }
-
- return pci_bus_is_express(pci_get_bus(dev))
- ? PXB_PCIE_DEV(dev) : PXB_DEV(dev);
-}
+#define TYPE_PXB_PCIE_DEV "pxb-pcie"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV)
static GList *pxb_dev_list;
static int pxb_bus_num(PCIBus *bus)
{
- PXBDev *pxb = convert_to_pxb(bus->parent_dev);
+ PXBDev *pxb = PXB_DEV(bus->parent_dev);
return pxb->bus_nr;
}
static uint16_t pxb_bus_numa_node(PCIBus *bus)
{
- PXBDev *pxb = convert_to_pxb(bus->parent_dev);
+ PXBDev *pxb = PXB_DEV(bus->parent_dev);
return pxb->numa_node;
}
pxb_host = PCI_HOST_BRIDGE(dev);
pxb_bus = pxb_host->bus;
- pxb_dev = convert_to_pxb(pxb_bus->parent_dev);
+ pxb_dev = PXB_DEV(pxb_bus->parent_dev);
position = g_list_index(pxb_dev_list, pxb_dev);
assert(position >= 0);
*/
void pxb_cxl_hook_up_registers(CXLState *cxl_state, PCIBus *bus, Error **errp)
{
- PXBDev *pxb = PXB_CXL_DEV(pci_bridge_get_device(bus));
- CXLHost *cxl = pxb->cxl.cxl_host_bridge;
+ PXBCXLDev *pxb = PXB_CXL_DEV(pci_bridge_get_device(bus));
+ CXLHost *cxl = pxb->cxl_host_bridge;
CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
hwaddr offset;
static void pxb_cxl_dev_reset(DeviceState *dev)
{
- CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
+ CXLHost *cxl = PXB_CXL_DEV(dev)->cxl_host_bridge;
CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
PCIHostState *hb = PCI_HOST_BRIDGE(cxl);
uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
* The CXL specification allows for host bridges with no HDM decoders
* if they only have a single root port.
*/
- if (!PXB_DEV(dev)->hdm_for_passthrough) {
+ if (!PXB_CXL_DEV(dev)->hdm_for_passthrough) {
dsp_count = pcie_count_ds_ports(hb->bus);
}
/* Initial reset will have 0 dsp so wait until > 0 */
static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
Error **errp)
{
- PXBDev *pxb = convert_to_pxb(dev);
+ PXBDev *pxb = PXB_DEV(dev);
DeviceState *ds, *bds = NULL;
PCIBus *bus;
const char *dev_name = NULL;
} else if (type == CXL) {
bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
bus->flags |= PCI_BUS_CXL;
- PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
+ PXB_CXL_DEV(dev)->cxl_host_bridge = PXB_CXL_HOST(ds);
} else {
bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
bds = qdev_new("pci-bridge");
static void pxb_dev_exitfn(PCIDevice *pci_dev)
{
- PXBDev *pxb = convert_to_pxb(pci_dev);
+ PXBDev *pxb = PXB_DEV(pci_dev);
pxb_dev_list = g_list_remove(pxb_dev_list, pxb);
}
}
static const TypeInfo pxb_dev_info = {
- .name = TYPE_PXB_DEVICE,
+ .name = TYPE_PXB_DEV,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PXBDev),
.class_init = pxb_dev_class_init,
k->class_id = PCI_CLASS_BRIDGE_HOST;
dc->desc = "PCI Express Expander Bridge";
- device_class_set_props(dc, pxb_dev_properties);
dc->hotpluggable = false;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
}
static const TypeInfo pxb_pcie_dev_info = {
- .name = TYPE_PXB_PCIE_DEVICE,
- .parent = TYPE_PCI_DEVICE,
- .instance_size = sizeof(PXBDev),
+ .name = TYPE_PXB_PCIE_DEV,
+ .parent = TYPE_PXB_DEV,
+ .instance_size = sizeof(PXBPCIEDev),
.class_init = pxb_pcie_dev_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
}
static Property pxb_cxl_dev_properties[] = {
- /* Note: 0 is not a legal PXB bus number. */
- DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
- DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
- DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false),
- DEFINE_PROP_BOOL("hdm_for_passthrough", PXBDev, hdm_for_passthrough, false),
+ DEFINE_PROP_BOOL("hdm_for_passthrough", PXBCXLDev, hdm_for_passthrough, false),
DEFINE_PROP_END_OF_LIST(),
};
}
static const TypeInfo pxb_cxl_dev_info = {
- .name = TYPE_PXB_CXL_DEVICE,
- .parent = TYPE_PCI_DEVICE,
- .instance_size = sizeof(PXBDev),
+ .name = TYPE_PXB_CXL_DEV,
+ .parent = TYPE_PXB_PCIE_DEV,
+ .instance_size = sizeof(PXBCXLDev),
.class_init = pxb_cxl_dev_class_init,
.interfaces =
(InterfaceInfo[]){
return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn));
}
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus)
+{
+ return bus->slot_reserved_mask;
+}
+
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+ bus->slot_reserved_mask |= mask;
+}
+
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+ bus->slot_reserved_mask &= ~mask;
+}
+
/* -1 for devfn means auto assign */
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
const char *name, int devfn,
{
SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque;
SpaprNVDIMMDeviceFlushState *state;
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem);
bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm),
}
QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
- thread_pool_submit_aio(pool, flush_worker_cb, state,
+ thread_pool_submit_aio(flush_worker_cb, state,
spapr_nvdimm_flush_completion_cb, state);
}
PCDIMMDevice *dimm;
HostMemoryBackend *backend = NULL;
SpaprNVDIMMDeviceFlushState *state;
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
int fd;
if (!drc || !drc->dev ||
state->drcidx = drc_index;
- thread_pool_submit_aio(pool, flush_worker_cb, state,
+ thread_pool_submit_aio(flush_worker_cb, state,
spapr_nvdimm_flush_completion_cb, state);
continue_token = state->continue_token;
return -EINVAL;
}
virtio_queue_set_num(vdev, index, num);
+ virtio_init_region_cache(vdev, index);
} else if (virtio_queue_get_num(vdev, index) > num) {
/* Fail if we don't have a big enough queue. */
return -EINVAL;
#include "hw/irq.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bridge.h"
-#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/pci-host/sabre.h"
/* Only in-built Simba APBs can exist on the root bus, slot 0 on busA is
reserved (leaving no slots free after on-board devices) however slots
0-3 are free on busB */
- pci_bus->slot_reserved_mask = 0xfffffffc;
- pci_busA->slot_reserved_mask = 0xfffffff1;
- pci_busB->slot_reserved_mask = 0xfffffff0;
+ pci_bus_set_slot_reserved_mask(pci_bus, 0xfffffffc);
+ pci_bus_set_slot_reserved_mask(pci_busA, 0xfffffff1);
+ pci_bus_set_slot_reserved_mask(pci_busB, 0xfffffff0);
ebus = pci_new_multifunction(PCI_DEVFN(1, 0), true, TYPE_EBUS);
qdev_prop_set_uint64(DEVICE(ebus), "console-serial-base",
{
VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+ /*
+ * We don't support interrupts, return early if index is set to
+ * VIRTIO_CONFIG_IRQ_IDX.
+ */
+ if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+ return;
+ }
+
vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask);
}
{
VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+ /*
+ * We don't support interrupts, return early if index is set to
+ * VIRTIO_CONFIG_IRQ_IDX.
+ */
+ if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+ return false;
+ }
+
return vhost_virtqueue_pending(&i2c->vhost_dev, idx);
}
0, virtio_queue_get_desc_size(vdev, idx));
}
-static void vhost_eventfd_add(MemoryListener *listener,
- MemoryRegionSection *section,
- bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void vhost_eventfd_del(MemoryListener *listener,
- MemoryRegionSection *section,
- bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
int n, uint32_t timeout)
{
.log_sync = vhost_log_sync,
.log_global_start = vhost_log_global_start,
.log_global_stop = vhost_log_global_stop,
- .eventfd_add = vhost_eventfd_add,
- .eventfd_del = vhost_eventfd_del,
.priority = 10
};
#include "qemu/error-report.h"
#include "migration/misc.h"
#include "migration/migration.h"
+#include "migration/options.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
memcpy(config_data, &config, virtio_balloon_config_size(dev));
}
-static int build_dimm_list(Object *obj, void *opaque)
-{
- GSList **list = opaque;
-
- if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
- DeviceState *dev = DEVICE(obj);
- if (dev->realized) { /* only realized DIMMs matter */
- *list = g_slist_prepend(*list, dev);
- }
- }
-
- object_child_foreach(obj, build_dimm_list, opaque);
- return 0;
-}
-
static ram_addr_t get_current_ram_size(void)
{
- GSList *list = NULL, *item;
- ram_addr_t size = current_machine->ram_size;
-
- build_dimm_list(qdev_get_machine(), &list);
- for (item = list; item; item = g_slist_next(item)) {
- Object *obj = OBJECT(item->data);
- if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
- size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
- &error_abort);
- }
+ MachineState *machine = MACHINE(qdev_get_machine());
+ if (machine->device_memory) {
+ return machine->ram_size + machine->device_memory->dimm_size;
+ } else {
+ return machine->ram_size;
}
- g_slist_free(list);
-
- return size;
}
static bool virtio_balloon_page_poison_support(void *opaque)
if (proxy->legacy) {
virtio_queue_update_rings(vdev, vdev->queue_sel);
} else {
+ virtio_init_region_cache(vdev, vdev->queue_sel);
proxy->vqs[vdev->queue_sel].num = value;
}
break;
proxy->vqs[vdev->queue_sel].num = val;
virtio_queue_set_num(vdev, vdev->queue_sel,
proxy->vqs[vdev->queue_sel].num);
+ virtio_init_region_cache(vdev, vdev->queue_sel);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
vector = virtio_queue_vector(vdev, vdev->queue_sel);
VirtIODeviceRequest *req_data;
VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev);
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
trace_virtio_pmem_flush_request();
req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest));
req_data->fd = memory_region_get_fd(&backend->mr);
req_data->pmem = pmem;
req_data->vdev = vdev;
- thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data);
+ thread_pool_submit_aio(worker_cb, req_data, done_cb, req_data);
}
static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
}
}
-static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
VirtQueue *vq = &vdev->vq[n];
VRingMemoryRegionCaches *old = vq->vring.caches;
#include <sys/ioctl.h>
#include "hw/pci/pci.h"
-#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "xen_pt.h"
}
XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n");
- pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK;
+ pci_bus_set_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK);
}
static void xen_igd_clear_slot(DeviceState *qdev, Error **errp)
return;
}
- if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) {
+ if (!(pci_bus_get_slot_reserved_mask(pci_bus) & XEN_PCI_IGD_SLOT_MASK)) {
xpdc->pci_qdev_realize(qdev, errp);
return;
}
s->real_device.dev == XEN_PCI_IGD_DEV &&
s->real_device.func == XEN_PCI_IGD_FN &&
s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) {
- pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK;
+ pci_bus_clear_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK);
XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n");
}
xpdc->pci_qdev_realize(qdev, errp);
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
* caller does not hold a lock) are involved in the polling condition.
* @cond: wait while this conditional expression is true
- * @unlock: whether to unlock and then lock again @ctx. This apples
+ * @unlock: whether to unlock and then lock again @ctx. This applies
* only when waiting for another AioContext from the main loop.
* Otherwise it's ignored.
*
struct ThreadPool *thread_pool;
#ifdef CONFIG_LINUX_AIO
- /*
- * State for native Linux AIO. Uses aio_context_acquire/release for
- * locking.
- */
struct LinuxAioState *linux_aio;
#endif
#ifdef CONFIG_LINUX_IO_URING
- /*
- * State for Linux io_uring. Uses aio_context_acquire/release for
- * locking.
- */
struct LuringState *linux_io_uring;
/* State for file descriptor monitoring using Linux io_uring */
/*
* Common functions that are neither I/O nor Global State.
*
- * See include/block/block-commmon.h for more information about
+ * See include/block/block-common.h for more information about
* the Common API.
*/
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(Error **errp);
void laio_cleanup(LinuxAioState *s);
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type,
- uint64_t dev_max_batch);
+
+/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+ int type, uint64_t dev_max_batch);
+
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
- uint64_t dev_max_batch);
+
+/*
+ * laio_io_plug/unplug work in the thread's current AioContext, therefore the
+ * caller must ensure that they are paired in the same IOThread.
+ */
+void laio_io_plug(void);
+void laio_io_unplug(uint64_t dev_max_batch);
#endif
/* io_uring.c - Linux io_uring implementation */
#ifdef CONFIG_LINUX_IO_URING
typedef struct LuringState LuringState;
LuringState *luring_init(Error **errp);
void luring_cleanup(LuringState *s);
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type);
+
+/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
+ QEMUIOVector *qiov, int type);
void luring_detach_aio_context(LuringState *s, AioContext *old_context);
void luring_attach_aio_context(LuringState *s, AioContext *new_context);
-void luring_io_plug(BlockDriverState *bs, LuringState *s);
-void luring_io_unplug(BlockDriverState *bs, LuringState *s);
+
+/*
+ * luring_io_plug/unplug work in the thread's current AioContext, therefore the
+ * caller must ensure that they are paired in the same IOThread.
+ */
+void luring_io_plug(void);
+void luring_io_unplug(void);
#endif
#ifdef _WIN32
ThreadPool *thread_pool_new(struct AioContext *ctx);
void thread_pool_free(ThreadPool *pool);
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
- ThreadPoolFunc *func, void *arg,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
- ThreadPoolFunc *func, void *arg);
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
+/*
+ * thread_pool_submit* API: submit I/O requests in the thread's
+ * current AioContext.
+ */
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+ BlockCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+
void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
#endif
* @base: address in guest physical address space where the memory
* address space for memory devices starts
* @mr: address space container for memory devices
+ * @dimm_size: the sum of plugged DIMMs' sizes
*/
typedef struct DeviceMemoryState {
hwaddr base;
MemoryRegion mr;
+ uint64_t dimm_size;
} DeviceMemoryState;
/**
#define CXL_WINDOW_MAX 10
-typedef struct PXBDev PXBDev;
+typedef struct PXBCXLDev PXBCXLDev;
typedef struct CXLFixedWindow {
uint64_t size;
char **targets;
- PXBDev *target_hbs[8];
+ PXBCXLDev *target_hbs[8];
uint8_t num_targets;
uint8_t enc_int_ways;
uint8_t enc_int_gran;
/* create kvmclock device even when KVM PV features are not exposed */
bool kvmclock_create_always;
+
+ /* resizable acpi blob compat */
+ bool resizable_acpi_blob;
};
#define TYPE_PC_MACHINE "generic-pc-machine"
void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq);
void pci_bus_irqs_cleanup(PCIBus *bus);
int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus);
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask);
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask);
/* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
static inline int pci_swizzle(int slot, int pin)
{
#define PCI_BRIDGE_DEV_PROP_SHPC "shpc"
typedef struct CXLHost CXLHost;
-struct PXBDev {
+typedef struct PXBDev {
/*< private >*/
PCIDevice parent_obj;
/*< public >*/
uint8_t bus_nr;
uint16_t numa_node;
bool bypass_iommu;
+} PXBDev;
+
+typedef struct PXBPCIEDev {
+ /*< private >*/
+ PXBDev parent_obj;
+} PXBPCIEDev;
+
+#define TYPE_PXB_DEV "pxb"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
+
+typedef struct PXBCXLDev {
+ /*< private >*/
+ PXBPCIEDev parent_obj;
+ /*< public >*/
+
bool hdm_for_passthrough;
- struct cxl_dev {
- CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
- } cxl;
-};
+ CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
+} PXBCXLDev;
-#define TYPE_PXB_CXL_DEVICE "pxb-cxl"
-DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV,
- TYPE_PXB_CXL_DEVICE)
+#define TYPE_PXB_CXL_DEV "pxb-cxl"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBCXLDev, PXB_CXL_DEV)
int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
uint16_t svid, uint16_t ssid,
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
hwaddr avail, hwaddr used);
void virtio_queue_update_rings(VirtIODevice *vdev, int n);
+void virtio_init_region_cache(VirtIODevice *vdev, int n);
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
void virtio_queue_notify(VirtIODevice *vdev, int n);
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
bool migration_has_failed(MigrationState *);
/* ...and after the device transmission */
bool migration_in_postcopy_after_devices(MigrationState *);
-void migration_global_dump(Monitor *mon);
/* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
bool migration_in_incoming_postcopy(void);
/* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
#ifndef QEMU_MMAP_ALLOC_H
#define QEMU_MMAP_ALLOC_H
+typedef enum {
+ QEMU_FS_TYPE_UNKNOWN = 0,
+ QEMU_FS_TYPE_TMPFS,
+ QEMU_FS_TYPE_HUGETLBFS,
+ QEMU_FS_TYPE_NUM,
+} QemuFsType;
size_t qemu_fd_getpagesize(int fd);
+QemuFsType qemu_fd_getfs(int fd);
/**
* qemu_ram_mmap: mmap anonymous memory, the specified file or device.
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);
+/*
+ * blk_io_plug/unplug are thread-local operations. This means that multiple
+ * IOThreads can simultaneously call plug/unplug, but the caller must ensure
+ * that each unplug() is called in the same IOThread of the matching plug().
+ */
void coroutine_fn blk_co_io_plug(BlockBackend *blk);
void co_wrapper blk_io_plug(BlockBackend *blk);
description: 'mutex debugging support')
option('debug_stack_usage', type: 'boolean', value: false,
description: 'measure coroutine stack usage')
-option('qom_cast_debug', type: 'boolean', value: false,
+option('qom_cast_debug', type: 'boolean', value: true,
description: 'cast debugging support')
option('gprof', type: 'boolean', value: false,
description: 'QEMU profiling with gprof',
#include "qapi/qapi-visit-migration.h"
#include "qapi/clone-visitor.h"
#include "trace.h"
+#include "options.h"
#define CHUNK_SIZE (1 << 10)
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"
#include "trace.h"
+#include "options.h"
#define BLK_MIG_BLOCK_SIZE (1ULL << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)
bmds->bulk_completed = 0;
bmds->total_sectors = sectors;
bmds->completed_sectors = 0;
- bmds->shared_base = migrate_use_block_incremental();
+ bmds->shared_base = migrate_block_incremental();
assert(i < num_bs);
bmds_bs[i].bmds = bmds;
static bool block_is_active(void *opaque)
{
- return migrate_use_block();
+ return migrate_block();
}
static SaveVMHandlers savevm_block_handlers = {
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "net/filter.h"
+#include "options.h"
static bool vmstate_loading;
static Notifier packets_compare_notifier;
trace_colo_vm_state_change("stop", "run");
timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
- s->parameters.x_checkpoint_delay);
+ migrate_checkpoint_delay());
while (s->state == MIGRATION_STATUS_COLO) {
if (failover_get_state() != FAILOVER_STATUS_NONE) {
qemu_event_set(&s->colo_checkpoint_event);
s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
- next_notify_time = s->colo_checkpoint_time +
- s->parameters.x_checkpoint_delay;
+ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
timer_mod(s->colo_delay_timer, next_notify_time);
}
'migration.c',
'multifd.c',
'multifd-zlib.c',
+ 'options.c',
'postcopy-ram.c',
'savevm.c',
'socket.c',
#include "qemu/osdep.h"
#include "block/qapi.h"
-#include "migration/misc.h"
#include "migration/snapshot.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "ui/qemu-spice.h"
+#include "sysemu/sysemu.h"
+#include "migration.h"
+
+static void migration_global_dump(Monitor *mon)
+{
+ MigrationState *ms = migrate_get_current();
+
+ monitor_printf(mon, "globals:\n");
+ monitor_printf(mon, "store-global-state: %s\n",
+ ms->store_global_state ? "on" : "off");
+ monitor_printf(mon, "only-migratable: %s\n",
+ only_migratable ? "on" : "off");
+ monitor_printf(mon, "send-configuration: %s\n",
+ ms->send_configuration ? "on" : "off");
+ monitor_printf(mon, "send-section-footer: %s\n",
+ ms->send_section_footer ? "on" : "off");
+ monitor_printf(mon, "decompress-error-check: %s\n",
+ ms->decompress_error_check ? "on" : "off");
+ monitor_printf(mon, "clear-bitmap-shift: %u\n",
+ ms->clear_bitmap_shift);
+}
void hmp_info_migrate(Monitor *mon, const QDict *qdict)
{
hmp_handle_error(mon, err);
}
-void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
-{
- Error *err = NULL;
- const char *protocol = qdict_get_str(qdict, "protocol");
- const char *hostname = qdict_get_str(qdict, "hostname");
- bool has_port = qdict_haskey(qdict, "port");
- int port = qdict_get_try_int(qdict, "port", -1);
- bool has_tls_port = qdict_haskey(qdict, "tls-port");
- int tls_port = qdict_get_try_int(qdict, "tls-port", -1);
- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject");
-
- qmp_client_migrate_info(protocol, hostname,
- has_port, port, has_tls_port, tls_port,
- cert_subject, &err);
- hmp_handle_error(mon, err);
-}
-
void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
-#include "ui/qemu-spice.h"
+#include "options.h"
#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */
MIG_RP_MSG_MAX
};
-/* Migration capabilities set */
-struct MigrateCapsSet {
- int size; /* Capability set size */
- MigrationCapability caps[]; /* Variadic array of capabilities */
-};
-typedef struct MigrateCapsSet MigrateCapsSet;
-
-/* Define and initialize MigrateCapsSet */
-#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \
- MigrateCapsSet _name = { \
- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
- .caps = { __VA_ARGS__ } \
- }
-
-/* Background-snapshot compatibility check list */
-static const
-INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
- MIGRATION_CAPABILITY_POSTCOPY_RAM,
- MIGRATION_CAPABILITY_DIRTY_BITMAPS,
- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
- MIGRATION_CAPABILITY_RETURN_PATH,
- MIGRATION_CAPABILITY_MULTIFD,
- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
- MIGRATION_CAPABILITY_AUTO_CONVERGE,
- MIGRATION_CAPABILITY_RELEASE_RAM,
- MIGRATION_CAPABILITY_RDMA_PIN_ALL,
- MIGRATION_CAPABILITY_COMPRESS,
- MIGRATION_CAPABILITY_XBZRLE,
- MIGRATION_CAPABILITY_X_COLO,
- MIGRATION_CAPABILITY_VALIDATE_UUID,
- MIGRATION_CAPABILITY_ZERO_COPY_SEND);
-
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
static bool migration_needs_multiple_sockets(void)
{
- return migrate_use_multifd() || migrate_postcopy_preempt();
+ return migrate_multifd() || migrate_postcopy_preempt();
}
static bool uri_supports_multi_channels(const char *uri)
static void migrate_generate_event(int new_state)
{
- if (migrate_use_events()) {
+ if (migrate_events()) {
qapi_event_send_migration(new_state);
}
}
-static bool migrate_late_block_activate(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[
- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
-}
-
/*
* Send a message on the return channel back to the source
* of the migration.
static bool migration_should_start_incoming(bool main_channel)
{
/* Multifd doesn't start unless all channels are established */
- if (migrate_use_multifd()) {
+ if (migrate_multifd()) {
return migration_has_all_channels();
}
uint32_t channel_magic = 0;
int ret = 0;
- if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+ if (migrate_multifd() && !migrate_postcopy_ram() &&
qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
/*
* With multiple channels, it is possible that we receive channels
} else {
/* Multiple connections */
assert(migration_needs_multiple_sockets());
- if (migrate_use_multifd()) {
+ if (migrate_multifd()) {
multifd_recv_new_channel(ioc, &local_err);
} else {
assert(migrate_postcopy_preempt());
return false;
}
- if (migrate_use_multifd()) {
+ if (migrate_multifd()) {
return multifd_recv_all_channels_created();
}
migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}
-MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
-{
- MigrationCapabilityStatusList *head = NULL, **tail = &head;
- MigrationCapabilityStatus *caps;
- MigrationState *s = migrate_get_current();
- int i;
-
- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
-#ifndef CONFIG_LIVE_BLOCK_MIGRATION
- if (i == MIGRATION_CAPABILITY_BLOCK) {
- continue;
- }
-#endif
- caps = g_malloc0(sizeof(*caps));
- caps->capability = i;
- caps->state = s->enabled_capabilities[i];
- QAPI_LIST_APPEND(tail, caps);
- }
-
- return head;
-}
-
-MigrationParameters *qmp_query_migrate_parameters(Error **errp)
-{
- MigrationParameters *params;
- MigrationState *s = migrate_get_current();
-
- /* TODO use QAPI_CLONE() instead of duplicating it inline */
- params = g_malloc0(sizeof(*params));
- params->has_compress_level = true;
- params->compress_level = s->parameters.compress_level;
- params->has_compress_threads = true;
- params->compress_threads = s->parameters.compress_threads;
- params->has_compress_wait_thread = true;
- params->compress_wait_thread = s->parameters.compress_wait_thread;
- params->has_decompress_threads = true;
- params->decompress_threads = s->parameters.decompress_threads;
- params->has_throttle_trigger_threshold = true;
- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
- params->has_cpu_throttle_initial = true;
- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
- params->has_cpu_throttle_increment = true;
- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
- params->has_cpu_throttle_tailslow = true;
- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
- params->tls_creds = g_strdup(s->parameters.tls_creds);
- params->tls_hostname = g_strdup(s->parameters.tls_hostname);
- params->tls_authz = g_strdup(s->parameters.tls_authz ?
- s->parameters.tls_authz : "");
- params->has_max_bandwidth = true;
- params->max_bandwidth = s->parameters.max_bandwidth;
- params->has_downtime_limit = true;
- params->downtime_limit = s->parameters.downtime_limit;
- params->has_x_checkpoint_delay = true;
- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
- params->has_block_incremental = true;
- params->block_incremental = s->parameters.block_incremental;
- params->has_multifd_channels = true;
- params->multifd_channels = s->parameters.multifd_channels;
- params->has_multifd_compression = true;
- params->multifd_compression = s->parameters.multifd_compression;
- params->has_multifd_zlib_level = true;
- params->multifd_zlib_level = s->parameters.multifd_zlib_level;
- params->has_multifd_zstd_level = true;
- params->multifd_zstd_level = s->parameters.multifd_zstd_level;
- params->has_xbzrle_cache_size = true;
- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
- params->has_max_postcopy_bandwidth = true;
- params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
- params->has_max_cpu_throttle = true;
- params->max_cpu_throttle = s->parameters.max_cpu_throttle;
- params->has_announce_initial = true;
- params->announce_initial = s->parameters.announce_initial;
- params->has_announce_max = true;
- params->announce_max = s->parameters.announce_max;
- params->has_announce_rounds = true;
- params->announce_rounds = s->parameters.announce_rounds;
- params->has_announce_step = true;
- params->announce_step = s->parameters.announce_step;
-
- if (s->parameters.has_block_bitmap_mapping) {
- params->has_block_bitmap_mapping = true;
- params->block_bitmap_mapping =
- QAPI_CLONE(BitmapMigrationNodeAliasList,
- s->parameters.block_bitmap_mapping);
- }
-
- return params;
-}
-
-void qmp_client_migrate_info(const char *protocol, const char *hostname,
- bool has_port, int64_t port,
- bool has_tls_port, int64_t tls_port,
- const char *cert_subject,
- Error **errp)
-{
- if (strcmp(protocol, "spice") == 0) {
- if (!qemu_using_spice(errp)) {
- return;
- }
-
- if (!has_port && !has_tls_port) {
- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
- return;
- }
-
- if (qemu_spice.migrate_info(hostname,
- has_port ? port : -1,
- has_tls_port ? tls_port : -1,
- cert_subject)) {
- error_setg(errp, "Could not set up display for migration");
- return;
- }
- return;
- }
-
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
-}
-
-AnnounceParameters *migrate_announce_params(void)
-{
- static AnnounceParameters ap;
-
- MigrationState *s = migrate_get_current();
-
- ap.initial = s->parameters.announce_initial;
- ap.max = s->parameters.announce_max;
- ap.rounds = s->parameters.announce_rounds;
- ap.step = s->parameters.announce_step;
-
- return &ap;
-}
-
/*
* Return true if we're already in the middle of a migration
* (i.e. any of the active or setup states)
size_t page_size = qemu_target_page_size();
info->ram = g_malloc0(sizeof(*info->ram));
- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
+ info->ram->transferred = stat64_get(&ram_counters.transferred);
info->ram->total = ram_bytes_total();
- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
+ info->ram->duplicate = stat64_get(&ram_counters.zero_pages);
/* legacy value. It is not used anymore */
info->ram->skipped = 0;
- info->ram->normal = stat64_get(&ram_atomic_counters.normal);
+ info->ram->normal = stat64_get(&ram_counters.normal_pages);
info->ram->normal_bytes = info->ram->normal * page_size;
info->ram->mbps = s->mbps;
- info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+ info->ram->dirty_sync_count =
+ stat64_get(&ram_counters.dirty_sync_count);
info->ram->dirty_sync_missed_zero_copy =
- ram_counters.dirty_sync_missed_zero_copy;
- info->ram->postcopy_requests = ram_counters.postcopy_requests;
+ stat64_get(&ram_counters.dirty_sync_missed_zero_copy);
+ info->ram->postcopy_requests =
+ stat64_get(&ram_counters.postcopy_requests);
info->ram->page_size = page_size;
- info->ram->multifd_bytes = ram_counters.multifd_bytes;
+ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes);
info->ram->pages_per_second = s->pages_per_second;
- info->ram->precopy_bytes = ram_counters.precopy_bytes;
- info->ram->downtime_bytes = ram_counters.downtime_bytes;
- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);
+ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes);
+ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes);
+ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes);
- if (migrate_use_xbzrle()) {
+ if (migrate_xbzrle()) {
info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
info->xbzrle_cache->bytes = xbzrle_counters.bytes;
info->xbzrle_cache->overflow = xbzrle_counters.overflow;
}
- if (migrate_use_compression()) {
+ if (migrate_compress()) {
info->compression = g_malloc0(sizeof(*info->compression));
info->compression->pages = compression_counters.pages;
info->compression->busy = compression_counters.busy;
info->status = state;
}
-typedef enum WriteTrackingSupport {
- WT_SUPPORT_UNKNOWN = 0,
- WT_SUPPORT_ABSENT,
- WT_SUPPORT_AVAILABLE,
- WT_SUPPORT_COMPATIBLE
-} WriteTrackingSupport;
-
-static
-WriteTrackingSupport migrate_query_write_tracking(void)
-{
- /* Check if kernel supports required UFFD features */
- if (!ram_write_tracking_available()) {
- return WT_SUPPORT_ABSENT;
- }
- /*
- * Check if current memory configuration is
- * compatible with required UFFD features.
- */
- if (!ram_write_tracking_compatible()) {
- return WT_SUPPORT_AVAILABLE;
- }
-
- return WT_SUPPORT_COMPATIBLE;
-}
-
-/**
- * @migration_caps_check - check capability validity
- *
- * @cap_list: old capability list, array of bool
- * @params: new capabilities to be applied soon
- * @errp: set *errp if the check failed, with reason
- *
- * Returns true if check passed, otherwise false.
- */
-static bool migrate_caps_check(bool *cap_list,
- MigrationCapabilityStatusList *params,
- Error **errp)
-{
- MigrationCapabilityStatusList *cap;
- bool old_postcopy_cap;
- MigrationIncomingState *mis = migration_incoming_get_current();
-
- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
-
- for (cap = params; cap; cap = cap->next) {
- cap_list[cap->value->capability] = cap->value->state;
- }
-
-#ifndef CONFIG_LIVE_BLOCK_MIGRATION
- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
- "block migration");
- error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
- return false;
- }
-#endif
-
-#ifndef CONFIG_REPLICATION
- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
- error_setg(errp, "QEMU compiled without replication module"
- " can't enable COLO");
- error_append_hint(errp, "Please enable replication before COLO.\n");
- return false;
- }
-#endif
-
- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
- /* This check is reasonably expensive, so only when it's being
- * set the first time, also it's only the destination that needs
- * special support.
- */
- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
- !postcopy_ram_supported_by_host(mis)) {
- /* postcopy_ram_supported_by_host will have emitted a more
- * detailed message
- */
- error_setg(errp, "Postcopy is not supported");
- return false;
- }
-
- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
- error_setg(errp, "Postcopy is not compatible with ignore-shared");
- return false;
- }
- }
-
- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
- WriteTrackingSupport wt_support;
- int idx;
- /*
- * Check if 'background-snapshot' capability is supported by
- * host kernel and compatible with guest memory configuration.
- */
- wt_support = migrate_query_write_tracking();
- if (wt_support < WT_SUPPORT_AVAILABLE) {
- error_setg(errp, "Background-snapshot is not supported by host kernel");
- return false;
- }
- if (wt_support < WT_SUPPORT_COMPATIBLE) {
- error_setg(errp, "Background-snapshot is not compatible "
- "with guest memory configuration");
- return false;
- }
-
- /*
- * Check if there are any migration capabilities
- * incompatible with 'background-snapshot'.
- */
- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
- int incomp_cap = check_caps_background_snapshot.caps[idx];
- if (cap_list[incomp_cap]) {
- error_setg(errp,
- "Background-snapshot is not compatible with %s",
- MigrationCapability_str(incomp_cap));
- return false;
- }
- }
- }
-
-#ifdef CONFIG_LINUX
- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
- cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
- cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
- migrate_multifd_compression() ||
- migrate_use_tls())) {
- error_setg(errp,
- "Zero copy only available for non-compressed non-TLS multifd migration");
- return false;
- }
-#else
- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
- error_setg(errp,
- "Zero copy currently only available on Linux");
- return false;
- }
-#endif
-
- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
- error_setg(errp, "Postcopy preempt requires postcopy-ram");
- return false;
- }
-
- /*
- * Preempt mode requires urgent pages to be sent in separate
- * channel, OTOH compression logic will disorder all pages into
- * different compression channels, which is not compatible with the
- * preempt assumptions on channel assignments.
- */
- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
- error_setg(errp, "Postcopy preempt not compatible with compress");
- return false;
- }
- }
-
- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
- error_setg(errp, "Multifd is not compatible with compress");
- return false;
- }
- }
-
- return true;
-}
-
static void fill_destination_migration_info(MigrationInfo *info)
{
MigrationIncomingState *mis = migration_incoming_get_current();
return info;
}
-void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
- Error **errp)
-{
- MigrationState *s = migrate_get_current();
- MigrationCapabilityStatusList *cap;
- bool cap_list[MIGRATION_CAPABILITY__MAX];
-
- if (migration_is_running(s->state)) {
- error_setg(errp, QERR_MIGRATION_ACTIVE);
- return;
- }
-
- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
- if (!migrate_caps_check(cap_list, params, errp)) {
- return;
- }
-
- for (cap = params; cap; cap = cap->next) {
- s->enabled_capabilities[cap->value->capability] = cap->value->state;
- }
-}
-
/*
* Check whether the parameters are valid. Error will be put into errp
* (if provided). Return true if valid, otherwise false.
}
#ifdef CONFIG_LINUX
- if (migrate_use_zero_copy_send() &&
+ if (migrate_zero_copy_send() &&
((params->has_multifd_compression && params->multifd_compression) ||
(params->tls_creds && *params->tls_creds))) {
error_setg(errp,
}
}
-static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
- bool state)
-{
- MigrationCapabilityStatus *cap;
-
- cap = g_new0(MigrationCapabilityStatus, 1);
- cap->capability = index;
- cap->state = state;
-
- return cap;
-}
-
-void migrate_set_block_enabled(bool value, Error **errp)
-{
- MigrationCapabilityStatusList *cap = NULL;
-
- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
- qmp_migrate_set_capabilities(cap, errp);
- qapi_free_MigrationCapabilityStatusList(cap);
-}
-
static void migrate_set_block_incremental(MigrationState *s, bool value)
{
s->parameters.block_incremental = value;
{
if (s->must_remove_block_options) {
/* setting to false can never fail */
- migrate_set_block_enabled(false, &error_abort);
+ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort);
migrate_set_block_incremental(s, false);
s->must_remove_block_options = false;
}
}
if (blk || blk_inc) {
- if (migrate_colo_enabled()) {
+ if (migrate_colo()) {
error_setg(errp, "No disk migration is required in COLO mode");
return false;
}
- if (migrate_use_block() || migrate_use_block_incremental()) {
+ if (migrate_block() || migrate_block_incremental()) {
error_setg(errp, "Command options are incompatible with "
"current migration capabilities");
return false;
}
- migrate_set_block_enabled(true, &local_err);
- if (local_err) {
+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) {
error_propagate(errp, local_err);
return false;
}
qemu_sem_post(&s->pause_sem);
}
-bool migrate_release_ram(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
-}
-
-bool migrate_postcopy_ram(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
-}
-
-bool migrate_postcopy(void)
-{
- return migrate_postcopy_ram() || migrate_dirty_bitmaps();
-}
-
-bool migrate_auto_converge(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
-}
-
-bool migrate_zero_blocks(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
-}
-
-bool migrate_postcopy_blocktime(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
-}
-
-bool migrate_use_compression(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
-}
-
-int migrate_compress_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_level;
-}
-
-int migrate_compress_threads(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_threads;
-}
-
-int migrate_compress_wait_thread(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.compress_wait_thread;
-}
-
-int migrate_decompress_threads(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.decompress_threads;
-}
-
-bool migrate_dirty_bitmaps(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
-}
-
-bool migrate_ignore_shared(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
-}
-
-bool migrate_validate_uuid(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
-}
-
-bool migrate_use_events(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
-}
-
-bool migrate_use_multifd(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
-}
-
-bool migrate_pause_before_switchover(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[
- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
-}
-
-int migrate_multifd_channels(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_channels;
-}
-
-MultiFDCompression migrate_multifd_compression(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
- return s->parameters.multifd_compression;
-}
-
-int migrate_multifd_zlib_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_zlib_level;
-}
-
-int migrate_multifd_zstd_level(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.multifd_zstd_level;
-}
-
-#ifdef CONFIG_LINUX
-bool migrate_use_zero_copy_send(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
-}
-#endif
-
int migrate_use_tls(void)
{
MigrationState *s;
return s->parameters.tls_creds && *s->parameters.tls_creds;
}
-int migrate_use_xbzrle(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
-}
-
-uint64_t migrate_xbzrle_cache_size(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.xbzrle_cache_size;
-}
-
-static int64_t migrate_max_postcopy_bandwidth(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.max_postcopy_bandwidth;
-}
-
-bool migrate_use_block(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
-}
-
-bool migrate_use_return_path(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
-}
-
-bool migrate_use_block_incremental(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->parameters.block_incremental;
-}
-
-bool migrate_background_snapshot(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
-}
-
-bool migrate_postcopy_preempt(void)
-{
- MigrationState *s;
-
- s = migrate_get_current();
-
- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
-}
-
/* migration thread support */
/*
* Something bad happened to the RP stream, mark an error
ret = global_state_store();
if (!ret) {
- bool inactivate = !migrate_colo_enabled();
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
trace_migration_completion_vm_stop(ret);
if (ret >= 0) {
MIGRATION_STATUS_DEVICE);
}
if (ret >= 0) {
+ s->block_inactive = !migrate_colo();
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
- inactivate);
- }
- if (inactivate && ret >= 0) {
- s->block_inactive = true;
+ s->block_inactive);
}
}
qemu_mutex_unlock_iothread();
goto fail_invalidate;
}
- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
+ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
/* COLO does not support postcopy */
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
+ s->block_inactive = true;
} else {
s->block_inactive = false;
}
MIGRATION_STATUS_FAILED);
}
-bool migrate_colo_enabled(void)
-{
- MigrationState *s = migrate_get_current();
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
-}
-
typedef enum MigThrError {
/* No error detected */
MIG_THR_ERR_NONE = 0,
static uint64_t migration_total_bytes(MigrationState *s)
{
return qemu_file_total_transferred(s->to_dst_file) +
- ram_counters.multifd_bytes;
+ stat64_get(&ram_counters.multifd_bytes);
}
static void migration_calculate_complete(MigrationState *s)
runstate_set(RUN_STATE_POSTMIGRATE);
break;
case MIGRATION_STATUS_COLO:
- if (!migrate_colo_enabled()) {
+ if (!migrate_colo()) {
error_report("%s: critical error: calling COLO code without "
"COLO enabled", __func__);
}
qemu_savevm_send_postcopy_advise(s->to_dst_file);
}
- if (migrate_colo_enabled()) {
+ if (migrate_colo()) {
/* Notify migration destination that we enable COLO */
qemu_savevm_send_colo_enable(s->to_dst_file);
}
if (resume) {
/* This is a resumed migration */
- rate_limit = s->parameters.max_postcopy_bandwidth /
+ rate_limit = migrate_max_postcopy_bandwidth() /
XFER_LIMIT_RATIO;
} else {
/* This is a fresh new migration */
- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
+ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO;
/* Notify before starting migration thread */
notifier_list_notify(&migration_state_notifiers, s);
* precopy, only if user specified "return-path" capability would
* QEMU uses the return path.
*/
- if (migrate_postcopy_ram() || migrate_use_return_path()) {
+ if (migrate_postcopy_ram() || migrate_return_path()) {
if (open_return_path_on_source(s, !resume)) {
error_report("Unable to open return-path for postcopy");
migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
s->migration_thread_running = true;
}
-void migration_global_dump(Monitor *mon)
-{
- MigrationState *ms = migrate_get_current();
-
- monitor_printf(mon, "globals:\n");
- monitor_printf(mon, "store-global-state: %s\n",
- ms->store_global_state ? "on" : "off");
- monitor_printf(mon, "only-migratable: %s\n",
- only_migratable ? "on" : "off");
- monitor_printf(mon, "send-configuration: %s\n",
- ms->send_configuration ? "on" : "off");
- monitor_printf(mon, "send-section-footer: %s\n",
- ms->send_section_footer ? "on" : "off");
- monitor_printf(mon, "decompress-error-check: %s\n",
- ms->decompress_error_check ? "on" : "off");
- monitor_printf(mon, "clear-bitmap-shift: %u\n",
- ms->clear_bitmap_shift);
-}
-
#define DEFINE_PROP_MIG_CAP(name, x) \
- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false)
static Property migration_properties[] = {
DEFINE_PROP_BOOL("store-global-state", MigrationState,
*/
static bool migration_object_check(MigrationState *ms, Error **errp)
{
- MigrationCapabilityStatusList *head = NULL;
/* Assuming all off */
- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
- int i;
+ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };
if (!migrate_params_check(&ms->parameters, errp)) {
return false;
}
- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (ms->enabled_capabilities[i]) {
- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
- }
- }
-
- ret = migrate_caps_check(cap_list, head, errp);
-
- /* It works with head == NULL */
- qapi_free_MigrationCapabilityStatusList(head);
-
- return ret;
+ return migrate_caps_check(old_caps, ms->capabilities, errp);
}
static const TypeInfo migration_type = {
int64_t downtime_start;
int64_t downtime;
int64_t expected_downtime;
- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX];
+ bool capabilities[MIGRATION_CAPABILITY__MAX];
int64_t setup_time;
/*
* Whether guest was running when we enter the completion stage.
bool migration_in_postcopy(void);
MigrationState *migrate_get_current(void);
-bool migrate_postcopy(void);
-
-bool migrate_release_ram(void);
-bool migrate_postcopy_ram(void);
-bool migrate_zero_blocks(void);
-bool migrate_dirty_bitmaps(void);
-bool migrate_ignore_shared(void);
-bool migrate_validate_uuid(void);
-
-bool migrate_auto_converge(void);
-bool migrate_use_multifd(void);
-bool migrate_pause_before_switchover(void);
-int migrate_multifd_channels(void);
-MultiFDCompression migrate_multifd_compression(void);
-int migrate_multifd_zlib_level(void);
-int migrate_multifd_zstd_level(void);
-
-#ifdef CONFIG_LINUX
-bool migrate_use_zero_copy_send(void);
-#else
-#define migrate_use_zero_copy_send() (false)
-#endif
int migrate_use_tls(void);
-int migrate_use_xbzrle(void);
-uint64_t migrate_xbzrle_cache_size(void);
-bool migrate_colo_enabled(void);
-
-bool migrate_use_block(void);
-bool migrate_use_block_incremental(void);
-int migrate_max_cpu_throttle(void);
-bool migrate_use_return_path(void);
uint64_t ram_get_total_transferred_pages(void);
-bool migrate_use_compression(void);
-int migrate_compress_level(void);
-int migrate_compress_threads(void);
-int migrate_compress_wait_thread(void);
-int migrate_decompress_threads(void);
-bool migrate_use_events(void);
-bool migrate_postcopy_blocktime(void);
-bool migrate_background_snapshot(void);
-bool migrate_postcopy_preempt(void);
-
/* Sending on the return path - generic and then for each message type */
void migrate_send_rp_shut(MigrationIncomingState *mis,
uint32_t value);
#include "qapi/error.h"
#include "migration.h"
#include "trace.h"
+#include "options.h"
#include "multifd.h"
struct zlib_data {
#include "qapi/error.h"
#include "migration.h"
#include "trace.h"
+#include "options.h"
#include "multifd.h"
struct zstd_data {
#include "trace.h"
#include "multifd.h"
#include "threadinfo.h"
-
+#include "options.h"
#include "qemu/yank.h"
#include "io/channel-socket.h"
#include "yank_functions.h"
p->pages = pages;
transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len;
qemu_file_acct_rate_limit(f, transferred);
- ram_counters.multifd_bytes += transferred;
- stat64_add(&ram_atomic_counters.transferred, transferred);
qemu_mutex_unlock(&p->mutex);
+ stat64_add(&ram_counters.transferred, transferred);
+ stat64_add(&ram_counters.multifd_bytes, transferred);
qemu_sem_post(&p->sem);
return 1;
{
int i;
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return;
}
multifd_send_terminate_threads(NULL);
return -1;
}
if (ret == 1) {
- dirty_sync_missed_zero_copy();
+ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1);
}
return ret;
int i;
bool flush_zero_copy;
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return 0;
}
if (multifd_send_state->pages->num) {
* all the dirty bitmaps.
*/
- flush_zero_copy = migrate_use_zero_copy_send();
+ flush_zero_copy = migrate_zero_copy_send();
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
p->flags |= MULTIFD_FLAG_SYNC;
p->pending_job++;
qemu_file_acct_rate_limit(f, p->packet_len);
- ram_counters.multifd_bytes += p->packet_len;
- stat64_add(&ram_atomic_counters.transferred, p->packet_len);
qemu_mutex_unlock(&p->mutex);
+ stat64_add(&ram_counters.transferred, p->packet_len);
+ stat64_add(&ram_counters.multifd_bytes, p->packet_len);
qemu_sem_post(&p->sem);
}
for (i = 0; i < migrate_multifd_channels(); i++) {
MigrationThread *thread = NULL;
Error *local_err = NULL;
int ret = 0;
- bool use_zero_copy_send = migrate_use_zero_copy_send();
+ bool use_zero_copy_send = migrate_zero_copy_send();
thread = MigrationThreadAdd(p->name, qemu_get_thread_id());
uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
uint8_t i;
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return 0;
}
p->page_size = qemu_target_page_size();
p->page_count = page_count;
- if (migrate_use_zero_copy_send()) {
+ if (migrate_zero_copy_send()) {
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
} else {
p->write_flags = 0;
void multifd_load_shutdown(void)
{
- if (migrate_use_multifd()) {
+ if (migrate_multifd()) {
multifd_recv_terminate_threads(NULL);
}
}
{
int i;
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return;
}
multifd_recv_terminate_threads(NULL);
{
int i;
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return;
}
for (i = 0; i < migrate_multifd_channels(); i++) {
* Return successfully if multiFD recv state is already initialised
* or multiFD is not enabled.
*/
- if (multifd_recv_state || !migrate_use_multifd()) {
+ if (multifd_recv_state || !migrate_multifd()) {
return 0;
}
{
int thread_count = migrate_multifd_channels();
- if (!migrate_use_multifd()) {
+ if (!migrate_multifd()) {
return true;
}
--- /dev/null
+/*
+ * QEMU migration capabilities
+ *
+ * Copyright (c) 2012-2023 Red Hat Inc
+ *
+ * Authors:
+ * Orit Wasserman <owasserm@redhat.com>
+ * Juan Quintela <quintela@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-visit-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/runstate.h"
+#include "migration/misc.h"
+#include "migration.h"
+#include "ram.h"
+#include "options.h"
+
+/*
+ * Report whether MIGRATION_CAPABILITY_AUTO_CONVERGE is currently enabled.
+ */
+bool migrate_auto_converge(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT is currently
+ * enabled.
+ */
+bool migrate_background_snapshot(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_BLOCK is currently enabled.
+ */
+bool migrate_block(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_BLOCK];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_X_COLO is currently enabled.
+ */
+bool migrate_colo(void)
+{
+    return migrate_get_current()->capabilities[MIGRATION_CAPABILITY_X_COLO];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_COMPRESS is currently enabled.
+ */
+bool migrate_compress(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_COMPRESS];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_DIRTY_BITMAPS is currently enabled.
+ */
+bool migrate_dirty_bitmaps(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_EVENTS is currently enabled.
+ */
+bool migrate_events(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_EVENTS];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_X_IGNORE_SHARED is currently enabled.
+ */
+bool migrate_ignore_shared(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE is currently
+ * enabled.
+ */
+bool migrate_late_block_activate(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_MULTIFD is currently enabled.
+ */
+bool migrate_multifd(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_MULTIFD];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER is currently
+ * enabled.
+ */
+bool migrate_pause_before_switchover(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME is currently
+ * enabled.
+ */
+bool migrate_postcopy_blocktime(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_POSTCOPY_PREEMPT is currently enabled.
+ */
+bool migrate_postcopy_preempt(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_POSTCOPY_RAM is currently enabled.
+ */
+bool migrate_postcopy_ram(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_RDMA_PIN_ALL is currently enabled.
+ */
+bool migrate_rdma_pin_all(void)
+{
+    const MigrationState *ms = migrate_get_current();
+    const bool enabled = ms->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
+
+    return enabled;
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_RELEASE_RAM is currently enabled.
+ */
+bool migrate_release_ram(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_RETURN_PATH is currently enabled.
+ */
+bool migrate_return_path(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_VALIDATE_UUID is currently enabled.
+ */
+bool migrate_validate_uuid(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_XBZRLE is currently enabled.
+ */
+bool migrate_xbzrle(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_XBZRLE];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_ZERO_BLOCKS is currently enabled.
+ */
+bool migrate_zero_blocks(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
+}
+
+/*
+ * Report whether MIGRATION_CAPABILITY_ZERO_COPY_SEND is currently enabled.
+ */
+bool migrate_zero_copy_send(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    return ms->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
+}
+
+/* pseudo capabilities */
+
+/*
+ * True when either migrate_postcopy_ram() or migrate_dirty_bitmaps()
+ * reports its capability as enabled.
+ */
+bool migrate_postcopy(void)
+{
+    if (migrate_postcopy_ram()) {
+        return true;
+    }
+    return migrate_dirty_bitmaps();
+}
+
+/*
+ * Level of write-tracking support, as reported by
+ * migrate_query_write_tracking().  The enumerators are declared in
+ * increasing order of support so that levels can be compared with '<'.
+ */
+typedef enum WriteTrackingSupport {
+ WT_SUPPORT_UNKNOWN = 0, /* NOTE(review): not returned by the query helper */
+ WT_SUPPORT_ABSENT, /* ram_write_tracking_available() reported false */
+ WT_SUPPORT_AVAILABLE, /* available, but ram_write_tracking_compatible() failed */
+ WT_SUPPORT_COMPATIBLE /* both availability and compatibility checks passed */
+} WriteTrackingSupport;
+
+/*
+ * Probe how far write tracking is usable: not at all, offered by the
+ * kernel only, or also compatible with the current memory configuration.
+ */
+static WriteTrackingSupport migrate_query_write_tracking(void)
+{
+    /* The kernel has to provide the required UFFD features first. */
+    if (!ram_write_tracking_available()) {
+        return WT_SUPPORT_ABSENT;
+    }
+
+    /*
+     * The features exist; what remains is whether the current memory
+     * configuration can actually be used with them.
+     */
+    return ram_write_tracking_compatible() ? WT_SUPPORT_COMPATIBLE
+                                           : WT_SUPPORT_AVAILABLE;
+}
+
+/*
+ * A fixed set of migration capabilities: an element count followed by
+ * the capabilities themselves.
+ */
+struct MigrateCapsSet {
+ int size; /* Number of entries stored in caps[] */
+ MigrationCapability caps[]; /* Flexible array member holding the entries */
+};
+typedef struct MigrateCapsSet MigrateCapsSet;
+
+/*
+ * Define a MigrateCapsSet called _name whose entries are the variadic
+ * arguments.  .size is derived from the argument list itself (size of an
+ * int array built from the same arguments divided by sizeof(int)), so it
+ * always matches the number of entries placed in .caps.
+ *
+ * NOTE(review): statically initialising a flexible array member is a GNU C
+ * extension rather than ISO C - confirm the supported compilers accept it.
+ */
+#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \
+ MigrateCapsSet _name = { \
+ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
+ .caps = { __VA_ARGS__ } \
+ }
+
+/*
+ * Background-snapshot compatibility check list: every capability named
+ * here is reported by migrate_caps_check() as "not compatible" when it is
+ * requested together with MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT.
+ */
+static const
+INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
+ MIGRATION_CAPABILITY_POSTCOPY_RAM,
+ MIGRATION_CAPABILITY_DIRTY_BITMAPS,
+ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
+ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
+ MIGRATION_CAPABILITY_RETURN_PATH,
+ MIGRATION_CAPABILITY_MULTIFD,
+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
+ MIGRATION_CAPABILITY_AUTO_CONVERGE,
+ MIGRATION_CAPABILITY_RELEASE_RAM,
+ MIGRATION_CAPABILITY_RDMA_PIN_ALL,
+ MIGRATION_CAPABILITY_COMPRESS,
+ MIGRATION_CAPABILITY_XBZRLE,
+ MIGRATION_CAPABILITY_X_COLO,
+ MIGRATION_CAPABILITY_VALIDATE_UUID,
+ MIGRATION_CAPABILITY_ZERO_COPY_SEND);
+
+/**
+ * migrate_caps_check - check capability compatibility
+ *
+ * @old_caps: capabilities in effect before the change, indexed by
+ *            MigrationCapability; only consulted to detect postcopy-ram
+ *            being switched on for the first time
+ * @new_caps: proposed capabilities, indexed by MigrationCapability; all
+ *            compatibility rules are evaluated against this array
+ * @errp: set *errp if the check failed, with reason
+ *
+ * Neither array is modified.  The checks stop at the first violation, so
+ * only one error is reported per call.
+ *
+ * Returns true if check passed, otherwise false.
+ */
+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
+ /* Capabilities that depend on features left out of this build. */
+#ifndef CONFIG_LIVE_BLOCK_MIGRATION
+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) {
+ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
+ "block migration");
+ error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
+ return false;
+ }
+#endif
+
+#ifndef CONFIG_REPLICATION
+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) {
+ error_setg(errp, "QEMU compiled without replication module"
+ " can't enable COLO");
+ error_append_hint(errp, "Please enable replication before COLO.\n");
+ return false;
+ }
+#endif
+
+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+ /* This check is reasonably expensive, so only when it's being
+ * set the first time, also it's only the destination that needs
+ * special support.
+ */
+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] &&
+ runstate_check(RUN_STATE_INMIGRATE) &&
+ !postcopy_ram_supported_by_host(mis)) {
+ /* postcopy_ram_supported_by_host will have emitted a more
+ * detailed message
+ */
+ error_setg(errp, "Postcopy is not supported");
+ return false;
+ }
+
+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
+ error_setg(errp, "Postcopy is not compatible with ignore-shared");
+ return false;
+ }
+ }
+
+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
+ WriteTrackingSupport wt_support;
+ int idx;
+ /*
+ * Check if 'background-snapshot' capability is supported by
+ * host kernel and compatible with guest memory configuration.
+ */
+ wt_support = migrate_query_write_tracking();
+ /* Levels are ordered, so '<' means "less support than required". */
+ if (wt_support < WT_SUPPORT_AVAILABLE) {
+ error_setg(errp, "Background-snapshot is not supported by host kernel");
+ return false;
+ }
+ if (wt_support < WT_SUPPORT_COMPATIBLE) {
+ error_setg(errp, "Background-snapshot is not compatible "
+ "with guest memory configuration");
+ return false;
+ }
+
+ /*
+ * Check if there are any migration capabilities
+ * incompatible with 'background-snapshot'.
+ */
+ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
+ int incomp_cap = check_caps_background_snapshot.caps[idx];
+ if (new_caps[incomp_cap]) {
+ error_setg(errp,
+ "Background-snapshot is not compatible with %s",
+ MigrationCapability_str(incomp_cap));
+ return false;
+ }
+ }
+ }
+
+#ifdef CONFIG_LINUX
+ /*
+ * Zero copy needs multifd and excludes compress, xbzrle, multifd
+ * compression and TLS.  The last two are read from the current
+ * migration parameters, not from new_caps.
+ */
+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
+ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] ||
+ new_caps[MIGRATION_CAPABILITY_COMPRESS] ||
+ new_caps[MIGRATION_CAPABILITY_XBZRLE] ||
+ migrate_multifd_compression() ||
+ migrate_use_tls())) {
+ error_setg(errp,
+ "Zero copy only available for non-compressed non-TLS multifd migration");
+ return false;
+ }
+#else
+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
+ error_setg(errp,
+ "Zero copy currently only available on Linux");
+ return false;
+ }
+#endif
+
+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+ error_setg(errp, "Postcopy preempt requires postcopy-ram");
+ return false;
+ }
+
+ /*
+ * Preempt mode requires urgent pages to be sent in separate
+ * channel, OTOH compression logic will disorder all pages into
+ * different compression channels, which is not compatible with the
+ * preempt assumptions on channel assignments.
+ */
+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) {
+ error_setg(errp, "Postcopy preempt not compatible with compress");
+ return false;
+ }
+ }
+
+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) {
+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) {
+ error_setg(errp, "Multifd is not compatible with compress");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Set one capability to @value, provided no migration is running and the
+ * resulting capability set passes migrate_caps_check().
+ *
+ * @cap: index into the capabilities array
+ * @value: requested state for that capability
+ * @errp: receives the reason when the change is refused
+ *
+ * Returns true if the capability was updated, false otherwise (the stored
+ * capabilities are left unchanged in that case).
+ */
+bool migrate_cap_set(int cap, bool value, Error **errp)
+{
+    MigrationState *ms = migrate_get_current();
+    bool proposed[MIGRATION_CAPABILITY__MAX];
+
+    if (migration_is_running(ms->state)) {
+        error_setg(errp, QERR_MIGRATION_ACTIVE);
+        return false;
+    }
+
+    /* Validate a scratch copy so that a refused change leaves no trace. */
+    memcpy(proposed, ms->capabilities, sizeof(proposed));
+    proposed[cap] = value;
+
+    if (migrate_caps_check(ms->capabilities, proposed, errp)) {
+        ms->capabilities[cap] = value;
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Build a list with one MigrationCapabilityStatus per capability, in
+ * enum order, carrying the state currently stored in the migration state.
+ * MIGRATION_CAPABILITY_BLOCK is left out when CONFIG_LIVE_BLOCK_MIGRATION
+ * is not defined.
+ *
+ * @errp is not written by this function.
+ */
+MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
+{
+    MigrationState *ms = migrate_get_current();
+    MigrationCapabilityStatusList *result = NULL;
+    MigrationCapabilityStatusList **next = &result;
+    int idx;
+
+    for (idx = 0; idx < MIGRATION_CAPABILITY__MAX; idx++) {
+        MigrationCapabilityStatus *entry;
+
+#ifndef CONFIG_LIVE_BLOCK_MIGRATION
+        if (idx == MIGRATION_CAPABILITY_BLOCK) {
+            continue;
+        }
+#endif
+        entry = g_new0(MigrationCapabilityStatus, 1);
+        entry->capability = idx;
+        entry->state = ms->capabilities[idx];
+        QAPI_LIST_APPEND(next, entry);
+    }
+
+    return result;
+}
+
+/*
+ * Apply every capability change listed in @params, all or nothing.
+ *
+ * The request is refused with QERR_MIGRATION_ACTIVE while a migration is
+ * running.  Otherwise the changes are first applied to a scratch copy and
+ * validated with migrate_caps_check(); the stored capabilities are only
+ * touched once the whole combination has been accepted.
+ */
+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
+                                  Error **errp)
+{
+    MigrationState *ms = migrate_get_current();
+    MigrationCapabilityStatusList *it;
+    bool proposed[MIGRATION_CAPABILITY__MAX];
+
+    if (migration_is_running(ms->state)) {
+        error_setg(errp, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    /* Stage the request on top of the current settings. */
+    memcpy(proposed, ms->capabilities, sizeof(proposed));
+    for (it = params; it != NULL; it = it->next) {
+        const MigrationCapabilityStatus *req = it->value;
+
+        proposed[req->capability] = req->state;
+    }
+
+    if (!migrate_caps_check(ms->capabilities, proposed, errp)) {
+        return;
+    }
+
+    /* Accepted: commit each requested entry. */
+    for (it = params; it != NULL; it = it->next) {
+        const MigrationCapabilityStatus *req = it->value;
+
+        ms->capabilities[req->capability] = req->state;
+    }
+}
+
+/* parameters */
+
+/* Current value of the block_incremental migration parameter. */
+bool migrate_block_incremental(void)
+{
+    return migrate_get_current()->parameters.block_incremental;
+}
+
+/* Current value of the x_checkpoint_delay migration parameter. */
+uint32_t migrate_checkpoint_delay(void)
+{
+    return migrate_get_current()->parameters.x_checkpoint_delay;
+}
+
+/* Current value of the compress_level migration parameter. */
+int migrate_compress_level(void)
+{
+    return migrate_get_current()->parameters.compress_level;
+}
+
+/* Current value of the compress_threads migration parameter. */
+int migrate_compress_threads(void)
+{
+    return migrate_get_current()->parameters.compress_threads;
+}
+
+/* Current value of the compress_wait_thread migration parameter. */
+int migrate_compress_wait_thread(void)
+{
+    return migrate_get_current()->parameters.compress_wait_thread;
+}
+
+/* Current value of the cpu_throttle_increment migration parameter. */
+uint8_t migrate_cpu_throttle_increment(void)
+{
+    return migrate_get_current()->parameters.cpu_throttle_increment;
+}
+
+/* Current value of the cpu_throttle_initial migration parameter. */
+uint8_t migrate_cpu_throttle_initial(void)
+{
+    return migrate_get_current()->parameters.cpu_throttle_initial;
+}
+
+/* Current value of the cpu_throttle_tailslow migration parameter. */
+bool migrate_cpu_throttle_tailslow(void)
+{
+    return migrate_get_current()->parameters.cpu_throttle_tailslow;
+}
+
+/* Current value of the decompress_threads migration parameter. */
+int migrate_decompress_threads(void)
+{
+    return migrate_get_current()->parameters.decompress_threads;
+}
+
+/* Current value of the max_cpu_throttle migration parameter. */
+uint8_t migrate_max_cpu_throttle(void)
+{
+    return migrate_get_current()->parameters.max_cpu_throttle;
+}
+
+/* Current value of the max_bandwidth migration parameter. */
+uint64_t migrate_max_bandwidth(void)
+{
+    return migrate_get_current()->parameters.max_bandwidth;
+}
+
+/* Current value of the max_postcopy_bandwidth migration parameter. */
+int64_t migrate_max_postcopy_bandwidth(void)
+{
+    return migrate_get_current()->parameters.max_postcopy_bandwidth;
+}
+
+/* Current value of the multifd_channels migration parameter. */
+int migrate_multifd_channels(void)
+{
+    return migrate_get_current()->parameters.multifd_channels;
+}
+
+/*
+ * Current value of the multifd_compression migration parameter.
+ * Asserts that the stored value is below MULTIFD_COMPRESSION__MAX.
+ */
+MultiFDCompression migrate_multifd_compression(void)
+{
+    const MigrationState *ms = migrate_get_current();
+
+    assert(ms->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
+
+    return ms->parameters.multifd_compression;
+}
+
+/* Current value of the multifd_zlib_level migration parameter. */
+int migrate_multifd_zlib_level(void)
+{
+    return migrate_get_current()->parameters.multifd_zlib_level;
+}
+
+/* Current value of the multifd_zstd_level migration parameter. */
+int migrate_multifd_zstd_level(void)
+{
+    return migrate_get_current()->parameters.multifd_zstd_level;
+}
+
+/* Current value of the throttle_trigger_threshold migration parameter. */
+uint8_t migrate_throttle_trigger_threshold(void)
+{
+    return migrate_get_current()->parameters.throttle_trigger_threshold;
+}
+
+/* Current value of the xbzrle_cache_size migration parameter. */
+uint64_t migrate_xbzrle_cache_size(void)
+{
+    return migrate_get_current()->parameters.xbzrle_cache_size;
+}
+
+/* parameters helpers */
+
+/*
+ * Gather the announce_initial/max/rounds/step migration parameters into
+ * an AnnounceParameters structure.
+ *
+ * The structure lives in function-static storage: every call overwrites
+ * the same object, and the returned pointer must not be freed.
+ */
+AnnounceParameters *migrate_announce_params(void)
+{
+    static AnnounceParameters ap;
+
+    MigrationState *s = migrate_get_current();
+
+    ap.initial = s->parameters.announce_initial;
+    ap.max = s->parameters.announce_max;
+    ap.rounds = s->parameters.announce_rounds;
+    ap.step = s->parameters.announce_step;
+
+    /* Hand back the address of the static object filled in above. */
+    return &ap;
+}
+
+/*
+ * QMP query-migrate-parameters handler.
+ *
+ * Returns a newly allocated MigrationParameters filled with a copy of the
+ * current migration state's parameters.  String members are duplicated
+ * (tls_authz is reported as "" when unset) and block_bitmap_mapping is
+ * cloned only when the state has it set.  @errp is never written to.
+ */
+MigrationParameters *qmp_query_migrate_parameters(Error **errp)
+{
+    MigrationState *ms = migrate_get_current();
+    const MigrationParameters *cur = &ms->parameters;
+    MigrationParameters *out;
+    const char *authz;
+
+    /* TODO use QAPI_CLONE() instead of duplicating it inline */
+    out = g_malloc0(sizeof(*out));
+
+    /* (De)compression */
+    out->has_compress_level = true;
+    out->compress_level = cur->compress_level;
+    out->has_compress_threads = true;
+    out->compress_threads = cur->compress_threads;
+    out->has_compress_wait_thread = true;
+    out->compress_wait_thread = cur->compress_wait_thread;
+    out->has_decompress_threads = true;
+    out->decompress_threads = cur->decompress_threads;
+
+    /* CPU throttling */
+    out->has_throttle_trigger_threshold = true;
+    out->throttle_trigger_threshold = cur->throttle_trigger_threshold;
+    out->has_cpu_throttle_initial = true;
+    out->cpu_throttle_initial = cur->cpu_throttle_initial;
+    out->has_cpu_throttle_increment = true;
+    out->cpu_throttle_increment = cur->cpu_throttle_increment;
+    out->has_cpu_throttle_tailslow = true;
+    out->cpu_throttle_tailslow = cur->cpu_throttle_tailslow;
+    out->has_max_cpu_throttle = true;
+    out->max_cpu_throttle = cur->max_cpu_throttle;
+
+    /* TLS: tls_authz falls back to the empty string when unset */
+    authz = cur->tls_authz ? cur->tls_authz : "";
+    out->tls_creds = g_strdup(cur->tls_creds);
+    out->tls_hostname = g_strdup(cur->tls_hostname);
+    out->tls_authz = g_strdup(authz);
+
+    /* Bandwidth and downtime */
+    out->has_max_bandwidth = true;
+    out->max_bandwidth = cur->max_bandwidth;
+    out->has_downtime_limit = true;
+    out->downtime_limit = cur->downtime_limit;
+    out->has_max_postcopy_bandwidth = true;
+    out->max_postcopy_bandwidth = cur->max_postcopy_bandwidth;
+
+    /* Checkpointing and block migration */
+    out->has_x_checkpoint_delay = true;
+    out->x_checkpoint_delay = cur->x_checkpoint_delay;
+    out->has_block_incremental = true;
+    out->block_incremental = cur->block_incremental;
+
+    /* Multifd */
+    out->has_multifd_channels = true;
+    out->multifd_channels = cur->multifd_channels;
+    out->has_multifd_compression = true;
+    out->multifd_compression = cur->multifd_compression;
+    out->has_multifd_zlib_level = true;
+    out->multifd_zlib_level = cur->multifd_zlib_level;
+    out->has_multifd_zstd_level = true;
+    out->multifd_zstd_level = cur->multifd_zstd_level;
+
+    /* XBZRLE */
+    out->has_xbzrle_cache_size = true;
+    out->xbzrle_cache_size = cur->xbzrle_cache_size;
+
+    /* Announce */
+    out->has_announce_initial = true;
+    out->announce_initial = cur->announce_initial;
+    out->has_announce_max = true;
+    out->announce_max = cur->announce_max;
+    out->has_announce_rounds = true;
+    out->announce_rounds = cur->announce_rounds;
+    out->has_announce_step = true;
+    out->announce_step = cur->announce_step;
+
+    /* Optional bitmap mapping: deep-copied only when present */
+    if (cur->has_block_bitmap_mapping) {
+        out->has_block_bitmap_mapping = true;
+        out->block_bitmap_mapping =
+            QAPI_CLONE(BitmapMigrationNodeAliasList,
+                       cur->block_bitmap_mapping);
+    }
+
+    return out;
+}
--- /dev/null
+/*
+ * QEMU migration capabilities
+ *
+ * Copyright (c) 2012-2023 Red Hat Inc
+ *
+ * Authors:
+ * Orit Wasserman <owasserm@redhat.com>
+ * Juan Quintela <quintela@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MIGRATION_OPTIONS_H
+#define QEMU_MIGRATION_OPTIONS_H
+
+/* capabilities */
+
+bool migrate_auto_converge(void);
+bool migrate_background_snapshot(void);
+bool migrate_block(void);
+bool migrate_colo(void);
+bool migrate_compress(void);
+bool migrate_dirty_bitmaps(void);
+bool migrate_events(void);
+bool migrate_ignore_shared(void);
+bool migrate_late_block_activate(void);
+bool migrate_multifd(void);
+bool migrate_pause_before_switchover(void);
+bool migrate_postcopy_blocktime(void);
+bool migrate_postcopy_preempt(void);
+bool migrate_postcopy_ram(void);
+bool migrate_rdma_pin_all(void);
+bool migrate_release_ram(void);
+bool migrate_return_path(void);
+bool migrate_validate_uuid(void);
+bool migrate_xbzrle(void);
+bool migrate_zero_blocks(void);
+bool migrate_zero_copy_send(void);
+
+/*
+ * pseudo capabilities
+ *
+ * These are functions that are used in a similar way to capabilities
+ * check, but they are not a capability.
+ */
+
+bool migrate_postcopy(void);
+
+/* capabilities helpers */
+
+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp);
+bool migrate_cap_set(int cap, bool value, Error **errp);
+
+/* parameters */
+
+bool migrate_block_incremental(void);
+uint32_t migrate_checkpoint_delay(void);
+int migrate_compress_level(void);
+int migrate_compress_threads(void);
+int migrate_compress_wait_thread(void);
+uint8_t migrate_cpu_throttle_increment(void);
+uint8_t migrate_cpu_throttle_initial(void);
+bool migrate_cpu_throttle_tailslow(void);
+int migrate_decompress_threads(void);
+uint8_t migrate_max_cpu_throttle(void);
+uint64_t migrate_max_bandwidth(void);
+int64_t migrate_max_postcopy_bandwidth(void);
+int migrate_multifd_channels(void);
+MultiFDCompression migrate_multifd_compression(void);
+int migrate_multifd_zlib_level(void);
+int migrate_multifd_zstd_level(void);
+uint8_t migrate_throttle_trigger_threshold(void);
+uint64_t migrate_xbzrle_cache_size(void);
+
+#endif
#include "yank_functions.h"
#include "tls.h"
#include "qemu/userfaultfd.h"
+#include "qemu/mmap-alloc.h"
+#include "options.h"
/* Arbitrary limit on size of each discard command,
* keeps them around ~200 bytes
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb)
{
const char *block_name = qemu_ram_get_idstr(rb);
ram_addr_t length = qemu_ram_get_used_length(rb);
size_t pagesize = qemu_ram_pagesize(rb);
+ QemuFsType fs;
if (length % pagesize) {
error_report("Postcopy requires RAM blocks to be a page size multiple,"
"page size of 0x%zx", block_name, length, pagesize);
return 1;
}
+
+ if (rb->fd >= 0) {
+ fs = qemu_fd_getfs(rb->fd);
+ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) {
+ error_report("Host backend files need to be TMPFS or HUGETLBFS only");
+ return 1;
+ }
+ }
+
return 0;
}
struct uffdio_range range_struct;
uint64_t feature_mask;
Error *local_err = NULL;
+ RAMBlock *block;
if (qemu_target_page_size() > pagesize) {
error_report("Target page size bigger than host page size");
goto out;
}
- /* We don't support postcopy with shared RAM yet */
- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
- goto out;
+ /*
+ * We don't support postcopy with some types of ramblocks.
+ *
+ * NOTE: we deliberately do not filter with ramblock_is_ignored() here;
+ * instead we check all possible ramblocks. This is because this
+ * function can be called when creating the migration object, at which
+ * point RAM_MIGRATABLE is not even properly set for all the ramblocks.
+ *
+ * A side effect of this is that we also check RAM_SHARED ramblocks
+ * even if migrate_ignore_shared() is set (in which case we'll never
+ * migrate RAM_SHARED at all), but normally this shouldn't matter in
+ * practice; if it does, we can revisit.
+ */
+ RAMBLOCK_FOREACH(block) {
+ if (test_ramblock_postcopiable(block)) {
+ goto out;
+ }
}
/*
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"
+#include "options.h"
#include "hw/boards.h" /* for machine_dump_guest_core() */
static void XBZRLE_cache_lock(void)
{
- if (migrate_use_xbzrle()) {
+ if (migrate_xbzrle()) {
qemu_mutex_lock(&XBZRLE.lock);
}
}
static void XBZRLE_cache_unlock(void)
{
- if (migrate_use_xbzrle()) {
+ if (migrate_xbzrle()) {
qemu_mutex_unlock(&XBZRLE.lock);
}
}
0;
}
-/*
- * NOTE: not all stats in ram_counters are used in reality. See comments
- * for struct MigrationAtomicStats. The ultimate result of ram migration
- * counters will be a merged version with both ram_counters and the atomic
- * fields in ram_atomic_counters.
- */
-MigrationStats ram_counters;
-MigrationAtomicStats ram_atomic_counters;
+RAMStats ram_counters;
void ram_transferred_add(uint64_t bytes)
{
if (runstate_is_running()) {
- ram_counters.precopy_bytes += bytes;
+ stat64_add(&ram_counters.precopy_bytes, bytes);
} else if (migration_in_postcopy()) {
- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes);
+ stat64_add(&ram_counters.postcopy_bytes, bytes);
} else {
- ram_counters.downtime_bytes += bytes;
+ stat64_add(&ram_counters.downtime_bytes, bytes);
}
- stat64_add(&ram_atomic_counters.transferred, bytes);
-}
-
-void dirty_sync_missed_zero_copy(void)
-{
- ram_counters.dirty_sync_missed_zero_copy++;
+ stat64_add(&ram_counters.transferred, bytes);
}
struct MigrationOps {
{
int i, thread_count;
- if (!migrate_use_compression() || !comp_param) {
+ if (!migrate_compress() || !comp_param) {
return;
}
{
int i, thread_count;
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
return 0;
}
thread_count = migrate_compress_threads();
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
uint64_t bytes_dirty_threshold)
{
- MigrationState *s = migrate_get_current();
- uint64_t pct_initial = s->parameters.cpu_throttle_initial;
- uint64_t pct_increment = s->parameters.cpu_throttle_increment;
- bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
- int pct_max = s->parameters.max_cpu_throttle;
+ uint64_t pct_initial = migrate_cpu_throttle_initial();
+ uint64_t pct_increment = migrate_cpu_throttle_increment();
+ bool pct_tailslow = migrate_cpu_throttle_tailslow();
+ int pct_max = migrate_max_cpu_throttle();
uint64_t throttle_now = cpu_throttle_get_percentage();
uint64_t cpu_now, cpu_ideal, throttle_inc;
rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
rs->num_dirty_pages_period = 0;
- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred);
+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred);
}
/**
/* We don't care if this fails to allocate a new cache page
* as long as it updated an old one */
cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
- ram_counters.dirty_sync_count);
+ stat64_get(&ram_counters.dirty_sync_count));
}
#define ENCODING_FLAG_XBZRLE 0x1
int encoded_len = 0, bytes_xbzrle;
uint8_t *prev_cached_page;
QEMUFile *file = pss->pss_channel;
+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count);
- if (!cache_is_cached(XBZRLE.cache, current_addr,
- ram_counters.dirty_sync_count)) {
+ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) {
xbzrle_counters.cache_miss++;
if (!rs->last_stage) {
if (cache_insert(XBZRLE.cache, current_addr, *current_data,
- ram_counters.dirty_sync_count) == -1) {
+ generation) == -1) {
return -1;
} else {
/* update *current_data when the page has been
uint64_t ram_get_total_transferred_pages(void)
{
- return stat64_get(&ram_atomic_counters.normal) +
- stat64_get(&ram_atomic_counters.duplicate) +
+ return stat64_get(&ram_counters.normal_pages) +
+ stat64_get(&ram_counters.zero_pages) +
compression_counters.pages + xbzrle_counters.pages;
}
return;
}
- if (migrate_use_xbzrle()) {
+ if (migrate_xbzrle()) {
double encoded_size, unencoded_size;
xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
}
- if (migrate_use_compression()) {
+ if (migrate_compress()) {
compression_counters.busy_rate = (double)(compression_counters.busy -
rs->compress_thread_busy_prev) / page_count;
rs->compress_thread_busy_prev = compression_counters.busy;
static void migration_trigger_throttle(RAMState *rs)
{
- MigrationState *s = migrate_get_current();
- uint64_t threshold = s->parameters.throttle_trigger_threshold;
+ uint64_t threshold = migrate_throttle_trigger_threshold();
uint64_t bytes_xfer_period =
- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev;
+ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev;
uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
RAMBlock *block;
int64_t end_time;
- ram_counters.dirty_sync_count++;
+ stat64_add(&ram_counters.dirty_sync_count, 1);
if (!rs->time_last_bitmap_sync) {
rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
/* reset period counters */
rs->time_last_bitmap_sync = end_time;
rs->num_dirty_pages_period = 0;
- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred);
+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred);
}
- if (migrate_use_events()) {
- qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
+ if (migrate_events()) {
+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count);
+ qapi_event_send_migration_pass(generation);
}
}
int len = save_zero_page_to_file(pss, f, block, offset);
if (len) {
- stat64_add(&ram_atomic_counters.duplicate, 1);
+ stat64_add(&ram_counters.zero_pages, 1);
ram_transferred_add(len);
return 1;
}
}
if (bytes_xmit > 0) {
- stat64_add(&ram_atomic_counters.normal, 1);
+ stat64_add(&ram_counters.normal_pages, 1);
} else if (bytes_xmit == 0) {
- stat64_add(&ram_atomic_counters.duplicate, 1);
+ stat64_add(&ram_counters.zero_pages, 1);
}
return true;
qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
}
ram_transferred_add(TARGET_PAGE_SIZE);
- stat64_add(&ram_atomic_counters.normal, 1);
+ stat64_add(&ram_counters.normal_pages, 1);
return 1;
}
if (multifd_queue_page(file, block, offset) < 0) {
return -1;
}
- stat64_add(&ram_atomic_counters.normal, 1);
+ stat64_add(&ram_counters.normal_pages, 1);
return 1;
}
ram_transferred_add(bytes_xmit);
if (param->zero_page) {
- stat64_add(&ram_atomic_counters.duplicate, 1);
+ stat64_add(&ram_counters.zero_pages, 1);
return;
}
/* Flag that we've looped */
pss->complete_round = true;
/* After the first round, enable XBZRLE. */
- if (migrate_use_xbzrle()) {
+ if (migrate_xbzrle()) {
rs->xbzrle_enabled = true;
}
}
RAMBlock *ramblock;
RAMState *rs = ram_state;
- ram_counters.postcopy_requests++;
+ stat64_add(&ram_counters.postcopy_requests, 1);
RCU_READ_LOCK_GUARD();
if (!rbname) {
static bool save_page_use_compression(RAMState *rs)
{
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
return false;
}
* if host page size == guest page size the dest guest during run may
* still see partially copied pages which is data corruption.
*/
- if (migrate_use_multifd() && !migration_in_postcopy()) {
+ if (migrate_multifd() && !migration_in_postcopy()) {
return ram_save_multifd_page(pss->pss_channel, block, offset);
}
uint64_t pages = size / TARGET_PAGE_SIZE;
if (zero) {
- stat64_add(&ram_atomic_counters.duplicate, pages);
+ stat64_add(&ram_counters.zero_pages, pages);
} else {
- stat64_add(&ram_atomic_counters.normal, pages);
+ stat64_add(&ram_counters.normal_pages, pages);
ram_transferred_add(size);
qemu_file_credit_transfer(f, size);
}
{
Error *local_err = NULL;
- if (!migrate_use_xbzrle()) {
+ if (!migrate_xbzrle()) {
return 0;
}
migration_ops = g_malloc0(sizeof(MigrationOps));
migration_ops->ram_save_target_page = ram_save_target_page_legacy;
- ret = multifd_send_sync_main(f);
+ ret = multifd_send_sync_main(f);
if (ret < 0) {
return ret;
}
{
int idx, thread_count;
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
return 0;
}
{
int i, thread_count;
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
return;
}
thread_count = migrate_decompress_threads();
{
int i, thread_count;
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
return 0;
}
int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
/* ADVISE is earlier, it shows the source has the postcopy capability on */
bool postcopy_advised = migration_incoming_postcopy_advised();
- if (!migrate_use_compression()) {
+ if (!migrate_compress()) {
invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
}
#include "qemu/stats64.h"
/*
- * These are the migration statistic counters that need to be updated using
- * atomic ops (can be accessed by more than one thread). Here since we
- * cannot modify MigrationStats directly to use Stat64 as it was defined in
- * the QAPI scheme, we define an internal structure to hold them, and we
- * propagate the real values when QMP queries happen.
- *
- * IOW, the corresponding fields within ram_counters on these specific
- * fields will be always zero and not being used at all; they're just
- * placeholders to make it QAPI-compatible.
+ * These are the ram migration statistic counters. It is loosely
+ * based on MigrationStats. We change to Stat64 any counter that
+ * needs to be updated using atomic ops (can be accessed by more than
+ * one thread).
*/
typedef struct {
- Stat64 transferred;
- Stat64 duplicate;
- Stat64 normal;
+ int64_t dirty_pages_rate;
+ Stat64 dirty_sync_count;
+ Stat64 dirty_sync_missed_zero_copy;
+ Stat64 downtime_bytes;
+ Stat64 zero_pages;
+ Stat64 multifd_bytes;
+ Stat64 normal_pages;
Stat64 postcopy_bytes;
-} MigrationAtomicStats;
+ Stat64 postcopy_requests;
+ Stat64 precopy_bytes;
+ int64_t remaining;
+ Stat64 transferred;
+} RAMStats;
-extern MigrationAtomicStats ram_atomic_counters;
-extern MigrationStats ram_counters;
+extern RAMStats ram_counters;
extern XBZRLECacheStats xbzrle_counters;
extern CompressionStats compression_counters;
int ram_write_tracking_start(void);
void ram_write_tracking_stop(void);
-void dirty_sync_missed_zero_copy(void);
-
#endif
#include <rdma/rdma_cma.h>
#include "trace.h"
#include "qom/object.h"
+#include "options.h"
#include <poll.h>
/*
* initialize the RDMAContext for return path for postcopy after first
* connection request reached.
*/
- if ((migrate_postcopy() || migrate_use_return_path())
+ if ((migrate_postcopy() || migrate_return_path())
&& !rdma->is_return_path) {
rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
if (rdma_return_path == NULL) {
}
/* Accept the second connection request for return path */
- if ((migrate_postcopy() || migrate_use_return_path())
+ if ((migrate_postcopy() || migrate_return_path())
&& !rdma->is_return_path) {
qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
NULL,
goto err;
}
- ret = qemu_rdma_source_init(rdma,
- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
+ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp);
if (ret) {
goto err;
}
/* RDMA postcopy need a separate queue pair for return path */
- if (migrate_postcopy() || migrate_use_return_path()) {
+ if (migrate_postcopy() || migrate_return_path()) {
rdma_return_path = qemu_rdma_data_init(host_port, errp);
if (rdma_return_path == NULL) {
}
ret = qemu_rdma_source_init(rdma_return_path,
- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
+ migrate_rdma_pin_all(), errp);
if (ret) {
goto return_path_err;
#include "qemu/yank.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
+#include "options.h"
const unsigned int postcopy_ram_discard_version;
uint32_t result = 0;
int i;
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ if (should_validate_capability(i) && s->capabilities[i]) {
result++;
}
}
state->capabilities = g_renew(MigrationCapability, state->capabilities,
state->caps_count);
for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ if (should_validate_capability(i) && s->capabilities[i]) {
state->capabilities[j++] = i;
}
}
continue;
}
source_state = test_bit(i, source_caps_bm);
- target_state = s->enabled_capabilities[i];
+ target_state = s->capabilities[i];
if (source_state != target_state) {
error_report("Capability %s is %s, but received capability is %s",
MigrationCapability_str(i),
return -EINVAL;
}
- if (migrate_use_block()) {
+ if (migrate_block()) {
error_setg(errp, "Block migration and snapshots are incompatible");
return -EINVAL;
}
#include "io/net-listener.h"
#include "trace.h"
#include "postcopy-ram.h"
+#include "options.h"
struct SocketOutgoingArgs {
SocketAddress *saddr;
trace_migration_socket_outgoing_connected(data->hostname);
- if (migrate_use_zero_copy_send() &&
+ if (migrate_zero_copy_send() &&
!qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
error_setg(&err, "Zero copy send feature not detected in host kernel");
}
qio_net_listener_set_name(listener, "migration-socket-listener");
- if (migrate_use_multifd()) {
+ if (migrate_multifd()) {
num = migrate_multifd_channels();
} else if (migrate_postcopy_preempt()) {
num = RAM_CHANNEL_MAX;
Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
monitor_set_cur(co, &mon->common);
aio_co_enter(qemu_get_aio_context(), co);
- AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
}
qobject_unref(qdict);
* We need to poll both qemu_aio_context and iohandler_ctx to make
* sure that the dispatcher coroutine keeps making progress and
* eventually terminates. qemu_aio_context is automatically
- * polled by calling AIO_WAIT_WHILE on it, but we must poll
+ * polled by calling AIO_WAIT_WHILE_UNLOCKED on it, but we must poll
* iohandler_ctx manually.
*
* Letting the iothread continue while shutting down the dispatcher
aio_co_wake(qmp_dispatcher_co);
}
- AIO_WAIT_WHILE(qemu_get_aio_context(),
+ AIO_WAIT_WHILE_UNLOCKED(NULL,
(aio_poll(iohandler_get_aio_context(), false),
qatomic_mb_read(&qmp_dispatcher_co_busy)));
return 1;
}
-static int nbd_receive_request(NBDClient *client, NBDRequest *request,
- Error **errp)
+static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
+ Error **errp)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
stq_be_p(&reply->handle, handle);
}
-static int nbd_co_send_simple_reply(NBDClient *client,
- uint64_t handle,
- uint32_t error,
- void *data,
- size_t len,
- Error **errp)
+static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
+ uint64_t handle,
+ uint32_t error,
+ void *data,
+ size_t len,
+ Error **errp)
{
NBDSimpleReply reply;
int nbd_err = system_errno_to_nbd_errno(error);
stl_be_p(&chunk.length, pnum);
ret = nbd_co_send_iov(client, iov, 1, errp);
} else {
- ret = blk_pread(exp->common.blk, offset + progress, pnum,
- data + progress, 0);
+ ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
+ data + progress, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "reading from file failed");
break;
* @ea is converted to BE by the function
* @last controls whether NBD_REPLY_FLAG_DONE is sent.
*/
-static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
- NBDExtentArray *ea,
- bool last, uint32_t context_id, Error **errp)
+static int coroutine_fn
+nbd_co_send_extents(NBDClient *client, uint64_t handle, NBDExtentArray *ea,
+ bool last, uint32_t context_id, Error **errp)
{
NBDStructuredMeta chunk;
struct iovec iov[] = {
bdrv_dirty_bitmap_unlock(bitmap);
}
-static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
- BdrvDirtyBitmap *bitmap, uint64_t offset,
- uint32_t length, bool dont_fragment, bool last,
- uint32_t context_id, Error **errp)
+static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+ BdrvDirtyBitmap *bitmap, uint64_t offset,
+ uint32_t length, bool dont_fragment, bool last,
+ uint32_t context_id, Error **errp)
{
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
* to the client (although the caller may still need to disconnect after
* reporting the error).
*/
-static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
- Error **errp)
+static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
+ Error **errp)
{
NBDClient *client = req->client;
int valid_flags;
data, request->len, errp);
}
- ret = blk_pread(exp->common.blk, request->from, request->len, data, 0);
+ ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
if (ret < 0) {
return nbd_send_generic_reply(client, request->handle, ret,
"reading from file failed", errp);
if (request->flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
- ret = blk_pwrite(exp->common.blk, request->from, request->len, data,
- flags);
+ ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
+ flags);
return nbd_send_generic_reply(client, request->handle, ret,
"writing to file failed", errp);
if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
flags |= BDRV_REQ_NO_FALLBACK;
}
- ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len,
- flags);
+ ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len,
+ flags);
return nbd_send_generic_reply(client, request->handle, ret,
"writing to file failed", errp);
/* VHOST_F_LOG_ALL is exposed by SVQ */
BIT_ULL(VHOST_F_LOG_ALL) |
BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
- BIT_ULL(VIRTIO_NET_F_STANDBY);
+ BIT_ULL(VIRTIO_NET_F_STANDBY) |
+ BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
#define VHOST_VDPA_NET_CVQ_ASID 1
{ 'command': 'query-migrate-parameters',
'returns': 'MigrationParameters' }
-##
-# @client_migrate_info:
-#
-# Set migration information for remote display. This makes the server
-# ask the client to automatically reconnect using the new parameters
-# once migration finished successfully. Only implemented for SPICE.
-#
-# @protocol: must be "spice"
-# @hostname: migration target hostname
-# @port: spice tcp port for plaintext channels
-# @tls-port: spice tcp port for tls-secured channels
-# @cert-subject: server certificate subject
-#
-# Since: 0.14
-#
-# Example:
-#
-# -> { "execute": "client_migrate_info",
-# "arguments": { "protocol": "spice",
-# "hostname": "virt42.lab.kraxel.org",
-# "port": 1234 } }
-# <- { "return": {} }
-#
-##
-{ 'command': 'client_migrate_info',
- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
- '*tls-port': 'int', '*cert-subject': 'str' } }
-
##
# @migrate-start-postcopy:
#
{ 'command': 'display-update',
'data': 'DisplayUpdateOptions',
'boxed' : true }
+
+##
+# @client_migrate_info:
+#
+# Set migration information for remote display. This makes the server
+# ask the client to automatically reconnect using the new parameters
+# once migration finished successfully. Only implemented for SPICE.
+#
+# @protocol: must be "spice"
+# @hostname: migration target hostname
+# @port: spice tcp port for plaintext channels
+# @tls-port: spice tcp port for tls-secured channels
+# @cert-subject: server certificate subject
+#
+# Since: 0.14
+#
+# Example:
+#
+# -> { "execute": "client_migrate_info",
+# "arguments": { "protocol": "spice",
+# "hostname": "virt42.lab.kraxel.org",
+# "port": 1234 } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'client_migrate_info',
+ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
+ '*tls-port': 'int', '*cert-subject': 'str' } }
printf "%s\n" ' use idef-parser to automatically generate TCG'
printf "%s\n" ' code for the Hexagon frontend'
printf "%s\n" ' --disable-install-blobs install provided firmware blobs'
+ printf "%s\n" ' --disable-qom-cast-debug cast debugging support'
printf "%s\n" ' --docdir=VALUE Base directory for documentation installation'
printf "%s\n" ' (can be empty) [share/doc]'
printf "%s\n" ' --enable-block-drv-whitelist-in-tools'
printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for'
printf "%s\n" ' upgrades'
printf "%s\n" ' --enable-profiler profiler support'
- printf "%s\n" ' --enable-qom-cast-debug cast debugging support'
printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and'
printf "%s\n" ' getrandom()'
printf "%s\n" ' --enable-strip Strip targets on install'
__email__ = "stefanha@redhat.com"
+import os.path
+
from tracetool import out
args=event.args,
event_id="TRACE_" + event.name.upper(),
event_lineno=event.lineno,
- event_filename=event.filename,
+ event_filename=os.path.relpath(event.filename),
fmt=event.fmt.rstrip("\n"),
argnames=argnames)
__email__ = "stefanha@redhat.com"
+import os.path
+
from tracetool import out
' }',
cond=cond,
event_lineno=event.lineno,
- event_filename=event.filename,
+ event_filename=os.path.relpath(event.filename),
name=event.name,
fmt=event.fmt.rstrip("\n"),
argnames=argnames)
__email__ = "stefanha@redhat.com"
+import os.path
+
from tracetool import out
' }',
cond=cond,
event_lineno=event.lineno,
- event_filename=event.filename,
+ event_filename=os.path.relpath(event.filename),
name=event.name,
fmt=event.fmt.rstrip("\n"),
argnames=argnames)
int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
struct sg_io_hdr *hdr)
{
- ThreadPool *pool = aio_get_thread_pool(ctx);
PRManagerData data = {
.pr_mgr = pr_mgr,
.fd = fd,
/* The matching object_unref is in pr_manager_worker. */
object_ref(OBJECT(pr_mgr));
- return thread_pool_submit_co(pool, pr_manager_worker, &data);
+ return thread_pool_submit_co(pr_manager_worker, &data);
}
bool pr_manager_is_connected(PRManager *pr_mgr)
return status;
}
-static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
- uint8_t *buf, int *sz, int dir)
+static int coroutine_fn do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+ uint8_t *buf, int *sz, int dir)
{
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
int r;
PRHelperSGIOData data = {
.dir = dir,
};
- r = thread_pool_submit_co(pool, do_sgio_worker, &data);
+ r = thread_pool_submit_co(do_sgio_worker, &data);
*sz = data.sz;
return r;
}
}
}
-static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+static int coroutine_fn mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
{
switch (r) {
case MPATH_PR_SUCCESS:
}
}
-static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
- uint8_t *data, int sz)
+static int coroutine_fn multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+ uint8_t *data, int sz)
{
int rq_servact = cdb[1];
struct prin_resp resp;
return mpath_reconstruct_sense(fd, r, sense);
}
-static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
- const uint8_t *param, int sz)
+static int coroutine_fn multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+ const uint8_t *param, int sz)
{
int rq_servact = cdb[1];
int rq_scope = cdb[2] >> 4;
}
#endif
-static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
- uint8_t *data, int *resp_sz)
+static int coroutine_fn do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+ uint8_t *data, int *resp_sz)
{
#ifdef CONFIG_MPATH
if (is_mpath(fd)) {
SG_DXFER_FROM_DEV);
}
-static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
- const uint8_t *param, int sz)
+static int coroutine_fn do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+ const uint8_t *param, int sz)
{
int resp_sz;
machine_class->name, machine_class->deprecation_reason);
}
+ /*
+ * Create backends before creating migration objects, so that it can
+ * check against compatibilities on the backend memories (e.g. postcopy
+ * over memory-backend-file objects).
+ */
+ qemu_create_late_backends();
+
/*
* Note: creates a QOM object, must run only after global and
* compat properties have been set up.
*/
migration_object_init();
- qemu_create_late_backends();
-
/* parse features once if machine provides default cpu_type */
current_machine->cpu_type = machine_class->default_cpu_type;
if (cpu_option) {
_vu_queue_notify(dev, vq, true);
}
+/*
+ * Send a VHOST_USER_BACKEND_CONFIG_CHANGE_MSG request, flagged with
+ * VHOST_USER_VERSION, on dev->slave_fd.  The result of the write is not
+ * checked.
+ */
+void vu_config_change_msg(VuDev *dev)
+{
+    VhostUserMsg msg = {
+        .flags = VHOST_USER_VERSION,
+        .request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG,
+    };
+
+    vu_message_write(dev, dev->slave_fd, &msg);
+}
+
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
*/
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+void vu_config_change_msg(VuDev *dev);
+
/**
* vu_queue_notify_sync:
* @dev: a VuDev context
#include "qemu/main-loop.h"
static AioContext *ctx;
-static ThreadPool *pool;
static int active;
typedef struct {
static void test_submit(void)
{
WorkerTestData data = { .n = 0 };
- thread_pool_submit(pool, worker_cb, &data);
+ thread_pool_submit(worker_cb, &data);
while (data.n == 0) {
aio_poll(ctx, true);
}
static void test_submit_aio(void)
{
WorkerTestData data = { .n = 0, .ret = -EINPROGRESS };
- data.aiocb = thread_pool_submit_aio(pool, worker_cb, &data,
+ data.aiocb = thread_pool_submit_aio(worker_cb, &data,
done_cb, &data);
/* The callbacks are not called until after the first wait. */
g_assert_cmpint(data.ret, ==, 0);
}
-static void co_test_cb(void *opaque)
+static void coroutine_fn co_test_cb(void *opaque)
{
WorkerTestData *data = opaque;
active = 1;
data->n = 0;
data->ret = -EINPROGRESS;
- thread_pool_submit_co(pool, worker_cb, data);
+ thread_pool_submit_co(worker_cb, data);
/* The test continues in test_submit_co, after qemu_coroutine_enter... */
for (i = 0; i < 100; i++) {
data[i].n = 0;
data[i].ret = -EINPROGRESS;
- thread_pool_submit_aio(pool, worker_cb, &data[i], done_cb, &data[i]);
+ thread_pool_submit_aio(worker_cb, &data[i], done_cb, &data[i]);
}
active = 100;
for (i = 0; i < 100; i++) {
data[i].n = 0;
data[i].ret = -EINPROGRESS;
- data[i].aiocb = thread_pool_submit_aio(pool, long_cb, &data[i],
+ data[i].aiocb = thread_pool_submit_aio(long_cb, &data[i],
done_cb, &data[i]);
}
{
qemu_init_main_loop(&error_abort);
ctx = qemu_get_current_aio_context();
- pool = aio_get_thread_pool(ctx);
g_test_init(&argc, &argv, NULL);
g_test_add_func("/thread-pool/submit", test_submit);
end:
hmp_handle_error(mon, err);
}
+
+/*
+ * HMP front-end for qmp_client_migrate_info().
+ *
+ * Extracts "protocol" and "hostname" plus the optional "port",
+ * "tls-port" and "cert-subject" entries from @qdict, forwards them to the
+ * QMP handler, and hands any resulting error to hmp_handle_error().
+ * Absent port entries are passed as -1 together with a false has_* flag.
+ */
+void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
+{
+    Error *local_err = NULL;
+    const char *proto = qdict_get_str(qdict, "protocol");
+    const char *host = qdict_get_str(qdict, "hostname");
+    bool port_given = qdict_haskey(qdict, "port");
+    int plain_port = qdict_get_try_int(qdict, "port", -1);
+    bool tls_port_given = qdict_haskey(qdict, "tls-port");
+    int secure_port = qdict_get_try_int(qdict, "tls-port", -1);
+    const char *subject = qdict_get_try_str(qdict, "cert-subject");
+
+    qmp_client_migrate_info(proto, host,
+                            port_given, plain_port,
+                            tls_port_given, secure_port,
+                            subject, &local_err);
+    hmp_handle_error(mon, local_err);
+}
abort();
}
}
+
+/*
+ * Pass client migration details on to the display backend.
+ *
+ * @protocol: only "spice" is accepted; anything else sets an
+ *            invalid-parameter-value error.
+ * @hostname: forwarded unchanged to qemu_spice.migrate_info().
+ * @has_port/@port, @has_tls_port/@tls_port: at least one of the two ports
+ *            must be present; an absent one is forwarded as -1.
+ * @cert_subject: forwarded unchanged to qemu_spice.migrate_info().
+ * @errp: set when the protocol is not "spice", when qemu_using_spice()
+ *        fails, when both ports are missing, or when
+ *        qemu_spice.migrate_info() returns non-zero.
+ */
+void qmp_client_migrate_info(const char *protocol, const char *hostname,
+                             bool has_port, int64_t port,
+                             bool has_tls_port, int64_t tls_port,
+                             const char *cert_subject,
+                             Error **errp)
+{
+    int64_t plain_port;
+    int64_t secure_port;
+
+    if (strcmp(protocol, "spice") != 0) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
+        return;
+    }
+
+    if (!qemu_using_spice(errp)) {
+        return;
+    }
+
+    if (!(has_port || has_tls_port)) {
+        error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
+        return;
+    }
+
+    plain_port = has_port ? port : -1;
+    secure_port = has_tls_port ? tls_port : -1;
+
+    if (qemu_spice.migrate_info(hostname, plain_port, secure_port,
+                                cert_subject)) {
+        error_setg(errp, "Could not set up display for migration");
+    }
+}
#ifdef CONFIG_LINUX
#include <sys/vfs.h>
+#include <linux/magic.h>
#endif
+/*
+ * Classify the filesystem that backs @fd.
+ *
+ * Returns QEMU_FS_TYPE_TMPFS or QEMU_FS_TYPE_HUGETLBFS when fstatfs()
+ * reports the matching magic number. Returns QEMU_FS_TYPE_UNKNOWN for a
+ * negative @fd, for any other filesystem, when fstatfs() fails, and on
+ * builds without CONFIG_LINUX.
+ */
+QemuFsType qemu_fd_getfs(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd < 0) {
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
+
+    /* Retry if the call was interrupted by a signal. */
+    do {
+        ret = fstatfs(fd, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        /*
+         * fstatfs() failed for a reason other than EINTR, so @fs was never
+         * filled in; inspecting fs.f_type would read uninitialized memory.
+         */
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
+
+    switch (fs.f_type) {
+    case TMPFS_MAGIC:
+        return QEMU_FS_TYPE_TMPFS;
+    case HUGETLBFS_MAGIC:
+        return QEMU_FS_TYPE_HUGETLBFS;
+    default:
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
+#else
+    return QEMU_FS_TYPE_UNKNOWN;
+#endif
+}
+
size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
/* Access to this list is protected by lock. */
QTAILQ_ENTRY(ThreadPoolElement) reqs;
- /* Access to this list is protected by the global mutex. */
+ /* This list is only written by the thread pool's mother thread. */
QLIST_ENTRY(ThreadPoolElement) all;
};
ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
- aio_context_acquire(pool->ctx);
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_DONE) {
*/
qemu_bh_schedule(pool->completion_bh);
- aio_context_release(pool->ctx);
elem->common.cb(elem->common.opaque, elem->ret);
- aio_context_acquire(pool->ctx);
/* We can safely cancel the completion_bh here regardless of someone
* else having scheduled it meanwhile because we reenter the
qemu_aio_unref(elem);
}
}
- aio_context_release(pool->ctx);
}
static void thread_pool_cancel(BlockAIOCB *acb)
.get_aio_context = thread_pool_get_aio_context,
};
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
- ThreadPoolFunc *func, void *arg,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+ BlockCompletionFunc *cb, void *opaque)
{
ThreadPoolElement *req;
+ AioContext *ctx = qemu_get_current_aio_context();
+ ThreadPool *pool = aio_get_thread_pool(ctx);
+
+ /* Assert that the thread submitting work is the same running the pool */
+ assert(pool->ctx == qemu_get_current_aio_context());
req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
req->func = func;
aio_co_wake(co->co);
}
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
- void *arg)
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
{
ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
assert(qemu_in_coroutine());
- thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
+ thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
qemu_coroutine_yield();
return tpc.ret;
}
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
{
- thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+ thread_pool_submit_aio(func, arg, NULL, NULL);
}
void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)