M: Michael S. Tsirkin <mst@redhat.com>
L: secalert@redhat.com
+Trivial patches
+---------------
+Trivial patches
+M: Michael Tokarev <mjt@tls.msk.ru>
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+L: qemu-trivial@nongnu.org
+K: ^Subject:.*(?i)trivial
+T: git git://git.corpit.ru/qemu.git trivial-patches
+T: git git://github.com/vivier/qemu.git trivial-patches
+
Guest CPU cores (TCG):
----------------------
Overall
F: include/qemu/uuid.h
F: tests/test-uuid.c
+COLO Framework
+M: zhanghailiang <zhang.zhanghailiang@huawei.com>
+S: Maintained
+F: migration/colo*
+F: include/migration/colo.h
+F: include/migration/failover.h
+F: docs/COLO-FT.txt
+
COLO Proxy
M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
M: Li Zhijian <lizhijian@cn.fujitsu.com>
@echo ''
ifdef CONFIG_WIN32
@echo 'Windows targets:'
- @echo ' installer - Build NSIS-based installer for qemu-ga'
+ @echo ' installer - Build NSIS-based installer for QEMU'
ifdef QEMU_GA_MSI_ENABLED
@echo ' msi - Build MSI-based installer for qemu-ga'
endif
#include "sysemu/qtest.h"
#include "hw/xen/xen.h"
#include "qom/object.h"
-#include "hw/boards.h"
int tcg_tb_size;
static bool tcg_allowed = true;
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, bs->backing_blocker);
- /* Otherwise we won't be able to commit due to check in bdrv_commit */
+ /* Otherwise we won't be able to commit or stream */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
bs->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+ bs->backing_blocker);
/*
* We do backup in 3 ways:
* 1. drive backup
assert(bs_queue != NULL);
aio_context_release(ctx);
- bdrv_drain_all();
+ bdrv_drain_all_begin();
aio_context_acquire(ctx);
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
g_free(bs_entry);
}
g_free(bs_queue);
+
+ bdrv_drain_all_end();
+
return ret;
}
BackupBlockJob *job = opaque;
BackupCompleteData *data;
BlockDriverState *bs = blk_bs(job->common.blk);
- BlockBackend *target = job->target;
int64_t start, end;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
qemu_co_rwlock_unlock(&job->flush_rwlock);
g_free(job->done_bitmap);
- bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
-
data = g_malloc(sizeof(*data));
data->ret = ret;
block_job_defer_to_main_loop(&job->common, backup_complete, data);
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
- bdrv_op_block_all(target, job->common.blocker);
+ block_job_add_bdrv(&job->common, target);
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run, job);
block_job_txn_add_job(txn, &job->common);
BlockReopenQueue *reopen_queue = NULL;
int orig_overlay_flags;
int orig_base_flags;
+ BlockDriverState *iter;
BlockDriverState *overlay_bs;
Error *local_err = NULL;
}
+ /* Block all nodes between top and base, because they will
+ * disappear from the chain after this operation. */
+ assert(bdrv_chain_contains(top, base));
+ for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
+ block_job_add_bdrv(&s->common, iter);
+ }
+ /* overlay_bs must be blocked because it needs to be modified to
+ * update the backing image string, but if it's the root node then
+ * don't block it again */
+ if (bs != overlay_bs) {
+ block_job_add_bdrv(&s->common, overlay_bs);
+ }
+
s->base = blk_new();
blk_insert_bs(s->base, base);
*
* This function does not flush data to disk, use bdrv_flush_all() for that
* after calling this function.
+ *
+ * This pauses all block jobs and disables external clients. It must
+ * be paired with bdrv_drain_all_end().
+ *
+ * NOTE: no new block jobs or BlockDriverStates can be created between
+ * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
*/
-void bdrv_drain_all(void)
+void bdrv_drain_all_begin(void)
{
/* Always run first iteration so any pending completion BHs run */
bool waited = true;
aio_context_acquire(aio_context);
bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
+ aio_disable_external(aio_context);
aio_context_release(aio_context);
if (!g_slist_find(aio_ctxs, aio_context)) {
}
}
+ g_slist_free(aio_ctxs);
+}
+
+void bdrv_drain_all_end(void)
+{
+ BlockDriverState *bs;
+ BdrvNextIterator it;
+ BlockJob *job = NULL;
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
+ aio_enable_external(aio_context);
bdrv_io_unplugged_end(bs);
bdrv_parent_drained_end(bs);
aio_context_release(aio_context);
}
- g_slist_free(aio_ctxs);
- job = NULL;
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
}
}
+/*
+ * One-shot drain: runs bdrv_drain_all_begin() and then immediately
+ * bdrv_drain_all_end(), for callers that do not need to do any work
+ * between the two calls.
+ */
+void bdrv_drain_all(void)
+{
+ bdrv_drain_all_begin();
+ bdrv_drain_all_end();
+}
+
/**
* Remove an active request from the tracked requests list
*
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
- bdrv_op_unblock_all(target_bs, s->common.blocker);
blk_unref(s->target);
s->target = NULL;
block_job_completed(&s->common, data->ret);
return;
}
- bdrv_op_block_all(target, s->common.blocker);
+ block_job_add_bdrv(&s->common, target);
+ /* In commit_active_start() all intermediate nodes disappear, so
+ * any jobs in them must be blocked */
+ if (bdrv_chain_contains(bs, target)) {
+ BlockDriverState *iter;
+ for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
+ block_job_add_bdrv(&s->common, iter);
+ }
+ }
s->common.co = qemu_coroutine_create(mirror_run, s);
trace_mirror_start(bs, s, s->common.co, opaque);
#include "qemu/uri.h"
#include "qemu/cutils.h"
#include "sysemu/sysemu.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi-visit.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
#include <nfsc/libnfs.h>
+
#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
#define QEMU_NFS_MAX_DEBUG_LEVEL 2
AioContext *aio_context;
blkcnt_t st_blocks;
bool cache_used;
+ NFSServer *server;
+ char *path;
+ int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
} NFSClient;
typedef struct NFSRPC {
NFSClient *client;
} NFSRPC;
+/*
+ * Split an "nfs://<server>/<path>[?<name>=<number>&...]" URI into
+ * individual entries of @options.
+ *
+ * The server name is stored as "server.host" (with "server.type" set to
+ * "inet") and the path as "path".  Every query parameter must carry a
+ * numeric value and must be one of the names accepted by runtime_opts
+ * ("uid", "gid", "tcp-syncnt", "readahead", "pagecache", "debug"); it is
+ * stored under that same name so that nfs_client_open() picks it up.
+ *
+ * Returns 0 on success.  On failure returns -EINVAL and sets @errp;
+ * @options may already contain some of the entries in that case.
+ */
+static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
+{
+    static const char *const known_params[] = {
+        "uid", "gid", "tcp-syncnt", "readahead", "pagecache", "debug",
+    };
+    URI *uri = NULL;
+    QueryParams *qp = NULL;
+    int ret = -EINVAL, i;
+    size_t k;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        error_setg(errp, "Invalid URI specified");
+        goto out;
+    }
+    /* A URI without any scheme leaves uri->scheme NULL; do not strcmp() it */
+    if (!uri->scheme || strcmp(uri->scheme, "nfs") != 0) {
+        error_setg(errp, "URI scheme must be 'nfs'");
+        goto out;
+    }
+
+    if (!uri->server) {
+        error_setg(errp, "missing hostname in URI");
+        goto out;
+    }
+
+    if (!uri->path) {
+        error_setg(errp, "missing file path in URI");
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (!qp) {
+        error_setg(errp, "could not parse query parameters");
+        goto out;
+    }
+
+    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put(options, "server.type", qstring_from_str("inet"));
+    qdict_put(options, "path", qstring_from_str(uri->path));
+
+    for (i = 0; i < qp->n; i++) {
+        unsigned long long val;
+        bool known = false;
+
+        if (!qp->p[i].value) {
+            error_setg(errp, "Value for NFS parameter expected: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+        /*
+         * Only the validity of the number matters here, but
+         * parse_uint_full() writes the parsed value through its second
+         * argument, so a real variable has to be supplied.
+         */
+        if (parse_uint_full(qp->p[i].value, &val, 0)) {
+            error_setg(errp, "Illegal value for NFS parameter: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+        for (k = 0; k < sizeof(known_params) / sizeof(known_params[0]); k++) {
+            if (!strcmp(qp->p[i].name, known_params[k])) {
+                known = true;
+                break;
+            }
+        }
+        if (!known) {
+            error_setg(errp, "Unknown NFS parameter name: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+        /* Use the runtime option name so the value is actually consumed */
+        qdict_put(options, known_params[k],
+                  qstring_from_str(qp->p[i].value));
+    }
+    ret = 0;
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    if (uri) {
+        uri_free(uri);
+    }
+    return ret;
+}
+
+/*
+ * Scan @options for any key that must not be combined with a filename:
+ * one of the fixed names listed below, or anything starting with
+ * "server.".  On the first such key, set @errp and return true;
+ * return false if none is present.
+ */
+static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
+{
+    static const char *const filename_keys[] = {
+        "host",
+        "path",
+        "user",
+        "group",
+        "tcp-syn-count",
+        "readahead-size",
+        "page-cache-size",
+        "debug-level",
+    };
+    const size_t n_keys = sizeof(filename_keys) / sizeof(filename_keys[0]);
+    const QDictEntry *entry;
+    size_t k;
+
+    for (entry = qdict_first(options); entry;
+         entry = qdict_next(options, entry)) {
+        bool clash = strstart(entry->key, "server.", NULL);
+
+        for (k = 0; !clash && k < n_keys; k++) {
+            clash = !strcmp(entry->key, filename_keys[k]);
+        }
+
+        if (clash) {
+            error_setg(errp, "Option %s cannot be used with a filename",
+                       entry->key);
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*
+ * .bdrv_parse_filename callback: turn @filename into entries of
+ * @options via nfs_parse_uri(), unless @options already holds a key
+ * that conflicts with using a filename (in which case @errp is set by
+ * the conflict check and nothing is parsed).
+ */
+static void nfs_parse_filename(const char *filename, QDict *options,
+                               Error **errp)
+{
+    if (!nfs_has_filename_options_conflict(options, errp)) {
+        nfs_parse_uri(filename, options, errp);
+    }
+}
+
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
return task.ret;
}
-/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "nfs",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
- .name = "filename",
+ .name = "path",
.type = QEMU_OPT_STRING,
- .help = "URL to the NFS file",
+ .help = "Path of the image on the host",
+ },
+ {
+ .name = "uid",
+ .type = QEMU_OPT_NUMBER,
+ .help = "UID value to use when talking to the server",
+ },
+ {
+ .name = "gid",
+ .type = QEMU_OPT_NUMBER,
+ .help = "GID value to use when talking to the server",
+ },
+ {
+ .name = "tcp-syncnt",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Number of SYNs to send during the session establish",
+ },
+ {
+ .name = "readahead",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Set the readahead size in bytes",
+ },
+ {
+ .name = "pagecache",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Set the pagecache size in bytes",
+ },
+ {
+ .name = "debug",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Set the NFS debug level (max 2)",
},
{ /* end of list */ }
},
nfs_client_close(client);
}
-static int64_t nfs_client_open(NFSClient *client, const char *filename,
+/*
+ * Build an NFSServer from the "server."-prefixed entries of @options.
+ *
+ * The entries are moved out of @options into a temporary QDict,
+ * crumpled into a nested object and fed to the NFSServer input visitor.
+ * Returns the new NFSServer, or NULL with @errp set when no "server."
+ * entries exist, crumpling fails, or the visitor reports an error.
+ */
+static NFSServer *nfs_config(QDict *options, Error **errp)
+{
+    NFSServer *srv = NULL;
+    QDict *server_opts = NULL;
+    QObject *nested = NULL;
+    Visitor *in_v = NULL;
+    Error *err = NULL;
+
+    qdict_extract_subqdict(options, &server_opts, "server.");
+
+    if (!qdict_size(server_opts)) {
+        error_setg(errp, "NFS server address missing");
+    } else {
+        nested = qdict_crumple(server_opts, errp);
+        if (nested) {
+            in_v = qobject_input_visitor_new(nested, true);
+            visit_type_NFSServer(in_v, NULL, &srv, &err);
+            if (err) {
+                error_propagate(errp, err);
+            }
+        }
+    }
+
+    /* Common cleanup; each release helper is called exactly once */
+    QDECREF(server_opts);
+    qobject_decref(nested);
+    visit_free(in_v);
+    return srv;
+}
+
+
+static int64_t nfs_client_open(NFSClient *client, QDict *options,
int flags, Error **errp, int open_flags)
{
- int ret = -EINVAL, i;
+ int ret = -EINVAL;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
struct stat st;
- URI *uri;
- QueryParams *qp = NULL;
char *file = NULL, *strp = NULL;
- uri = uri_parse(filename);
- if (!uri) {
- error_setg(errp, "Invalid URL specified");
+ opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
goto fail;
}
- if (!uri->server) {
- error_setg(errp, "Invalid URL specified");
+
+ client->path = g_strdup(qemu_opt_get(opts, "path"));
+ if (!client->path) {
+ ret = -EINVAL;
+ error_setg(errp, "No path was specified");
goto fail;
}
- strp = strrchr(uri->path, '/');
+
+ strp = strrchr(client->path, '/');
if (strp == NULL) {
error_setg(errp, "Invalid URL specified");
goto fail;
file = g_strdup(strp);
*strp = 0;
+ /* Pop the config into our state object, Exit if invalid */
+ client->server = nfs_config(options, errp);
+ if (!client->server) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
client->context = nfs_init_context();
if (client->context == NULL) {
error_setg(errp, "Failed to init NFS context");
goto fail;
}
- qp = query_params_parse(uri->query);
- for (i = 0; i < qp->n; i++) {
- unsigned long long val;
- if (!qp->p[i].value) {
- error_setg(errp, "Value for NFS parameter expected: %s",
- qp->p[i].name);
+ if (qemu_opt_get(opts, "uid")) {
+ client->uid = qemu_opt_get_number(opts, "uid", 0);
+ nfs_set_uid(client->context, client->uid);
+ }
+
+ if (qemu_opt_get(opts, "gid")) {
+ client->gid = qemu_opt_get_number(opts, "gid", 0);
+ nfs_set_gid(client->context, client->gid);
+ }
+
+ if (qemu_opt_get(opts, "tcp-syncnt")) {
+ client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
+ nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
+ }
+
+#ifdef LIBNFS_FEATURE_READAHEAD
+ if (qemu_opt_get(opts, "readahead")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS readahead "
+ "if cache.direct = on");
goto fail;
}
- if (parse_uint_full(qp->p[i].value, &val, 0)) {
- error_setg(errp, "Illegal value for NFS parameter: %s",
- qp->p[i].name);
- goto fail;
+ client->readahead = qemu_opt_get_number(opts, "readahead", 0);
+ if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
+ error_report("NFS Warning: Truncating NFS readahead "
+ "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+ client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
}
- if (!strcmp(qp->p[i].name, "uid")) {
- nfs_set_uid(client->context, val);
- } else if (!strcmp(qp->p[i].name, "gid")) {
- nfs_set_gid(client->context, val);
- } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
- nfs_set_tcp_syncnt(client->context, val);
-#ifdef LIBNFS_FEATURE_READAHEAD
- } else if (!strcmp(qp->p[i].name, "readahead")) {
- if (open_flags & BDRV_O_NOCACHE) {
- error_setg(errp, "Cannot enable NFS readahead "
- "if cache.direct = on");
- goto fail;
- }
- if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
- error_report("NFS Warning: Truncating NFS readahead"
- " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
- val = QEMU_NFS_MAX_READAHEAD_SIZE;
- }
- nfs_set_readahead(client->context, val);
+ nfs_set_readahead(client->context, client->readahead);
#ifdef LIBNFS_FEATURE_PAGECACHE
- nfs_set_pagecache_ttl(client->context, 0);
+ nfs_set_pagecache_ttl(client->context, 0);
#endif
- client->cache_used = true;
+ client->cache_used = true;
+ }
#endif
+
#ifdef LIBNFS_FEATURE_PAGECACHE
- nfs_set_pagecache_ttl(client->context, 0);
- } else if (!strcmp(qp->p[i].name, "pagecache")) {
- if (open_flags & BDRV_O_NOCACHE) {
- error_setg(errp, "Cannot enable NFS pagecache "
- "if cache.direct = on");
- goto fail;
- }
- if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) {
- error_report("NFS Warning: Truncating NFS pagecache"
- " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
- val = QEMU_NFS_MAX_PAGECACHE_SIZE;
- }
- nfs_set_pagecache(client->context, val);
- nfs_set_pagecache_ttl(client->context, 0);
- client->cache_used = true;
+ if (qemu_opt_get(opts, "pagecache")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS pagecache "
+ "if cache.direct = on");
+ goto fail;
+ }
+ client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
+ if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
+ error_report("NFS Warning: Truncating NFS pagecache "
+ "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+ client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
+ }
+ nfs_set_pagecache(client->context, client->pagecache);
+ nfs_set_pagecache_ttl(client->context, 0);
+ client->cache_used = true;
+ }
#endif
+
#ifdef LIBNFS_FEATURE_DEBUG
- } else if (!strcmp(qp->p[i].name, "debug")) {
- /* limit the maximum debug level to avoid potential flooding
- * of our log files. */
- if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
- error_report("NFS Warning: Limiting NFS debug level"
- " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
- val = QEMU_NFS_MAX_DEBUG_LEVEL;
- }
- nfs_set_debug(client->context, val);
-#endif
- } else {
- error_setg(errp, "Unknown NFS parameter name: %s",
- qp->p[i].name);
- goto fail;
+ if (qemu_opt_get(opts, "debug")) {
+ client->debug = qemu_opt_get_number(opts, "debug", 0);
+ /* limit the maximum debug level to avoid potential flooding
+ * of our log files. */
+ if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
+ error_report("NFS Warning: Limiting NFS debug level "
+ "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+ client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
}
+ nfs_set_debug(client->context, client->debug);
}
+#endif
- ret = nfs_mount(client->context, uri->server, uri->path);
+ ret = nfs_mount(client->context, client->server->host, client->path);
if (ret < 0) {
error_setg(errp, "Failed to mount nfs share: %s",
nfs_get_error(client->context));
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
client->st_blocks = st.st_blocks;
client->has_zero_init = S_ISREG(st.st_mode);
+ *strp = '/';
goto out;
+
fail:
nfs_client_close(client);
out:
- if (qp) {
- query_params_free(qp);
- }
- uri_free(uri);
+ qemu_opts_del(opts);
g_free(file);
return ret;
}
Error **errp) {
NFSClient *client = bs->opaque;
int64_t ret;
- QemuOpts *opts;
- Error *local_err = NULL;
client->aio_context = bdrv_get_aio_context(bs);
- opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto out;
- }
- ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+ ret = nfs_client_open(client, options,
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
errp, bs->open_flags);
if (ret < 0) {
- goto out;
+ return ret;
}
bs->total_sectors = ret;
ret = 0;
-out:
- qemu_opts_del(opts);
return ret;
}
int ret = 0;
int64_t total_size = 0;
NFSClient *client = g_new0(NFSClient, 1);
+ QDict *options = NULL;
client->aio_context = qemu_get_aio_context();
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
- ret = nfs_client_open(client, url, O_CREAT, errp, 0);
+ options = qdict_new();
+ ret = nfs_parse_uri(url, options, errp);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = nfs_client_open(client, options, O_CREAT, errp, 0);
if (ret < 0) {
goto out;
}
return 0;
}
+/*
+ * .bdrv_refresh_filename callback.
+ *
+ * Rewrites bs->exact_filename as an nfs:// URI built from the stored
+ * server host and path, appending uid and/or gid as query parameters
+ * when they are non-zero.  Also builds bs->full_open_options: a
+ * flattened QDict holding "driver", the serialized server, "path" and
+ * every numeric setting of the client that is non-zero.
+ */
+static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
+{
+    NFSClient *client = bs->opaque;
+    QDict *full_opts = qdict_new();
+    QObject *server_obj;
+    Visitor *out_v;
+
+    qdict_put(full_opts, "driver", qstring_from_str("nfs"));
+
+    if (client->uid && client->gid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
+                 client->server->host, client->path, client->uid, client->gid);
+    } else if (client->uid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?uid=%" PRId64, client->server->host, client->path,
+                 client->uid);
+    } else if (client->gid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?gid=%" PRId64, client->server->host, client->path,
+                 client->gid);
+    } else {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s", client->server->host, client->path);
+    }
+
+    /* Serialize the NFSServer back into a QObject for the options dict */
+    out_v = qobject_output_visitor_new(&server_obj);
+    visit_type_NFSServer(out_v, NULL, &client->server, &error_abort);
+    visit_complete(out_v, &server_obj);
+    assert(qobject_type(server_obj) == QTYPE_QDICT);
+
+    qdict_put_obj(full_opts, "server", server_obj);
+    qdict_put(full_opts, "path", qstring_from_str(client->path));
+
+    /* Numeric settings are only reported when they are non-zero */
+    if (client->uid) {
+        qdict_put(full_opts, "uid", qint_from_int(client->uid));
+    }
+    if (client->gid) {
+        qdict_put(full_opts, "gid", qint_from_int(client->gid));
+    }
+    if (client->tcp_syncnt) {
+        qdict_put(full_opts, "tcp-syncnt",
+                  qint_from_int(client->tcp_syncnt));
+    }
+    if (client->readahead) {
+        qdict_put(full_opts, "readahead",
+                  qint_from_int(client->readahead));
+    }
+    if (client->pagecache) {
+        qdict_put(full_opts, "pagecache",
+                  qint_from_int(client->pagecache));
+    }
+    if (client->debug) {
+        qdict_put(full_opts, "debug", qint_from_int(client->debug));
+    }
+
+    visit_free(out_v);
+    qdict_flatten(full_opts);
+    bs->full_open_options = full_opts;
+}
+
#ifdef LIBNFS_FEATURE_PAGECACHE
static void nfs_invalidate_cache(BlockDriverState *bs,
Error **errp)
.protocol_name = "nfs",
.instance_size = sizeof(NFSClient),
- .bdrv_needs_filename = true,
+ .bdrv_parse_filename = nfs_parse_filename,
.create_opts = &nfs_create_opts,
.bdrv_has_zero_init = nfs_has_zero_init,
.bdrv_detach_aio_context = nfs_detach_aio_context,
.bdrv_attach_aio_context = nfs_attach_aio_context,
+ .bdrv_refresh_filename = nfs_refresh_filename,
#ifdef LIBNFS_FEATURE_PAGECACHE
.bdrv_invalidate_cache = nfs_invalidate_cache,
#include "qapi/error.h"
#include "qemu/option.h"
+/* Per-node state of the raw driver, filled in by raw_read_options(). */
+typedef struct BDRVRawState {
+ /* Value of the "offset" option; added to the offset of each request */
+ uint64_t offset;
+ /* Value of the "size" option, or file length minus offset if unset */
+ uint64_t size;
+ /* True when the "size" option was given explicitly */
+ bool has_size;
+} BDRVRawState;
+
+/*
+ * Runtime options understood by the raw driver.  Both "offset" and
+ * "size" are parsed as sizes (QEMU_OPT_SIZE); raw_read_options()
+ * absorbs them from the options QDict.
+ */
+static QemuOptsList raw_runtime_opts = {
+ .name = "raw",
+ .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
+ .desc = {
+ {
+ .name = "offset",
+ .type = QEMU_OPT_SIZE,
+ .help = "offset in the disk where the image starts",
+ },
+ {
+ .name = "size",
+ .type = QEMU_OPT_SIZE,
+ .help = "virtual disk size",
+ },
+ { /* end of list */ }
+ },
+};
+
static QemuOptsList raw_create_opts = {
.name = "raw-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
}
};
+/*
+ * Read the "offset" and "size" runtime options from @options into @s
+ * and validate them against the current length of bs->file.
+ *
+ * When "size" is absent, s->size becomes the file length minus the
+ * offset and s->has_size is false.  Fails if the file length cannot be
+ * obtained (that error code is returned), or with -EINVAL if the
+ * options cannot be absorbed, if offset plus size exceeds the file
+ * length, or if the size is not a multiple of BDRV_SECTOR_SIZE.
+ * @errp is set on every failure; returns 0 on success.
+ */
+static int raw_read_options(QDict *options, BlockDriverState *bs,
+                            BDRVRawState *s, Error **errp)
+{
+    Error *absorb_err = NULL;
+    QemuOpts *parsed = NULL;
+    int64_t file_len;
+    int ret = -EINVAL;
+
+    file_len = bdrv_getlength(bs->file->bs);
+    if (file_len < 0) {
+        error_setg_errno(errp, -file_len, "Could not get image size");
+        return file_len;
+    }
+
+    parsed = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(parsed, options, &absorb_err);
+    if (absorb_err) {
+        error_propagate(errp, absorb_err);
+        goto out;
+    }
+
+    s->offset = qemu_opt_get_size(parsed, "offset", 0);
+    s->has_size = qemu_opt_find(parsed, "size") != NULL;
+    if (s->has_size) {
+        s->size = qemu_opt_get_size(parsed, "size", 0);
+    } else {
+        s->size = file_len - s->offset;
+    }
+
+    /* The window [offset, offset + size) must lie inside the file */
+    if (file_len < s->offset || (file_len - s->offset) < s->size) {
+        error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
+            "(%" PRIu64 ") has to be smaller or equal to the "
+            " actual size of the containing file (%" PRId64 ")",
+            s->offset, s->size, file_len);
+        goto out;
+    }
+
+    /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
+     * up and leaking out of the specified area. */
+    if (!QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
+        error_setg(errp, "Specified size is not multiple of %llu",
+            BDRV_SECTOR_SIZE);
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    qemu_opts_del(parsed);
+    return ret;
+}
+
+/*
+ * .bdrv_reopen_prepare callback: parse the new options into a freshly
+ * allocated BDRVRawState stored in reopen_state->opaque.  The state is
+ * later installed by raw_reopen_commit() or discarded by
+ * raw_reopen_abort().
+ *
+ * Returns the result of raw_read_options().  On failure the temporary
+ * state is released here and opaque is reset to NULL, so it is not
+ * leaked if no abort follows, and a later raw_reopen_abort() stays a
+ * harmless g_free(NULL).
+ */
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
- return 0;
+    int ret;
+
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs != NULL);
+
+    reopen_state->opaque = g_new0(BDRVRawState, 1);
+
+    ret = raw_read_options(
+        reopen_state->options,
+        reopen_state->bs,
+        reopen_state->opaque,
+        errp);
+    if (ret < 0) {
+        /* NOTE(review): generic reopen code is expected to skip
+         * .bdrv_reopen_abort for a state whose prepare failed - confirm */
+        g_free(reopen_state->opaque);
+        reopen_state->opaque = NULL;
+    }
+
+    return ret;
+}
+
+/*
+ * .bdrv_reopen_commit callback: copy the state prepared in
+ * state->opaque over the node's live BDRVRawState, then free the
+ * prepared copy and clear the pointer.
+ */
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawState *live = state->bs->opaque;
+    BDRVRawState *prepared = state->opaque;
+
+    *live = *prepared;
+
+    g_free(prepared);
+    state->opaque = NULL;
+}
+
+/*
+ * .bdrv_reopen_abort callback: discard the state stored in
+ * state->opaque and clear the pointer; the node's live state is left
+ * untouched.
+ */
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+ g_free(state->opaque);
+ state->opaque = NULL;
}
+/*
+ * Read request: shift @offset by the configured s->offset and forward
+ * the request to bs->file.  Returns -EINVAL if the shifted offset
+ * would not fit in a uint64_t.
+ */
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
+ BDRVRawState *s = bs->opaque;
+
+ /* Guard the addition below against unsigned wrap-around */
+ if (offset > UINT64_MAX - s->offset) {
+ return -EINVAL;
+ }
+ offset += s->offset;
+
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
+ BDRVRawState *s = bs->opaque;
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
+ if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
+ /* There's not enough space for the data. Don't write anything and just
+ * fail to prevent leaking out of the size specified in options. */
+ return -ENOSPC;
+ }
+
+ if (offset > UINT64_MAX - s->offset) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
/* Handling partial writes would be a pain - so we just
* require that guests have 512-byte request alignment if
qiov = &local_qiov;
}
+ offset += s->offset;
+
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
int nb_sectors, int *pnum,
BlockDriverState **file)
{
+ BDRVRawState *s = bs->opaque;
*pnum = nb_sectors;
*file = bs->file->bs;
+ sector_num += s->offset / BDRV_SECTOR_SIZE;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
int64_t offset, int count,
BdrvRequestFlags flags)
{
+ BDRVRawState *s = bs->opaque;
+ if (offset > UINT64_MAX - s->offset) {
+ return -EINVAL;
+ }
+ offset += s->offset;
return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}
+/*
+ * Discard request: shift @offset by the configured s->offset and
+ * forward the request to the underlying node.
+ *
+ * @offset is a signed 64-bit value, so the shifted offset has to stay
+ * within INT64_MAX (the same bound raw_truncate() enforces); a check
+ * against UINT64_MAX would let the sum wrap into a negative offset.
+ * Returns -EINVAL when @offset is negative or the sum would not fit.
+ */
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
+    BDRVRawState *s = bs->opaque;
+
+    if (offset < 0 || s->offset > INT64_MAX ||
+        offset > INT64_MAX - (int64_t)s->offset) {
+        return -EINVAL;
+    }
+    offset += s->offset;
return bdrv_co_pdiscard(bs->file->bs, offset, count);
}
+/*
+ * Report the length of the raw node and refresh s->size from the
+ * current length of the underlying file.
+ *
+ * A negative length from the underlying node is returned unchanged.
+ * Otherwise s->size becomes 0 if the file is shorter than the offset,
+ * the smaller of the stored size and the space behind the offset if a
+ * size was configured, or simply the space behind the offset.
+ */
static int64_t raw_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file->bs);
+    BDRVRawState *s = bs->opaque;
+    int64_t file_len;
+
+    /* Update size. It should not change unless the file was externally
+     * modified. */
+    file_len = bdrv_getlength(bs->file->bs);
+    if (file_len < 0) {
+        return file_len;
+    }
+
+    if (file_len < s->offset) {
+        s->size = 0;
+    } else if (s->has_size) {
+        /* Try to honour the size */
+        s->size = MIN(s->size, file_len - s->offset);
+    } else {
+        s->size = file_len - s->offset;
+    }
+
+    return s->size;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+/*
+ * Resize the raw node to @offset bytes.
+ *
+ * Returns -ENOTSUP when an explicit size was configured, and -EINVAL
+ * when @offset plus s->offset would exceed INT64_MAX.  Otherwise
+ * s->size is set to @offset and the underlying node is truncated to
+ * @offset plus s->offset.
+ */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->has_size) {
+ return -ENOTSUP;
+ }
+
+ /* Reject sizes whose shifted end would overflow a signed 64-bit value */
+ if (INT64_MAX - offset < s->offset) {
+ return -EINVAL;
+ }
+
+ /* NOTE(review): s->size is updated before bdrv_truncate() has succeeded */
+ s->size = offset;
+ offset += s->offset;
return bdrv_truncate(bs->file->bs, offset);
}
+/*
+ * Forward an ioctl to the underlying node.  Returns -ENOTSUP when a
+ * non-zero offset or an explicit size is configured.
+ */
static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
+ BDRVRawState *s = bs->opaque;
+ if (s->offset || s->has_size) {
+ return -ENOTSUP;
+ }
return bdrv_co_ioctl(bs->file->bs, req, buf);
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
bs->sg = bs->file->bs->sg;
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->file->bs->filename);
}
+ ret = raw_read_options(options, bs, s, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (bs->sg && (s->offset || s->has_size)) {
+ error_setg(errp, "Cannot use offset/size with SCSI generic devices");
+ return -EINVAL;
+ }
+
return 0;
}
+/*
+ * Probe the block sizes of the underlying node into @bsz.  A probe
+ * failure is returned unchanged; if the configured offset is not
+ * aligned to the larger of the logical and physical block size,
+ * -ENOTSUP is returned.  Returns 0 otherwise.
+ */
static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
- return bdrv_probe_blocksizes(bs->file->bs, bsz);
+    BDRVRawState *s = bs->opaque;
+    int probe_ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
+
+    if (probe_ret < 0) {
+        return probe_ret;
+    }
+
+    return QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))
+           ? 0 : -ENOTSUP;
}
+/*
+ * Probe the geometry of the underlying node into @geo.  Returns
+ * -ENOTSUP when a non-zero offset or an explicit size is configured.
+ */
static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
+ BDRVRawState *s = bs->opaque;
+ if (s->offset || s->has_size) {
+ return -ENOTSUP;
+ }
return bdrv_probe_geometry(bs->file->bs, geo);
}
BlockDriver bdrv_raw = {
.format_name = "raw",
+ .instance_size = sizeof(BDRVRawState),
.bdrv_probe = &raw_probe,
.bdrv_reopen_prepare = &raw_reopen_prepare,
+ .bdrv_reopen_commit = &raw_reopen_commit,
+ .bdrv_reopen_abort = &raw_reopen_abort,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
#include "block/block_int.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "qemu/cutils.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
+#include "qapi-visit.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
/* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
* this block driver code.
*/
LIBSSH2_SFTP_ATTRIBUTES attrs;
+ InetSocketAddress *inet;
+
/* Used to warn if 'flush' is not supported. */
- char *hostport;
bool unsafe_flush_warning;
} BDRVSSHState;
static void ssh_state_free(BDRVSSHState *s)
{
- g_free(s->hostport);
if (s->sftp_handle) {
libssh2_sftp_close(s->sftp_handle);
}
{
URI *uri = NULL;
QueryParams *qp;
+ char *port_str;
int i;
uri = uri_parse(filename);
qdict_put(options, "user", qstring_from_str(uri->user));
}
- qdict_put(options, "host", qstring_from_str(uri->server));
+ qdict_put(options, "server.host", qstring_from_str(uri->server));
- if (uri->port) {
- qdict_put(options, "port", qint_from_int(uri->port));
- }
+ port_str = g_strdup_printf("%d", uri->port ?: 22);
+ qdict_put(options, "server.port", qstring_from_str(port_str));
+ g_free(port_str);
qdict_put(options, "path", qstring_from_str(uri->path));
return -EINVAL;
}
+/*
+ * Scan @options for any key that must not be combined with a file
+ * name: one of the fixed names listed below, or anything starting with
+ * "server.".  On the first such key, set @errp and return true;
+ * return false if none is present.
+ */
+static bool ssh_has_filename_options_conflict(QDict *options, Error **errp)
+{
+    static const char *const filename_keys[] = {
+        "host",
+        "port",
+        "path",
+        "user",
+        "host_key_check",
+    };
+    const size_t n_keys = sizeof(filename_keys) / sizeof(filename_keys[0]);
+    const QDictEntry *entry;
+    size_t k;
+
+    for (entry = qdict_first(options); entry;
+         entry = qdict_next(options, entry)) {
+        bool clash = strstart(entry->key, "server.", NULL);
+
+        for (k = 0; !clash && k < n_keys; k++) {
+            clash = !strcmp(entry->key, filename_keys[k]);
+        }
+
+        if (clash) {
+            error_setg(errp, "Option '%s' cannot be used with a file name",
+                       entry->key);
+            return true;
+        }
+    }
+
+    return false;
+}
+
static void ssh_parse_filename(const char *filename, QDict *options,
Error **errp)
{
- if (qdict_haskey(options, "user") ||
- qdict_haskey(options, "host") ||
- qdict_haskey(options, "port") ||
- qdict_haskey(options, "path") ||
- qdict_haskey(options, "host_key_check")) {
- error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option");
+ if (ssh_has_filename_options_conflict(options, errp)) {
return;
}
},
};
+/*
+ * Translate the legacy "host"/"port" options found in @legacy_opts
+ * into "server.host"/"server.port" entries of @output_opts.
+ *
+ * Nothing is written when no host is given.  A port without a host is
+ * an error: @errp is set and false is returned.  When a host is given
+ * without a port, the port defaults to stringify(22).  Returns true on
+ * success.
+ */
+static bool ssh_process_legacy_socket_options(QDict *output_opts,
+                                              QemuOpts *legacy_opts,
+                                              Error **errp)
+{
+    const char *legacy_host = qemu_opt_get(legacy_opts, "host");
+    const char *legacy_port = qemu_opt_get(legacy_opts, "port");
+
+    if (!legacy_host) {
+        if (legacy_port) {
+            error_setg(errp, "port may not be used without host");
+            return false;
+        }
+        return true;
+    }
+
+    qdict_put(output_opts, "server.host", qstring_from_str(legacy_host));
+    qdict_put(output_opts, "server.port",
+              qstring_from_str(legacy_port ? legacy_port : stringify(22)));
+
+    return true;
+}
+
+/*
+ * Build an InetSocketAddress from the "server."-prefixed entries of
+ * @options.
+ *
+ * The entries are moved out of @options into a temporary QDict,
+ * crumpled into a nested object and fed to the InetSocketAddress input
+ * visitor.  Returns the new address, or NULL with @errp set when no
+ * "server." entries exist, crumpling fails, or the visitor reports an
+ * error.  @s is not used by this function.
+ */
+static InetSocketAddress *ssh_config(BDRVSSHState *s, QDict *options,
+                                     Error **errp)
+{
+    InetSocketAddress *saddr = NULL;
+    QDict *server_opts = NULL;
+    QObject *nested = NULL;
+    Visitor *in_v = NULL;
+    Error *err = NULL;
+
+    qdict_extract_subqdict(options, &server_opts, "server.");
+
+    if (!qdict_size(server_opts)) {
+        error_setg(errp, "SSH server address missing");
+    } else {
+        nested = qdict_crumple(server_opts, errp);
+        if (nested) {
+            in_v = qobject_input_visitor_new(nested, true);
+            visit_type_InetSocketAddress(in_v, NULL, &saddr, &err);
+            if (err) {
+                error_propagate(errp, err);
+            }
+        }
+    }
+
+    /* Common cleanup; each release helper is called exactly once */
+    QDECREF(server_opts);
+    qobject_decref(nested);
+    visit_free(in_v);
+    return saddr;
+}
+
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
QemuOpts *opts = NULL;
Error *local_err = NULL;
- const char *host, *user, *path, *host_key_check;
- int port;
+ const char *user, *path, *host_key_check;
+ long port = 0;
opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
goto err;
}
- host = qemu_opt_get(opts, "host");
- if (!host) {
+ if (!ssh_process_legacy_socket_options(options, opts, errp)) {
ret = -EINVAL;
- error_setg(errp, "No hostname was specified");
goto err;
}
- port = qemu_opt_get_number(opts, "port", 22);
-
path = qemu_opt_get(opts, "path");
if (!path) {
ret = -EINVAL;
host_key_check = "yes";
}
- /* Construct the host:port name for inet_connect. */
- g_free(s->hostport);
- s->hostport = g_strdup_printf("%s:%d", host, port);
+ /* Pop the config into our state object, Exit if invalid */
+ s->inet = ssh_config(s, options, errp);
+ if (!s->inet) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
+ error_setg(errp, "Use only numeric port value");
+ ret = -EINVAL;
+ goto err;
+ }
/* Open the socket and connect. */
- s->sock = inet_connect(s->hostport, errp);
+ s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
if (s->sock < 0) {
ret = -EIO;
goto err;
}
/* Check the remote host's key against known_hosts. */
- ret = check_host_key(s, host, port, host_key_check, errp);
+ ret = check_host_key(s, s->inet->host, port, host_key_check,
+ errp);
if (ret < 0) {
goto err;
}
{
if (!s->unsafe_flush_warning) {
error_report("warning: ssh server %s does not support fsync",
- s->hostport);
+ s->inet->host);
if (what) {
error_report("to support fsync, you need %s", what);
}
BlockDriverState *base;
BlockdevOnError on_error;
char *backing_file_str;
+ int bs_flags;
} StreamBlockJob;
static int coroutine_fn stream_populate(BlockBackend *blk,
bdrv_set_backing_hd(bs, base);
}
+ /* Reopen the image back in read-only mode if necessary */
+ if (s->bs_flags != bdrv_get_flags(bs)) {
+ bdrv_reopen(bs, s->bs_flags, NULL);
+ }
+
g_free(s->backing_file_str);
block_job_completed(&s->common, data->ret);
g_free(data);
BlockCompletionFunc *cb, void *opaque, Error **errp)
{
StreamBlockJob *s;
+ BlockDriverState *iter;
+ int orig_bs_flags;
s = block_job_create(job_id, &stream_job_driver, bs, speed,
cb, opaque, errp);
return;
}
+ /* Make sure that the image is opened in read-write mode */
+ orig_bs_flags = bdrv_get_flags(bs);
+ if (!(orig_bs_flags & BDRV_O_RDWR)) {
+ if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
+ block_job_unref(&s->common);
+ return;
+ }
+ }
+
+ /* Block all intermediate nodes between bs and base, because they
+ * will disappear from the chain after this operation */
+ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
+ block_job_add_bdrv(&s->common, iter);
+ }
+
s->base = base;
s->backing_file_str = g_strdup(backing_file_str);
+ s->bs_flags = orig_bs_flags;
s->on_error = on_error;
s->common.co = qemu_coroutine_create(stream_run, s);
void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
bool has_base, const char *base,
+ bool has_base_node, const char *base_node,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_on_error, BlockdevOnError on_error,
Error **errp)
{
- BlockDriverState *bs;
+ BlockDriverState *bs, *iter;
BlockDriverState *base_bs = NULL;
AioContext *aio_context;
Error *local_err = NULL;
on_error = BLOCKDEV_ON_ERROR_REPORT;
}
- bs = qmp_get_root_bs(device, errp);
+ bs = bdrv_lookup_bs(device, device, errp);
if (!bs) {
return;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
+ if (has_base && has_base_node) {
+ error_setg(errp, "'base' and 'base-node' cannot be specified "
+ "at the same time");
goto out;
}
base_name = base;
}
+ if (has_base_node) {
+ base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+ if (!base_bs) {
+ goto out;
+ }
+ if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
+ error_setg(errp, "Node '%s' is not a backing image of '%s'",
+ base_node, device);
+ goto out;
+ }
+ assert(bdrv_get_aio_context(base_bs) == aio_context);
+ base_name = base_bs->filename;
+ }
+
+ /* Check for op blockers in the whole chain between bs and base */
+ for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
+ if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
+ goto out;
+ }
+ }
+
/* if we are streaming the entire chain, the result will have no backing
* file, and specifying one is therefore an error */
if (base_bs == NULL && has_backing_file) {
Error **errp)
{
BlockDriverState *bs;
+ BlockDriverState *iter;
BlockDriverState *base_bs, *top_bs;
AioContext *aio_context;
Error *local_err = NULL;
assert(bdrv_get_aio_context(base_bs) == aio_context);
- if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
- goto out;
+ for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) {
+ if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+ goto out;
+ }
}
/* Do not allow attempts to commit an image into itself */
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
on_error, block_job_cb, bs, &local_err, false);
} else {
+ BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
+ if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+ goto out;
+ }
commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
on_error, block_job_cb, bs,
has_backing_file ? backing_file : NULL, &local_err);
block_job_unref(job);
}
+/*
+ * Attach @bs to @job: take a reference on the node, record it at the head
+ * of job->nodes and call bdrv_op_block_all() on it with the job's blocker.
+ */
+void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+{
+    bdrv_ref(bs);
+    job->nodes = g_slist_prepend(job->nodes, bs);
+    bdrv_op_block_all(bs, job->blocker);
+}
+
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
BlockDriverState *bs, int64_t speed,
BlockCompletionFunc *cb, void *opaque, Error **errp)
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
- bdrv_op_block_all(bs, job->blocker);
+ block_job_add_bdrv(job, bs);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
+ GSList *l;
BlockDriverState *bs = blk_bs(job->blk);
bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
+ for (l = job->nodes; l; l = l->next) {
+ bs = l->data;
+ bdrv_op_unblock_all(bs, job->blocker);
+ bdrv_unref(bs);
+ }
+ g_slist_free(job->nodes);
blk_remove_aio_context_notifier(job->blk,
block_job_attached_aio_context,
block_job_detach_aio_context, job);
}
}
+/* Return true when mmap_lock_count is positive, false otherwise. */
+bool have_mmap_lock(void)
+{
+    return mmap_lock_count > 0;
+}
+
/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
vhost_scsi="no"
vhost_vsock="no"
kvm="no"
+colo="yes"
rdma=""
gprof="no"
debug_tcg="no"
;;
--enable-kvm) kvm="yes"
;;
+ --disable-colo) colo="no"
+ ;;
+ --enable-colo) colo="yes"
+ ;;
--disable-tcg-interpreter) tcg_interpreter="no"
;;
--enable-tcg-interpreter) tcg_interpreter="yes"
fdt fdt device tree
bluez bluez stack connectivity
kvm KVM acceleration support
+ colo COarse-grain LOck-stepping VM for Non-stop Service
rdma RDMA-based migration support
vde support for vde network
netmap support for netmap network
echo "ATTR/XATTR support $attr"
echo "Install blobs $blobs"
echo "KVM support $kvm"
+echo "COLO support $colo"
echo "RDMA support $rdma"
echo "TCG interpreter $tcg_interpreter"
echo "fdt support $fdt"
fi
echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
+if test "$colo" = "yes"; then
+ echo "CONFIG_COLO=y" >> $config_host_mak
+fi
+
if test "$rdma" = "yes" ; then
echo "CONFIG_RDMA=y" >> $config_host_mak
fi
uint8_t *tb_ptr = itb->tc_ptr;
qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
- "Trace %p [" TARGET_FMT_lx "] %s\n",
- itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
+ "Trace %p [%d: " TARGET_FMT_lx "] %s\n",
+ itb->tc_ptr, cpu->cpu_index, itb->pc,
+ lookup_symbol(itb->pc));
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
if (max_cycles > CF_COUNT_MASK)
max_cycles = CF_COUNT_MASK;
+ tb_lock();
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
tb->orig_tb = orig_tb;
+ tb_unlock();
+
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
cpu_tb_exec(cpu, tb);
+
+ tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
+ tb_unlock();
}
#endif
struct qemu_work_item {
struct qemu_work_item *next;
run_on_cpu_func func;
- void *data;
+ run_on_cpu_data data;
bool free, exclusive, done;
};
qemu_cpu_kick(cpu);
}
-void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
QemuMutex *mutex)
{
struct qemu_work_item wi;
}
}
-void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
struct qemu_work_item *wi;
}
}
-void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
+ run_on_cpu_data data)
{
struct qemu_work_item *wi;
#endif /* CONFIG_LINUX */
-static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
}
};
-static void cpu_throttle_thread(CPUState *cpu, void *opaque)
+static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
double pct;
double throttle_ratio;
}
CPU_FOREACH(cpu) {
if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
- async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
+ async_run_on_cpu(cpu, cpu_throttle_thread,
+ RUN_ON_CPU_NULL);
}
}
qemu_thread_get_self(&io_thread);
}
-void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
#endif
}
-static void tcg_exec_all(void);
+/*
+ * Compute the instruction budget for the next execution slice.
+ *
+ * In replay-play mode the budget comes from replay_get_instructions();
+ * otherwise it is derived from the QEMU_CLOCK_VIRTUAL deadline through
+ * qemu_icount_round().
+ */
+static int64_t tcg_get_icount_limit(void)
+{
+    int64_t deadline;
+
+    if (replay_mode == REPLAY_MODE_PLAY) {
+        return replay_get_instructions();
+    }
+
+    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+
+    /* Maintain prior (possibly buggy) behaviour where if no deadline
+     * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+     * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+     * nanoseconds.
+     */
+    if (deadline < 0 || deadline > INT32_MAX) {
+        deadline = INT32_MAX;
+    }
+
+    return qemu_icount_round(deadline);
+}
+
+/*
+ * With icount enabled, notify QEMU_CLOCK_VIRTUAL when its deadline
+ * is exactly 0.  Does nothing when icount is disabled.
+ */
+static void handle_icount_deadline(void)
+{
+    if (!use_icount) {
+        return;
+    }
+
+    if (qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL) == 0) {
+        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    }
+}
+
+/*
+ * Run @cpu through cpu_exec() once, bracketed by cpu_exec_start() and
+ * cpu_exec_end(), and return cpu_exec()'s result.
+ *
+ * With icount enabled, a fresh instruction budget is handed to the vCPU
+ * before the run and whatever it did not consume is folded back into
+ * timers_state.qemu_icount afterwards.
+ */
+static int tcg_cpu_exec(CPUState *cpu)
+{
+    int ret;
+#ifdef CONFIG_PROFILER
+    int64_t ti = profile_getclock();
+#endif
+
+    if (use_icount) {
+        int64_t budget;
+        int low;
+
+        /* Take back any budget still assigned to this vCPU... */
+        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+                                     + cpu->icount_extra);
+        cpu->icount_decr.u16.low = 0;
+        cpu->icount_extra = 0;
+
+        /* ...then assign a new one: at most 0xffff goes into the 16-bit
+         * decrementer, the remainder into icount_extra.
+         */
+        budget = tcg_get_icount_limit();
+        timers_state.qemu_icount += budget;
+        low = (budget > 0xffff) ? 0xffff : budget;
+        cpu->icount_decr.u16.low = low;
+        cpu->icount_extra = budget - low;
+    }
+
+    cpu_exec_start(cpu);
+    ret = cpu_exec(cpu);
+    cpu_exec_end(cpu);
+
+#ifdef CONFIG_PROFILER
+    tcg_time += profile_getclock() - ti;
+#endif
+
+    if (use_icount) {
+        /* Fold pending instructions back into the
+           instruction counter, and clear the interrupt flag.  */
+        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+                                     + cpu->icount_extra);
+        cpu->icount_decr.u32 = 0;
+        cpu->icount_extra = 0;
+        replay_account_executed_instructions();
+    }
+
+    return ret;
+}
+
+/*
+ * Tear down the first vCPU found that is flagged for unplug and can no
+ * longer run: destroy it, mark it as not created and signal
+ * qemu_cpu_cond.  At most one vCPU is handled per call.
+ */
+static void deal_with_unplugged_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (!cpu->unplug || cpu_can_run(cpu)) {
+            continue;
+        }
+
+        qemu_tcg_destroy_vcpu(cpu);
+        cpu->created = false;
+        qemu_cond_signal(&qemu_cpu_cond);
+        break;
+    }
+}
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
- CPUState *remove_cpu = NULL;
rcu_register_thread();
/* process any pending work */
atomic_mb_set(&exit_request, 1);
- while (1) {
- tcg_exec_all();
+ cpu = first_cpu;
- if (use_icount) {
- int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+ while (1) {
+ /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
+ qemu_account_warp_timer();
- if (deadline == 0) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- }
+ if (!cpu) {
+ cpu = first_cpu;
}
- qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
- CPU_FOREACH(cpu) {
- if (cpu->unplug && !cpu_can_run(cpu)) {
- remove_cpu = cpu;
+
+ for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
+
+ qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+ (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+ if (cpu_can_run(cpu)) {
+ int r;
+ r = tcg_cpu_exec(cpu);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_guest_debug(cpu);
+ break;
+ } else if (r == EXCP_ATOMIC) {
+ /* Keep the behaviour of the removed tcg_exec_all(): an
+ * EXCP_ATOMIC result must be followed by
+ * cpu_exec_step_atomic(), otherwise the return value is
+ * silently ignored here.
+ */
+ cpu_exec_step_atomic(cpu);
+ }
+ } else if (cpu->stop || cpu->stopped) {
+ if (cpu->unplug) {
+ cpu = CPU_NEXT(cpu);
+ }
break;
}
- }
- if (remove_cpu) {
- qemu_tcg_destroy_vcpu(remove_cpu);
- cpu->created = false;
- qemu_cond_signal(&qemu_cpu_cond);
- remove_cpu = NULL;
- }
+
+ } /* for cpu.. */
+
+ /* Pairs with smp_wmb in qemu_cpu_kick. */
+ atomic_mb_set(&exit_request, 0);
+
+ handle_icount_deadline();
+
+ qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
+ deal_with_unplugged_cpus();
}
return NULL;
qemu_mutex_unlock(&qemu_global_mutex);
}
-static int all_vcpus_paused(void)
+static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
- return 0;
+ return false;
}
}
- return 1;
+ return true;
}
void pause_all_vcpus(void)
}
}
-static int64_t tcg_get_icount_limit(void)
-{
- int64_t deadline;
-
- if (replay_mode != REPLAY_MODE_PLAY) {
- deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
- /* Maintain prior (possibly buggy) behaviour where if no deadline
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
- * nanoseconds.
- */
- if ((deadline < 0) || (deadline > INT32_MAX)) {
- deadline = INT32_MAX;
- }
-
- return qemu_icount_round(deadline);
- } else {
- return replay_get_instructions();
- }
-}
-
-static int tcg_cpu_exec(CPUState *cpu)
-{
- int ret;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
-
-#ifdef CONFIG_PROFILER
- ti = profile_getclock();
-#endif
- if (use_icount) {
- int64_t count;
- int decr;
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u16.low = 0;
- cpu->icount_extra = 0;
- count = tcg_get_icount_limit();
- timers_state.qemu_icount += count;
- decr = (count > 0xffff) ? 0xffff : count;
- count -= decr;
- cpu->icount_decr.u16.low = decr;
- cpu->icount_extra = count;
- }
- cpu_exec_start(cpu);
- ret = cpu_exec(cpu);
- cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
- tcg_time += profile_getclock() - ti;
-#endif
- if (use_icount) {
- /* Fold pending instructions back into the
- instruction counter, and clear the interrupt flag. */
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u32 = 0;
- cpu->icount_extra = 0;
- replay_account_executed_instructions();
- }
- return ret;
-}
-
-static void tcg_exec_all(void)
-{
- int r;
-
- /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
- qemu_account_warp_timer();
-
- if (next_cpu == NULL) {
- next_cpu = first_cpu;
- }
- for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
- CPUState *cpu = next_cpu;
-
- qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
- (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
-
- if (cpu_can_run(cpu)) {
- r = tcg_cpu_exec(cpu);
- if (r == EXCP_DEBUG) {
- cpu_handle_guest_debug(cpu);
- break;
- } else if (r == EXCP_ATOMIC) {
- cpu_exec_step_atomic(cpu);
- }
- } else if (cpu->stop || cpu->stopped) {
- if (cpu->unplug) {
- next_cpu = CPU_NEXT(cpu);
- }
- break;
- }
- }
-
- /* Pairs with smp_wmb in qemu_cpu_kick. */
- atomic_mb_set(&exit_request, 0);
-}
-
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
/* XXX: implement xxx_cpu_list for targets that still miss it */
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
-#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
--- /dev/null
+COarse-grained LOck-stepping Virtual Machines for Non-stop Service
+----------------------------------------
+Copyright (c) 2016 Intel Corporation
+Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+Copyright (c) 2016 Fujitsu, Corp.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+This document gives an overview of COLO's design and how to use it.
+
+== Background ==
+Virtual machine (VM) replication is a well known technique for providing
+application-agnostic software-implemented hardware fault tolerance,
+also known as "non-stop service".
+
+COLO (COarse-grained LOck-stepping) is a high availability solution.
+Both the primary VM (PVM) and the secondary VM (SVM) run in parallel. They
+receive the same requests from the client, and generate responses in parallel too.
+If the response packets from PVM and SVM are identical, they are released
+immediately. Otherwise, a VM checkpoint (on demand) is conducted.
+
+== Architecture ==
+
+The architecture of COLO is shown in the diagram below.
+It consists of a pair of networked physical nodes:
+The primary node running the PVM, and the secondary node running the SVM
+to maintain a valid replica of the PVM.
+PVM and SVM execute in parallel and generate output of response packets for
+client requests according to the application semantics.
+
+The incoming packets from the client or external network are received by the
+primary node, and then forwarded to the secondary node, so that both the PVM
+and the SVM are stimulated with the same requests.
+
+COLO receives the outbound packets from both the PVM and SVM and compares them
+before allowing the output to be sent to clients.
+
+The SVM is qualified as a valid replica of the PVM, as long as it generates
+identical responses to all client requests. Once the differences in the outputs
+are detected between the PVM and SVM, COLO withholds transmission of the
+outbound packets until it has successfully synchronized the PVM state to the SVM.
+
+ Primary Node Secondary Node
+ +------------+ +-----------------------+ +------------------------+ +------------+
+ | | | HeartBeat |<----->| HeartBeat | | |
+ | Primary VM | +-----------|-----------+ +-----------|------------+ |Secondary VM|
+ | | | | | |
+ | | +-----------|-----------+ +-----------|------------+ | |
+ | | |QEMU +---v----+ | |QEMU +----v---+ | | |
+ | | | |Failover| | | |Failover| | | |
+ | | | +--------+ | | +--------+ | | |
+ | | | +---------------+ | | +---------------+ | | |
+ | | | | VM Checkpoint |-------------->| VM Checkpoint | | | |
+ | | | +---------------+ | | +---------------+ | | |
+ | | | | | | | |
+ |Requests<---------------------------^------------------------------------------>Requests|
+ |Responses----------------------\ /--|--------------\ /------------------------Responses|
+ | | | | | | | | | | | | |
+ | | | +-----------+ | | | | | | | +------------+ | | |
+ | | | | COLO disk | | | | | | | | | COLO disk | | | |
+ | | | | Manager |-|-|--|--------------|--|->| Manager | | | |
+ | | | +|----------+ | | | | | | | +-----------|+ | | |
+ | | | | | | | | | | | | | | |
+ +------------+ +--|------------|-|--|--+ +---|--|--------------|--+ +------------+
+ | | | | | | |
+ +-------------+ | +----------v-v--|--+ +---|--v-----------+ | +-------------+
+ | VM Monitor | | | COLO Proxy | | COLO Proxy | | | VM Monitor |
+ | | | |(compare packet) | | (adjust sequence)| | | |
+ +-------------+ | +----------|----^--+ +------------------+ | +-------------+
+ | | | |
+ +------------------|------------|----|--+ +---------------------|------------------+
+ | Kernel | | | | | Kernel | |
+ +------------------|------------|----|--+ +---------------------|------------------+
+ | | | |
+ +--------------v+ +--------v----|--+ +------------------+ +v-------------+
+ | Storage | |External Network| | External Network | | Storage |
+ +---------------+ +----------------+ +------------------+ +--------------+
+
+== Components introduction ==
+
+You can see there are several components in COLO's diagram of architecture.
+Their functions are described below.
+
+HeartBeat:
+Runs on both the primary and secondary nodes, to periodically check platform
+availability. When the primary node suffers a hardware fail-stop failure,
+the heartbeat stops responding, and the secondary node triggers a failover
+as soon as it detects the absence.
+
+COLO disk Manager:
+When the primary VM writes data into its image, the COLO disk manager captures
+this data and sends it to the secondary VM, which makes sure the content of the
+secondary VM's image is consistent with the content of the primary VM's image.
+For more details, please refer to docs/block-replication.txt.
+
+Checkpoint/Failover Controller:
+Modifications of save/restore flow to realize continuous migration,
+to make sure the state of VM in Secondary side is always consistent with VM in
+Primary side.
+
+COLO Proxy:
+Delivers packets to Primary and Secondary, and then compares the responses from
+both sides. Then decides whether to start a checkpoint according to some rules.
+Please refer to docs/colo-proxy.txt for more information.
+
+Note:
+HeartBeat has not been implemented yet, so you need to trigger failover process
+by using 'x-colo-lost-heartbeat' command.
+
+== Test procedure ==
+1. Startup qemu
+Primary:
+# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name primary \
+ -device piix3-usb-uhci \
+ -device usb-tablet -netdev tap,id=hn0,vhost=off \
+ -device virtio-net-pci,id=net-pci0,netdev=hn0 \
+ -drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
+ children.0.file.filename=1.raw,\
+ children.0.driver=raw -S
+Secondary:
+# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name secondary \
+ -device piix3-usb-uhci \
+ -device usb-tablet -netdev tap,id=hn0,vhost=off \
+ -device virtio-net-pci,id=net-pci0,netdev=hn0 \
+ -drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
+ -drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
+ file.driver=qcow2,top-id=active-disk0,\
+ file.file.filename=/mnt/ramfs/active_disk.img,\
+ file.backing.driver=qcow2,\
+ file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
+ file.backing.backing=secondary-disk0 \
+ -incoming tcp:0:8888
+
+2. On Secondary VM's QEMU monitor, issue command
+{'execute':'qmp_capabilities'}
+{ 'execute': 'nbd-server-start',
+ 'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
+}
+{'execute': 'nbd-server-add', 'arguments': {'device': 'secondary-disk0', 'writable': true } }
+
+Note:
+ a. The qmp command nbd-server-start and nbd-server-add must be run
+ before running the qmp command migrate on primary QEMU
+ b. Active disk, hidden disk and nbd target's length should be the
+ same.
+ c. It is better to put active disk and hidden disk in ramdisk.
+
+3. On Primary VM's QEMU monitor, issue command:
+{'execute':'qmp_capabilities'}
+{ 'execute': 'human-monitor-command',
+ 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
+{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
+{ 'execute': 'migrate-set-capabilities',
+ 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
+{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
+
+ Note:
+ a. There should be only one NBD Client for each primary disk.
+ b. xx.xx.xx.xx is the secondary physical machine's hostname or IP
+ c. The qmp command line must be run after running qmp command line in
+ secondary qemu.
+
+4. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
+You can issue command '{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
+to change the checkpoint period time
+
+5. Failover test
+You can kill Primary VM and run 'x_colo_lost_heartbeat' in Secondary VM's
+monitor at the same time, then SVM will failover and client will not detect this
+change.
+
+Before issuing '{ "execute": "x-colo-lost-heartbeat" }' command, we have to
+issue block related command to stop block replication.
+Primary:
+ Remove the nbd child from the quorum:
+ { 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
+ { 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
+ Note: there is no qmp command to remove the blockdev now
+
+Secondary:
+ The primary host is down, so we should do the following thing:
+ { 'execute': 'nbd-server-stop' }
+
+== TODO ==
+1. Support continuous VM replication.
+2. Support shared storage.
+3. Develop the heartbeat part.
+4. Reduce checkpoint VM’s downtime while doing checkpoint.
High level description of live block operations. Note these are not
supported for use with the raw format at the moment.
+Note also that this document is incomplete and it currently only
+covers the 'stream' operation. Other operations supported by QEMU such
+as 'commit', 'mirror' and 'backup' are not described here yet. Please
+refer to the qapi/block-core.json file for an overview of those.
+
Snapshot live merge
===================
Given a snapshot chain, described in this document in the following
format:
-[A] -> [B] -> [C] -> [D]
+[A] <- [B] <- [C] <- [D] <- [E]
-Where the rightmost object ([D] in the example) described is the current
+Where the rightmost object ([E] in the example) described is the current
image which the guest OS has write access to. To the left of it is its base
image, and so on accordingly until the leftmost image, which has no
base.
smaller one with fewer elements, such as this transformation relative
to the first example:
-[A] -> [D]
+[A] <- [E]
+
+Data is copied in the right direction with destination being the
+rightmost image, but any other intermediate image can be specified
+instead. In this example data is copied from [C] into [D], so [D] can
+be backed by [B]:
-Currently only forward merge with target being the active image is
-supported, that is, data copy is performed in the right direction with
-destination being the rightmost image.
+[A] <- [B] <- [D] <- [E]
The operation is implemented in QEMU through image streaming facilities.
copies data from the backing file(s) into the active image. When finished,
it adjusts the backing file pointer.
-The 'base' parameter specifies an image which data need not be streamed from.
-This image will be used as the backing file for the active image when the
-operation is finished.
+The 'base' parameter specifies an image which data need not be
+streamed from. This image will be used as the backing file for the
+destination image when the operation is finished.
+
+In the first example above, the command would be:
+
+(qemu) block_stream virtio0 file-A.img
-In the example above, the command would be:
+In order to specify a destination image different from the active
+(rightmost) one we can use its node name instead.
-(qemu) block_stream virtio0 A
+In the second example above, the command would be:
+(qemu) block_stream node-D file-B.img
Live block copy
===============
-> { "execute": "migrate_set_downtime", "arguments": { "value": 0.1 } }
<- { "return": {} }
+x-colo-lost-heartbeat
+---------------------
+
+Tell COLO that heartbeat is lost, a failover or takeover is needed.
+
+Example:
+
+-> { "execute": "x-colo-lost-heartbeat" }
+<- { "return": {} }
+
client_migrate_info
-------------------
- "job-id": Identifier for the newly-created block job. If omitted,
the device name will be used. (json-string, optional)
- "device": The device name or node-name of a root node (json-string)
-- "base": The file name of the backing image above which copying starts
- (json-string, optional)
+- "base": The file name of the backing image above which copying starts.
+ It cannot be set if 'base-node' is also set (json-string, optional)
+- "base-node": the node name of the backing image above which copying starts.
+ It cannot be set if 'base' is also set.
+ (json-string, optional) (Since 2.8)
- "backing-file": The backing file string to write into the active layer. This
filename is not validated.
- "compress": use multiple compression threads to accelerate live migration
- "events": generate events for each migration state change
- "postcopy-ram": postcopy mode for live migration
+- "x-colo": COarse-Grain LOck Stepping (COLO) for Non-stop Service
Arguments:
- "compress": Multiple compression threads state (json-bool)
- "events": Migration state change event state (json-bool)
- "postcopy-ram": postcopy ram state (json-bool)
+ - "x-colo": COarse-Grain LOck Stepping for Non-stop Service (json-bool)
Arguments:
{"state": false, "capability": "zero-blocks"},
{"state": false, "capability": "compress"},
{"state": true, "capability": "events"},
- {"state": false, "capability": "postcopy-ram"}
+ {"state": false, "capability": "postcopy-ram"},
+ {"state": false, "capability": "x-colo"}
]}
migrate-set-parameters
- "max-bandwidth": set maximum speed for migrations (in bytes/sec) (json-int)
- "downtime-limit": set maximum tolerated downtime (in milliseconds) for
migrations (json-int)
+- "x-checkpoint-delay": set the delay time for periodic checkpoint (json-int)
+
Arguments:
Example:
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
+ mmap_lock();
+ tb_lock();
tb_invalidate_phys_page_range(pc, pc + 1, 0);
+ tb_unlock();
+ mmap_unlock();
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
int asidx = cpu_asidx_from_attrs(cpu, attrs);
if (phys != -1) {
+ /* Locks grabbed by tb_invalidate_phys_addr */
tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
phys | (pc & ~TARGET_PAGE_MASK));
}
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
uint64_t val, unsigned size)
{
+ bool locked = false;
+
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+ locked = true;
+ tb_lock();
tb_invalidate_phys_page_fast(ram_addr, size);
}
switch (size) {
default:
abort();
}
+
+ if (locked) {
+ tb_unlock();
+ }
+
/* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
continue;
}
cpu->watchpoint_hit = wp;
+
+ /* The tb_lock will be reset when cpu_loop_exit or
+ * cpu_loop_exit_noexc longjmp back into the cpu_exec
+ * main loop.
+ */
+ tb_lock();
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
}
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
+ tb_lock();
tb_invalidate_phys_range(addr, addr + length);
+ tb_unlock();
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
#define MAX_PACKET_LENGTH 4096
-#include "cpu.h"
#include "qemu/sockets.h"
#include "sysemu/kvm.h"
#include "exec/semihost.h"
@findex migrate_start_postcopy
Switch in-progress migration to postcopy mode. Ignored after the end of
migration (or once already in postcopy).
+ETEXI
+
+ {
+ .name = "x_colo_lost_heartbeat",
+ .args_type = "",
+ .params = "",
+ .help = "Tell COLO that heartbeat is lost,\n\t\t\t"
+ "a failover or takeover is needed.",
+ .cmd = hmp_x_colo_lost_heartbeat,
+ },
+
+STEXI
+@item x_colo_lost_heartbeat
+@findex x_colo_lost_heartbeat
+Tell COLO that the heartbeat is lost and that a failover or takeover is needed.
ETEXI
{
monitor_printf(mon, " %s: %" PRId64 " milliseconds",
MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
params->downtime_limit);
+ monitor_printf(mon, " %s: %" PRId64,
+ MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
+ params->x_checkpoint_delay);
monitor_printf(mon, "\n");
}
p.has_downtime_limit = true;
use_int_value = true;
break;
+ case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY:
+ p.has_x_checkpoint_delay = true;
+ use_int_value = true;
+ break;
}
if (use_int_value) {
p.cpu_throttle_initial = valueint;
p.cpu_throttle_increment = valueint;
p.downtime_limit = valueint;
+ p.x_checkpoint_delay = valueint;
}
qmp_migrate_set_parameters(&p, &err);
hmp_handle_error(mon, &err);
}
+void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
+{
+ Error *err = NULL; /* starts out NULL; presumably set by the QMP call on failure */
+
+ qmp_x_colo_lost_heartbeat(&err); /* HMP wrapper: delegates to the QMP command; qdict is not read */
+ hmp_handle_error(mon, &err); /* passes err and the monitor to the common HMP error helper */
+}
+
void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
int64_t speed = qdict_get_try_int(qdict, "speed", 0);
qmp_block_stream(false, NULL, device, base != NULL, base, false, NULL,
- qdict_haskey(qdict, "speed"), speed,
+ false, NULL, qdict_haskey(qdict, "speed"), speed,
true, BLOCKDEV_ON_ERROR_REPORT, &error);
hmp_handle_error(mon, &error);
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
+void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
void hmp_set_password(Monitor *mon, const QDict *qdict);
void hmp_expire_password(Monitor *mon, const QDict *qdict);
void hmp_eject(Monitor *mon, const QDict *qdict);
if (!cqid || nvme_check_cqid(n, cqid)) {
return NVME_INVALID_CQID | NVME_DNR;
}
- if (!sqid || (sqid && !nvme_check_sqid(n, sqid))) {
+ if (!sqid || !nvme_check_sqid(n, sqid)) {
return NVME_INVALID_QID | NVME_DNR;
}
if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
uint16_t qflags = le16_to_cpu(c->cq_flags);
uint64_t prp1 = le64_to_cpu(c->prp1);
- if (!cqid || (cqid && !nvme_check_cqid(n, cqid))) {
+ if (!cqid || !nvme_check_cqid(n, cqid)) {
return NVME_INVALID_CQID | NVME_DNR;
}
if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
xengnttab_handle *gnt = grant->blkdev->xendev.gnttabdev;
if (xengnttab_unmap(gnt, grant->page, 1) != 0) {
- xen_be_printf(&grant->blkdev->xendev, 0,
+ xen_pv_printf(&grant->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
grant->blkdev->persistent_gnt_count--;
- xen_be_printf(&grant->blkdev->xendev, 3,
+ xen_pv_printf(&grant->blkdev->xendev, 3,
"unmapped grant %p\n", grant->page);
g_free(grant);
}
xengnttab_handle *gnt = blkdev->xendev.gnttabdev;
if (xengnttab_unmap(gnt, region->addr, region->num) != 0) {
- xen_be_printf(&blkdev->xendev, 0,
+ xen_pv_printf(&blkdev->xendev, 0,
"xengnttab_unmap region %p failed: %s\n",
region->addr, strerror(errno));
}
- xen_be_printf(&blkdev->xendev, 3,
+ xen_pv_printf(&blkdev->xendev, 3,
"unmapped grant region %p with %d pages\n",
region->addr, region->num);
g_free(region);
size_t len;
int i;
- xen_be_printf(&blkdev->xendev, 3,
+ xen_pv_printf(&blkdev->xendev, 3,
"op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
ioreq->req.operation, ioreq->req.nr_segments,
ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
case BLKIF_OP_DISCARD:
return 0;
default:
- xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
+ xen_pv_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
ioreq->req.operation);
goto err;
};
if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
- xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
goto err;
}
ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
for (i = 0; i < ioreq->req.nr_segments; i++) {
if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
goto err;
}
if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
- xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: first > last sector\n");
goto err;
}
if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
- xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: page crossing\n");
goto err;
}
qemu_iovec_add(&ioreq->v, (void*)mem, len);
}
if (ioreq->start + ioreq->v.size > blkdev->file_size) {
- xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
goto err;
}
return 0;
return;
}
if (xengnttab_unmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
continue;
}
if (xengnttab_unmap(gnt, ioreq->page[i], 1) != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
if (grant != NULL) {
page[i] = grant->page;
- xen_be_printf(&ioreq->blkdev->xendev, 3,
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
"using persistent-grant %" PRIu32 "\n",
ioreq->refs[i]);
} else {
ioreq->pages = xengnttab_map_grant_refs
(gnt, new_maps, domids, refs, ioreq->prot);
if (ioreq->pages == NULL) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"can't map %d grant refs (%s, %d maps)\n",
new_maps, strerror(errno), ioreq->blkdev->cnt_map);
return -1;
ioreq->page[i] = xengnttab_map_grant_ref
(gnt, domids[i], refs[i], ioreq->prot);
if (ioreq->page[i] == NULL) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"can't map grant ref %d (%s, %d maps)\n",
refs[i], strerror(errno), ioreq->blkdev->cnt_map);
ioreq->mapped = 1;
grant->page = ioreq->page[new_maps];
}
grant->blkdev = ioreq->blkdev;
- xen_be_printf(&ioreq->blkdev->xendev, 3,
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
"adding grant %" PRIu32 " page: %p\n",
refs[new_maps], grant->page);
g_tree_insert(ioreq->blkdev->persistent_gnts,
rc = xengnttab_grant_copy(gnt, count, segs);
if (rc) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"failed to copy data %d\n", rc);
ioreq->aio_errors++;
return -1;
for (i = 0; i < count; i++) {
if (segs[i].status != GNTST_okay) {
- xen_be_printf(&ioreq->blkdev->xendev, 3,
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
"failed to copy data %d for gref %d, domid %d\n",
segs[i].status, ioreq->refs[i], ioreq->domids[i]);
ioreq->aio_errors++;
struct ioreq *ioreq = opaque;
if (ret != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
+ xen_pv_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
ioreq->aio_errors++;
}
ioreq_release(ioreq, true);
}
if (send_notify) {
- xen_be_send_notify(&blkdev->xendev);
+ xen_pv_send_notify(&blkdev->xendev);
}
}
};
if (blk_send_response_one(ioreq)) {
- xen_be_send_notify(&blkdev->xendev);
+ xen_pv_send_notify(&blkdev->xendev);
}
ioreq_release(ioreq, false);
continue;
}
if (xengnttab_set_max_grants(xendev->gnttabdev,
MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
- xen_be_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
+ xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
strerror(errno));
}
}
}
/* setup via xenbus -> create new block driver instance */
- xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
+ xen_pv_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
blkdev->blk = blk_new_open(blkdev->filename, NULL, options,
qflags, &local_err);
if (!blkdev->blk) {
- xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
+ xen_pv_printf(&blkdev->xendev, 0, "error: %s\n",
error_get_pretty(local_err));
error_free(local_err);
return -1;
blk_set_enable_write_cache(blkdev->blk, !writethrough);
} else {
/* setup via qemu cmdline -> already setup for us */
- xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
+ xen_pv_printf(&blkdev->xendev, 2,
+ "get configured bdrv (cmdline setup)\n");
blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo);
if (blk_is_read_only(blkdev->blk) && !readonly) {
- xen_be_printf(&blkdev->xendev, 0, "Unexpected read-only drive");
+ xen_pv_printf(&blkdev->xendev, 0, "Unexpected read-only drive");
blkdev->blk = NULL;
return -1;
}
if (blkdev->file_size < 0) {
BlockDriverState *bs = blk_bs(blkdev->blk);
const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
- xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
+ xen_pv_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
(int)blkdev->file_size, strerror(-blkdev->file_size),
drv_name ?: "-");
blkdev->file_size = 0;
}
- xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
+ xen_pv_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
" size %" PRId64 " (%" PRId64 " MB)\n",
blkdev->type, blkdev->fileproto, blkdev->filename,
blkdev->file_size, blkdev->file_size >> 20);
blkdev->feature_grant_copy =
(xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
- xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+ xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
blkdev->feature_grant_copy ? "enabled" : "disabled");
- xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
+ xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
"remote port %d, local port %d\n",
blkdev->xendev.protocol, blkdev->ring_ref,
blkdev->xendev.remote_port, blkdev->xendev.local_port);
blk_unref(blkdev->blk);
blkdev->blk = NULL;
}
- xen_be_unbind_evtchn(&blkdev->xendev);
+ xen_pv_unbind_evtchn(&blkdev->xendev);
if (blkdev->sring) {
xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
xen_mb();
intf->out_cons = cons;
- xen_be_send_notify(&con->xendev);
+ xen_pv_send_notify(&con->xendev);
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
}
xen_wmb();
intf->in_prod = prod;
- xen_be_send_notify(&con->xendev);
+ xen_pv_send_notify(&con->xendev);
}
static void xencons_send(struct XenConsole *con)
len = size;
}
if (len < 1) {
- if (!con->backlog) {
- con->backlog = 1;
- xen_be_printf(&con->xendev, 1, "backlog piling up, nobody listening?\n");
- }
+ if (!con->backlog) {
+ con->backlog = 1;
+ xen_pv_printf(&con->xendev, 1,
+ "backlog piling up, nobody listening?\n");
+ }
} else {
- buffer_advance(&con->buffer, len);
- if (con->backlog && len == size) {
- con->backlog = 0;
- xen_be_printf(&con->xendev, 1, "backlog is gone\n");
- }
+ buffer_advance(&con->buffer, len);
+ if (con->backlog && len == size) {
+ con->backlog = 0;
+ xen_pv_printf(&con->xendev, 1, "backlog is gone\n");
+ }
}
}
type = xenstore_read_str(con->console, "type");
if (!type || strcmp(type, "ioemu") != 0) {
- xen_be_printf(xendev, 1, "not for me (type=%s)\n", type);
+ xen_pv_printf(xendev, 1, "not for me (type=%s)\n", type);
ret = -1;
goto out;
}
qemu_chr_fe_set_handlers(&con->chr, xencons_can_receive,
xencons_receive, NULL, con, NULL, true);
- xen_be_printf(xendev, 1, "ring mfn %d, remote port %d, local port %d, limit %zd\n",
+ xen_pv_printf(xendev, 1,
+ "ring mfn %d, remote port %d, local port %d, limit %zd\n",
con->ring_ref,
con->xendev.remote_port,
con->xendev.local_port,
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
qemu_chr_fe_deinit(&con->chr);
- xen_be_unbind_evtchn(&con->xendev);
+ xen_pv_unbind_evtchn(&con->xendev);
if (con->sring) {
if (!xendev->dev) {
/* Read the QEMU source framebuffer into an OpenGL texture */
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
- fb_len = 2*s->regs[R_TEXHRES]*s->regs[R_TEXVRES];
+ fb_len = 2ULL * s->regs[R_TEXHRES] * s->regs[R_TEXVRES];
fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], &fb_len, 0);
if (fb == NULL) {
glDeleteTextures(1, &texture);
xen_pfn_t mfn;
if (xenstore_read_fe_uint64(&c->xendev, "page-ref", &val) == -1)
- return -1;
+ return -1;
mfn = (xen_pfn_t)val;
assert(val == mfn);
if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
- return -1;
+ return -1;
c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
PROT_READ | PROT_WRITE, 1, &mfn, NULL);
if (c->page == NULL)
- return -1;
+ return -1;
xen_be_bind_evtchn(&c->xendev);
- xen_be_printf(&c->xendev, 1, "ring mfn %"PRI_xen_pfn", remote-port %d, local-port %d\n",
- mfn, c->xendev.remote_port, c->xendev.local_port);
+ xen_pv_printf(&c->xendev, 1,
+ "ring mfn %"PRI_xen_pfn", remote-port %d, local-port %d\n",
+ mfn, c->xendev.remote_port, c->xendev.local_port);
return 0;
}
static void common_unbind(struct common *c)
{
- xen_be_unbind_evtchn(&c->xendev);
+ xen_pv_unbind_evtchn(&c->xendev);
if (c->page) {
xenforeignmemory_unmap(xen_fmem, c->page, 1);
c->page = NULL;
XENKBD_IN_RING_REF(page, prod) = *event;
xen_wmb(); /* ensure ring contents visible */
page->in_prod = prod + 1;
- return xen_be_send_notify(&xenfb->c.xendev);
+ return xen_pv_send_notify(&xenfb->c.xendev);
}
/* Send a keyboard (or mouse button) event */
int rc;
if (!in->c.con) {
- xen_be_printf(xendev, 1, "ds not set (yet)\n");
+ xen_pv_printf(xendev, 1, "ds not set (yet)\n");
return -1;
}
if (page->out_prod == page->out_cons)
return;
page->out_cons = page->out_prod;
- xen_be_send_notify(&xenfb->c.xendev);
+ xen_pv_send_notify(&xenfb->c.xendev);
}
/* -------------------------------------------------------------------- */
}
static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
- int width, int height, int depth,
- size_t fb_len, int offset, int row_stride)
+ int width, int height, int depth,
+ size_t fb_len, int offset, int row_stride)
{
size_t mfn_sz = sizeof(*((struct xenfb_page *)0)->pd);
size_t pd_len = sizeof(((struct xenfb_page *)0)->pd) / mfn_sz;
int max_width, max_height;
if (fb_len_lim > fb_len_max) {
- xen_be_printf(&xenfb->c.xendev, 0, "fb size limit %zu exceeds %zu, corrected\n",
- fb_len_lim, fb_len_max);
- fb_len_lim = fb_len_max;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "fb size limit %zu exceeds %zu, corrected\n",
+ fb_len_lim, fb_len_max);
+ fb_len_lim = fb_len_max;
}
if (fb_len_lim && fb_len > fb_len_lim) {
- xen_be_printf(&xenfb->c.xendev, 0, "frontend fb size %zu limited to %zu\n",
- fb_len, fb_len_lim);
- fb_len = fb_len_lim;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "frontend fb size %zu limited to %zu\n",
+ fb_len, fb_len_lim);
+ fb_len = fb_len_lim;
}
if (depth != 8 && depth != 16 && depth != 24 && depth != 32) {
- xen_be_printf(&xenfb->c.xendev, 0, "can't handle frontend fb depth %d\n",
- depth);
- return -1;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "can't handle frontend fb depth %d\n",
+ depth);
+ return -1;
}
if (row_stride <= 0 || row_stride > fb_len) {
- xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend stride %d\n", row_stride);
- return -1;
+ xen_pv_printf(&xenfb->c.xendev, 0, "invalid frontend stride %d\n",
+ row_stride);
+ return -1;
}
max_width = row_stride / (depth / 8);
if (width < 0 || width > max_width) {
- xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend width %d limited to %d\n",
- width, max_width);
- width = max_width;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "invalid frontend width %d limited to %d\n",
+ width, max_width);
+ width = max_width;
}
if (offset < 0 || offset >= fb_len) {
- xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend offset %d (max %zu)\n",
- offset, fb_len - 1);
- return -1;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "invalid frontend offset %d (max %zu)\n",
+ offset, fb_len - 1);
+ return -1;
}
max_height = (fb_len - offset) / row_stride;
if (height < 0 || height > max_height) {
- xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend height %d limited to %d\n",
- height, max_height);
- height = max_height;
+ xen_pv_printf(&xenfb->c.xendev, 0,
+ "invalid frontend height %d limited to %d\n",
+ height, max_height);
+ height = max_height;
}
xenfb->fb_len = fb_len;
xenfb->row_stride = row_stride;
xenfb->offset = offset;
xenfb->up_fullscreen = 1;
xenfb->do_resize = 1;
- xen_be_printf(&xenfb->c.xendev, 1, "framebuffer %dx%dx%d offset %d stride %d\n",
- width, height, depth, offset, row_stride);
+ xen_pv_printf(&xenfb->c.xendev, 1,
+ "framebuffer %dx%dx%d offset %d stride %d\n",
+ width, height, depth, offset, row_stride);
return 0;
}
}
}
if (oops) /* should not happen */
- xen_be_printf(&xenfb->c.xendev, 0, "%s: oops: convert %d -> %d bpp?\n",
+ xen_pv_printf(&xenfb->c.xendev, 0, "%s: oops: convert %d -> %d bpp?\n",
__FUNCTION__, xenfb->depth, bpp);
dpy_gfx_update(xenfb->c.con, x, y, w, h);
xen_wmb(); /* ensure ring contents visible */
page->in_prod = prod + 1;
- xen_be_send_notify(&xenfb->c.xendev);
+ xen_pv_send_notify(&xenfb->c.xendev);
}
static void xenfb_send_refresh_period(struct XenFB *xenfb, int period)
return;
if (!xenfb->feature_update) {
- /* we don't get update notifications, thus use the
- * sledge hammer approach ... */
- xenfb->up_fullscreen = 1;
+ /* we don't get update notifications, thus use the
+ * sledge hammer approach ... */
+ xenfb->up_fullscreen = 1;
}
/* resize if needed */
break;
}
dpy_gfx_replace_surface(xenfb->c.con, surface);
- xen_be_printf(&xenfb->c.xendev, 1, "update: resizing: %dx%d @ %d bpp%s\n",
+ xen_pv_printf(&xenfb->c.xendev, 1,
+ "update: resizing: %dx%d @ %d bpp%s\n",
xenfb->width, xenfb->height, xenfb->depth,
is_buffer_shared(surface) ? " (shared)" : "");
xenfb->up_fullscreen = 1;
/* run queued updates */
if (xenfb->up_fullscreen) {
- xen_be_printf(&xenfb->c.xendev, 3, "update: fullscreen\n");
- xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height);
+ xen_pv_printf(&xenfb->c.xendev, 3, "update: fullscreen\n");
+ xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height);
} else if (xenfb->up_count) {
- xen_be_printf(&xenfb->c.xendev, 3, "update: %d rects\n", xenfb->up_count);
- for (i = 0; i < xenfb->up_count; i++)
- xenfb_guest_copy(xenfb,
- xenfb->up_rects[i].x,
- xenfb->up_rects[i].y,
- xenfb->up_rects[i].w,
- xenfb->up_rects[i].h);
+ xen_pv_printf(&xenfb->c.xendev, 3, "update: %d rects\n",
+ xenfb->up_count);
+ for (i = 0; i < xenfb->up_count; i++)
+ xenfb_guest_copy(xenfb,
+ xenfb->up_rects[i].x,
+ xenfb->up_rects[i].y,
+ xenfb->up_rects[i].w,
+ xenfb->up_rects[i].h);
} else {
- xen_be_printf(&xenfb->c.xendev, 3, "update: nothing\n");
+ xen_pv_printf(&xenfb->c.xendev, 3, "update: nothing\n");
}
xenfb->up_count = 0;
xenfb->up_fullscreen = 0;
w = MIN(event->update.width, xenfb->width - x);
h = MIN(event->update.height, xenfb->height - y);
if (w < 0 || h < 0) {
- xen_be_printf(&xenfb->c.xendev, 1, "bogus update ignored\n");
+ xen_pv_printf(&xenfb->c.xendev, 1, "bogus update ignored\n");
break;
}
if (x != event->update.x ||
y != event->update.y ||
w != event->update.width ||
h != event->update.height) {
- xen_be_printf(&xenfb->c.xendev, 1, "bogus update clipped\n");
+ xen_pv_printf(&xenfb->c.xendev, 1, "bogus update clipped\n");
}
if (w == xenfb->width && h > xenfb->height / 2) {
/* scroll detector: updated more than 50% of the lines,
if (fb->feature_update)
xenstore_write_be_int(xendev, "request-update", 1);
- xen_be_printf(xendev, 1, "feature-update=%d, videoram=%d\n",
+ xen_pv_printf(xendev, 1, "feature-update=%d, videoram=%d\n",
fb->feature_update, videoram);
return 0;
}
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
-1, 0);
if (fb->pixels == MAP_FAILED) {
- xen_be_printf(xendev, 0,
+ xen_pv_printf(xendev, 0,
"Couldn't replace the framebuffer with anonymous memory errno=%d\n",
errno);
}
if (fb->bug_trigger == 0 && strcmp(node, "state") == 0 &&
xendev->fe_state == XenbusStateConnected &&
xendev->be_state == XenbusStateConnected) {
- xen_be_printf(xendev, 2, "re-trigger connected (frontend bug)\n");
+ xen_pv_printf(xendev, 2, "re-trigger connected (frontend bug)\n");
xen_be_set_state(xendev, XenbusStateConnected);
fb->bug_trigger = 1; /* only once */
}
struct XenFB *xenfb = container_of(xendev, struct XenFB, c.xendev);
xenfb_handle_events(xenfb);
- xen_be_send_notify(&xenfb->c.xendev);
+ xen_pv_send_notify(&xenfb->c.xendev);
}
/* -------------------------------------------------------------------- */
wait_more:
i++;
main_loop_wait(true);
- xfb = xen_be_find_xendev("vfb", domid, 0);
- xin = xen_be_find_xendev("vkbd", domid, 0);
+ xfb = xen_pv_find_xendev("vfb", domid, 0);
+ xin = xen_pv_find_xendev("vkbd", domid, 0);
if (!xfb || !xin) {
if (i < 256) {
usleep(10000);
goto wait_more;
}
- xen_be_printf(NULL, 1, "displaystate setup failed\n");
+ xen_pv_printf(NULL, 1, "displaystate setup failed\n");
return;
}
#include "hw/pci/pci_bus.h"
#include "hw/pci-host/q35.h"
#include "hw/i386/x86-iommu.h"
-#include "hw/timer/hpet.h"
#include "hw/acpi/aml-build.h"
}
}
-static void kvm_apic_put(CPUState *cs, void *data)
+static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
{
- APICCommonState *s = data;
+ APICCommonState *s = data.host_ptr;
struct kvm_lapic_state kapic;
int ret;
static void kvm_apic_post_load(APICCommonState *s)
{
- run_on_cpu(CPU(s->cpu), kvm_apic_put, s);
+ run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s));
}
-static void do_inject_external_nmi(CPUState *cpu, void *data)
+static void do_inject_external_nmi(CPUState *cpu, run_on_cpu_data data)
{
- APICCommonState *s = data;
+ APICCommonState *s = data.host_ptr;
uint32_t lvt;
int ret;
static void kvm_apic_external_nmi(APICCommonState *s)
{
- run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s);
+ run_on_cpu(CPU(s->cpu), do_inject_external_nmi, RUN_ON_CPU_HOST_PTR(s));
}
static void kvm_send_msi(MSIMessage *msg)
/* Not used by KVM, which uses the CPU mp_state instead. */
s->wait_for_sipi = 0;
- run_on_cpu(CPU(s->cpu), kvm_apic_put, s);
+ run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s));
}
static void kvm_apic_realize(DeviceState *dev, Error **errp)
#include "sysemu/kvm.h"
#include "hw/i386/apic_internal.h"
#include "hw/sysbus.h"
+#include "tcg/tcg.h"
#define VAPIC_IO_PORT 0x7e
resume_all_vcpus();
if (!kvm_enabled()) {
+ /* tb_lock will be reset when cpu_loop_exit_noexc longjmps
+ * back into the cpu_exec loop. */
+ tb_lock();
tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
cpu_loop_exit_noexc(cs);
}
bool enable;
} VAPICEnableTPRReporting;
-static void vapic_do_enable_tpr_reporting(CPUState *cpu, void *data)
+static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
{
- VAPICEnableTPRReporting *info = data;
-
+ VAPICEnableTPRReporting *info = data.host_ptr;
apic_enable_tpr_access_reporting(info->apic, info->enable);
}
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
info.apic = cpu->apic_state;
- run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info);
+ run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
}
}
nb_option_roms++;
}
-static void do_vapic_enable(CPUState *cs, void *data)
+static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
{
- VAPICROMState *s = data;
+ VAPICROMState *s = data.host_ptr;
X86CPU *cpu = X86_CPU(cs);
static const uint8_t enabled = 1;
if (s->state == VAPIC_ACTIVE) {
if (smp_cpus == 1) {
- run_on_cpu(first_cpu, do_vapic_enable, s);
+ run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
} else {
zero = g_malloc0(s->rom_state.vapic_size);
cpu_physical_memory_write(s->vapic_paddr, zero,
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
-#include "qemu-common.h"
#include "sysemu/device_tree.h"
#include "sysemu/sysemu.h"
#include "hw/loader.h"
#include "elf.h"
#include "hw/timer/mc146818rtc.h"
#include "hw/timer/i8254.h"
-#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "exec/address-spaces.h"
#include "hw/sysbus.h" /* SysBusDevice */
};
typedef struct MilkymistPFPUState MilkymistPFPUState;
-static inline hwaddr
+static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
return base + 8 * (128 * y + x);
netdev->tx_ring.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
if (notify) {
- xen_be_send_notify(&netdev->xendev);
+ xen_pv_send_notify(&netdev->xendev);
}
if (i == netdev->tx_ring.req_cons) {
/* should not happen in theory, we don't announce the *
* feature-{sg,gso,whatelse} flags in xenstore (yet?) */
if (txreq.flags & NETTXF_extra_info) {
- xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
+ xen_pv_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
net_tx_error(netdev, &txreq, rc);
continue;
}
if (txreq.flags & NETTXF_more_data) {
- xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
+ xen_pv_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
net_tx_error(netdev, &txreq, rc);
continue;
}
#endif
if (txreq.size < 14) {
- xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size);
+ xen_pv_printf(&netdev->xendev, 0, "bad packet size: %d\n",
+ txreq.size);
net_tx_error(netdev, &txreq, rc);
continue;
}
if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
- xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
+ xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
net_tx_error(netdev, &txreq, rc);
continue;
}
- xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
+ xen_pv_printf(&netdev->xendev, 3,
+ "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
txreq.gref, txreq.offset, txreq.size, txreq.flags,
(txreq.flags & NETTXF_csum_blank) ? " csum_blank" : "",
(txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
netdev->xendev.dom,
txreq.gref, PROT_READ);
if (page == NULL) {
- xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n",
- txreq.gref);
+ xen_pv_printf(&netdev->xendev, 0,
+ "error: tx gref dereference failed (%d)\n",
+ txreq.gref);
net_tx_error(netdev, &txreq, rc);
continue;
}
resp->status = (int16_t)st;
}
- xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n",
+ xen_pv_printf(&netdev->xendev, 3,
+ "rx response: idx %d, status %d, flags 0x%x\n",
i, resp->status, resp->flags);
netdev->rx_ring.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
if (notify) {
- xen_be_send_notify(&netdev->xendev);
+ xen_pv_send_notify(&netdev->xendev);
}
}
return 0;
}
if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
- xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
+ xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
(unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
return -1;
}
netdev->xendev.dom,
rxreq.gref, PROT_WRITE);
if (page == NULL) {
- xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n",
+ xen_pv_printf(&netdev->xendev, 0,
+ "error: rx gref dereference failed (%d)\n",
rxreq.gref);
net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
return -1;
rx_copy = 0;
}
if (rx_copy == 0) {
- xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n");
+ xen_pv_printf(&netdev->xendev, 0,
+ "frontend doesn't support rx-copy.\n");
return -1;
}
xen_be_bind_evtchn(&netdev->xendev);
- xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
+ xen_pv_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
"remote port %d, local port %d\n",
netdev->tx_ring_ref, netdev->rx_ring_ref,
netdev->xendev.remote_port, netdev->xendev.local_port);
{
struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
- xen_be_unbind_evtchn(&netdev->xendev);
+ xen_pv_unbind_evtchn(&netdev->xendev);
if (netdev->txs) {
xengnttab_unmap(netdev->xendev.gnttabdev, netdev->txs, 1);
#include "hw/isa/isa.h"
#include "hw/nvram/fw_cfg.h"
#include "hw/sysbus.h"
-#include "hw/boards.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qemu/config-file.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_host.h"
-#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/i386/pc.h"
#include "qemu/range.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "hw/loader.h"
-#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "exec/address-spaces.h"
env->tlb_dirty = true;
}
-static void spin_kick(CPUState *cs, void *data)
+static void spin_kick(CPUState *cs, run_on_cpu_data data)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
- SpinInfo *curspin = data;
+ SpinInfo *curspin = data.host_ptr;
hwaddr map_size = 64 * 1024 * 1024;
hwaddr map_start;
if (!(ldq_p(&curspin->addr) & 1)) {
/* run CPU */
- run_on_cpu(cpu, spin_kick, curspin);
+ run_on_cpu(cpu, spin_kick, RUN_ON_CPU_HOST_PTR(curspin));
}
}
#include "sysemu/block-backend.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
-#include "sysemu/device_tree.h"
#include "kvm_ppc.h"
#include "migration/migration.h"
#include "mmu-hash64.h"
g_free(spapr->kvm_type);
}
-static void ppc_cpu_do_nmi_on_cpu(CPUState *cs, void *arg)
+static void ppc_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg)
{
cpu_synchronize_state(cs);
ppc_cpu_do_system_reset(cs);
CPUState *cs;
CPU_FOREACH(cs) {
- async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, NULL);
+ async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL);
}
}
target_ulong mask;
};
-static void do_spr_sync(CPUState *cs, void *arg)
+static void do_spr_sync(CPUState *cs, run_on_cpu_data arg)
{
- struct SPRSyncState *s = arg;
+ struct SPRSyncState *s = arg.host_ptr;
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
.value = value,
.mask = mask
};
- run_on_cpu(cs, do_spr_sync, &s);
+ run_on_cpu(cs, do_spr_sync, RUN_ON_CPU_HOST_PTR(&s));
}
static bool has_spr(PowerPCCPU *cpu, int spr)
Error *err;
} SetCompatState;
-static void do_set_compat(CPUState *cs, void *arg)
+static void do_set_compat(CPUState *cs, run_on_cpu_data arg)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- SetCompatState *s = arg;
+ SetCompatState *s = arg.host_ptr;
cpu_synchronize_state(cs);
ppc_set_compat(cpu, s->cpu_version, &s->err);
.err = NULL,
};
- run_on_cpu(cs, do_set_compat, &s);
+ run_on_cpu(cs, do_set_compat, RUN_ON_CPU_HOST_PTR(&s));
if (s.err) {
error_report_err(s.err);
unsigned int size)
{
S390PCIBusDevice *pbdev = opaque;
- uint32_t io_int_word;
uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
uint32_t vec = data & ZPCI_MSI_VEC_MASK;
uint64_t ind_bit;
0x80 >> ((ind_bit + vec) % 8));
if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8,
0x80 >> (sum_bit % 8))) {
- io_int_word = (pbdev->isc << 27) | IO_INT_WORD_AI;
- s390_io_interrupt(0, 0, 0, io_int_word);
+ css_adapter_interrupt(pbdev->isc);
}
}
{
uint32_t fid = 0;
- while (fid <= ZPCI_MAX_FID) {
+ do {
if (!s390_pci_find_dev_by_fid(fid)) {
return fid;
}
-
- if (fid == ZPCI_MAX_FID) {
- break;
- }
-
- fid++;
- }
+ } while (fid++ != ZPCI_MAX_FID);
error_setg(errp, "no free fid could be found");
return 0;
uint64_t offset;
uint64_t data;
MemoryRegion *mr;
+ MemTxResult result;
uint8_t len;
uint32_t fh;
uint8_t pcias;
return 0;
}
mr = pbdev->pdev->io_regions[pcias].memory;
- memory_region_dispatch_read(mr, offset, &data, len,
- MEMTXATTRS_UNSPECIFIED);
+ result = memory_region_dispatch_read(mr, offset, &data, len,
+ MEMTXATTRS_UNSPECIFIED);
+ if (result != MEMTX_OK) {
+ program_interrupt(env, PGM_OPERAND, 4);
+ return 0;
+ }
} else if (pcias == 15) {
if ((4 - (offset & 0x3)) < len) {
program_interrupt(env, PGM_OPERAND, 4);
uint64_t offset, data;
S390PCIBusDevice *pbdev;
MemoryRegion *mr;
+ MemTxResult result;
uint8_t len;
uint32_t fh;
uint8_t pcias;
mr = pbdev->pdev->io_regions[pcias].memory;
}
- memory_region_dispatch_write(mr, offset, data, len,
+ result = memory_region_dispatch_write(mr, offset, data, len,
MEMTXATTRS_UNSPECIFIED);
+ if (result != MEMTX_OK) {
+ program_interrupt(env, PGM_OPERAND, 4);
+ return 0;
+ }
} else if (pcias == 15) {
if ((4 - (offset & 0x3)) < len) {
program_interrupt(env, PGM_OPERAND, 4);
CPUS390XState *env = &cpu->env;
S390PCIBusDevice *pbdev;
MemoryRegion *mr;
+ MemTxResult result;
int i;
uint32_t fh;
uint8_t pcias;
mr = pbdev->pdev->io_regions[pcias].memory;
if (!memory_region_access_valid(mr, env->regs[r3], len, true)) {
- program_interrupt(env, PGM_ADDRESSING, 6);
+ program_interrupt(env, PGM_OPERAND, 6);
return 0;
}
}
for (i = 0; i < len / 8; i++) {
- memory_region_dispatch_write(mr, env->regs[r3] + i * 8,
+ result = memory_region_dispatch_write(mr, env->regs[r3] + i * 8,
ldq_p(buffer + i * 8), 8,
MEMTXATTRS_UNSPECIFIED);
+ if (result != MEMTX_OK) {
+ program_interrupt(env, PGM_OPERAND, 6);
+ return 0;
+ }
}
setcc(cpu, ZPCI_PCI_LS_OK);
#include "hw/sysbus.h"
#include "qemu/timer.h"
#include "hw/ptimer.h"
-#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "trace.h"
ret = tpm_passthrough_unix_write(tpm_pt->tpm_fd, in, in_len);
if (ret != in_len) {
- if (!tpm_pt->tpm_op_canceled ||
- (tpm_pt->tpm_op_canceled && errno != ECANCELED)) {
+ if (!tpm_pt->tpm_op_canceled || errno != ECANCELED) {
error_report("tpm_passthrough: error while transmitting data "
"to TPM: %s (%i)",
strerror(errno), errno);
ret = tpm_passthrough_unix_read(tpm_pt->tpm_fd, out, out_len);
if (ret < 0) {
- if (!tpm_pt->tpm_op_canceled ||
- (tpm_pt->tpm_op_canceled && errno != ECANCELED)) {
+ if (!tpm_pt->tpm_op_canceled || errno != ECANCELED) {
error_report("tpm_passthrough: error while reading data from "
"TPM: %s (%i)",
strerror(errno), errno);
#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/main-loop.h"
-#include "sysemu/tpm_backend.h"
#define DEBUG_TIS 0
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
-#include "qemu-common.h"
#include "ui/console.h"
#include "elf.h"
#include "exec/address-spaces.h"
return 0;
}
-static int emulated_exitfn(CCIDCardState *base)
+static void emulated_exitfn(CCIDCardState *base)
{
EmulatedState *card = EMULATED_CCID_CARD(base);
VEvent *vevent = vevent_new(VEVENT_LAST, NULL, NULL);
qemu_mutex_destroy(&card->handle_apdu_mutex);
qemu_mutex_destroy(&card->vreader_mutex);
qemu_mutex_destroy(&card->event_list_mutex);
- return 0;
}
static Property emulated_card_properties[] = {
return 0;
}
-static int passthru_exitfn(CCIDCardState *base)
-{
- return 0;
-}
-
static VMStateDescription passthru_vmstate = {
.name = "ccid-card-passthru",
.version_id = 1,
CCIDCardClass *cc = CCID_CARD_CLASS(klass);
cc->initfn = passthru_initfn;
- cc->exitfn = passthru_exitfn;
cc->get_atr = passthru_get_atr;
cc->apdu_from_guest = passthru_apdu_from_guest;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
void (*apdu_from_guest)(CCIDCardState *card,
const uint8_t *apdu,
uint32_t len);
- int (*exitfn)(CCIDCardState *card);
+ void (*exitfn)(CCIDCardState *card);
int (*initfn)(CCIDCardState *card);
} CCIDCardClass;
#include <sys/statvfs.h>
#ifdef CONFIG_INOTIFY1
#include <sys/inotify.h>
-#include "qapi/error.h"
#include "qemu/main-loop.h"
#endif
}
}
-static int ccid_card_exitfn(CCIDCardState *card)
+static void ccid_card_exitfn(CCIDCardState *card)
{
CCIDCardClass *cc = CCID_CARD_GET_CLASS(card);
if (cc->exitfn) {
- return cc->exitfn(card);
+ cc->exitfn(card);
}
- return 0;
+
}
static int ccid_card_initfn(CCIDCardState *card)
static int ccid_card_exit(DeviceState *qdev)
{
- int ret = 0;
CCIDCardState *card = CCID_CARD(qdev);
USBDevice *dev = USB_DEVICE(qdev->parent_bus->parent);
USBCCIDState *s = USB_CCID_DEV(dev);
if (ccid_card_inserted(s)) {
ccid_card_card_removed(card);
}
- ret = ccid_card_exitfn(card);
+ ccid_card_exitfn(card);
s->card = NULL;
- return ret;
+ return 0;
}
static int ccid_card_init(DeviceState *qdev)
struct timeval tv; \
\
gettimeofday(&tv, NULL); \
- xen_be_printf(xendev, lvl, "%8ld.%06ld xen-usb(%s):" fmt, \
+ xen_pv_printf(xendev, lvl, "%8ld.%06ld xen-usb(%s):" fmt, \
tv.tv_sec, tv.tv_usec, __func__, ##args); \
}
#define TR_BUS(xendev, fmt, args...) TR(xendev, 2, fmt, ##args)
}
if (nr_segs > USBIF_MAX_SEGMENTS_PER_REQUEST) {
- xen_be_printf(xendev, 0, "bad number of segments in request (%d)\n",
+ xen_pv_printf(xendev, 0, "bad number of segments in request (%d)\n",
nr_segs);
return -EINVAL;
}
for (i = 0; i < nr_segs; i++) {
if ((unsigned)usbback_req->req.seg[i].offset +
(unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) {
- xen_be_printf(xendev, 0, "segment crosses page boundary\n");
+ xen_pv_printf(xendev, 0, "segment crosses page boundary\n");
return -EINVAL;
}
}
*/
if (!usbback_req->nr_extra_segs) {
- xen_be_printf(xendev, 0, "iso request without descriptor segments\n");
+ xen_pv_printf(xendev, 0, "iso request without descriptor segments\n");
return -EINVAL;
}
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&usbif->urb_ring, notify);
if (notify) {
- xen_be_send_notify(xendev);
+ xen_pv_send_notify(xendev);
}
}
ret = usbback_init_packet(usbback_req);
if (ret) {
- xen_be_printf(&usbif->xendev, 0, "invalid request\n");
+ xen_pv_printf(&usbif->xendev, 0, "invalid request\n");
ret = -ESHUTDOWN;
goto fail_free_urb;
}
ret = usbback_gnttab_map(usbback_req);
if (ret) {
- xen_be_printf(&usbif->xendev, 0, "invalid buffer, ret=%d\n", ret);
+ xen_pv_printf(&usbif->xendev, 0, "invalid buffer, ret=%d\n", ret);
ret = -ESHUTDOWN;
goto fail_free_urb;
}
/* Check for full ring. */
if ((RING_SIZE(ring) - ring->rsp_prod_pvt - ring->req_cons) == 0) {
- xen_be_send_notify(&usbif->xendev);
+ xen_pv_send_notify(&usbif->xendev);
return;
}
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(ring, notify);
if (notify) {
- xen_be_send_notify(&usbif->xendev);
+ xen_pv_send_notify(&usbif->xendev);
}
TR_BUS(&usbif->xendev, "hotplug port %d speed %d\n", usb_hp->port,
if (RING_REQUEST_PROD_OVERFLOW(urb_ring, rp)) {
rc = urb_ring->rsp_prod_pvt;
- xen_be_printf(&usbif->xendev, 0, "domU provided bogus ring requests "
+ xen_pv_printf(&usbif->xendev, 0, "domU provided bogus ring requests "
"(%#x - %#x = %u). Halting ring processing.\n",
rp, rc, rp - rc);
usbif->ring_error = true;
portname = strchr(busid, '-');
if (!portname) {
- xen_be_printf(&usbif->xendev, 0, "device %s illegal specification\n",
+ xen_pv_printf(&usbif->xendev, 0, "device %s illegal specification\n",
busid);
return;
}
break;
}
if (speed == USBIF_SPEED_NONE) {
- xen_be_printf(&usbif->xendev, 0, "device %s wrong speed\n", busid);
+ xen_pv_printf(&usbif->xendev, 0, "device %s wrong speed\n", busid);
object_unparent(OBJECT(usbif->ports[port - 1].dev));
usbif->ports[port - 1].dev = NULL;
return;
err:
QDECREF(qdict);
snprintf(p->path, sizeof(p->path), "%d", 99);
- xen_be_printf(&usbif->xendev, 0, "device %s could not be opened\n", busid);
+ xen_pv_printf(&usbif->xendev, 0, "device %s could not be opened\n", busid);
}
static void usbback_process_port(struct usbback_info *usbif, unsigned port)
snprintf(node, sizeof(node), "port/%d", port);
busid = xenstore_read_be_str(&usbif->xendev, node);
if (busid == NULL) {
- xen_be_printf(&usbif->xendev, 0, "xenstore_read %s failed\n", node);
+ xen_pv_printf(&usbif->xendev, 0, "xenstore_read %s failed\n", node);
return;
}
usbif = container_of(xendev, struct usbback_info, xendev);
- xen_be_unbind_evtchn(xendev);
+ xen_pv_unbind_evtchn(xendev);
if (usbif->urb_sring) {
xengnttab_unmap(xendev->gnttabdev, usbif->urb_sring, 1);
usbif = container_of(xendev, struct usbback_info, xendev);
if (xenstore_read_fe_int(xendev, "urb-ring-ref", &urb_ring_ref)) {
- xen_be_printf(xendev, 0, "error reading urb-ring-ref\n");
+ xen_pv_printf(xendev, 0, "error reading urb-ring-ref\n");
return -1;
}
if (xenstore_read_fe_int(xendev, "conn-ring-ref", &conn_ring_ref)) {
- xen_be_printf(xendev, 0, "error reading conn-ring-ref\n");
+ xen_pv_printf(xendev, 0, "error reading conn-ring-ref\n");
return -1;
}
if (xenstore_read_fe_int(xendev, "event-channel", &xendev->remote_port)) {
- xen_be_printf(xendev, 0, "error reading event-channel\n");
+ xen_pv_printf(xendev, 0, "error reading event-channel\n");
return -1;
}
conn_ring_ref,
PROT_READ | PROT_WRITE);
if (!usbif->urb_sring || !usbif->conn_sring) {
- xen_be_printf(xendev, 0, "error mapping rings\n");
+ xen_pv_printf(xendev, 0, "error mapping rings\n");
usbback_disconnect(xendev);
return -1;
}
xen_be_bind_evtchn(xendev);
- xen_be_printf(xendev, 1, "urb-ring-ref %d, conn-ring-ref %d, "
+ xen_pv_printf(xendev, 1, "urb-ring-ref %d, conn-ring-ref %d, "
"remote port %d, local port %d\n", urb_ring_ref,
conn_ring_ref, xendev->remote_port, xendev->local_port);
if (xenstore_read_be_int(xendev, "num-ports", &usbif->num_ports) ||
usbif->num_ports < 1 || usbif->num_ports > USBBACK_MAXPORTS) {
- xen_be_printf(xendev, 0, "num-ports not readable or out of bounds\n");
+ xen_pv_printf(xendev, 0, "num-ports not readable or out of bounds\n");
return -1;
}
if (xenstore_read_be_int(xendev, "usb-ver", &usbif->usb_ver) ||
(usbif->usb_ver != USB_VER_USB11 && usbif->usb_ver != USB_VER_USB20)) {
- xen_be_printf(xendev, 0, "usb-ver not readable or out of bounds\n");
+ xen_pv_printf(xendev, 0, "usb-ver not readable or out of bounds\n");
return -1;
}
/* max_grants: for each request and for the rings (request and connect). */
max_grants = USBIF_MAX_SEGMENTS_PER_REQUEST * USB_URB_RING_SIZE + 2;
if (xengnttab_set_max_grants(xendev->gnttabdev, max_grants) < 0) {
- xen_be_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
+ xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
strerror(errno));
}
}
return NULL;
}
-static void vfio_setup_region_sparse_mmaps(VFIORegion *region,
- struct vfio_region_info *info)
+static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
+ struct vfio_region_info *info)
{
struct vfio_info_cap_header *hdr;
struct vfio_region_info_cap_sparse_mmap *sparse;
- int i;
+ int i, j;
hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
if (!hdr) {
- return;
+ return -ENODEV;
}
sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);
trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
region->nr, sparse->nr_areas);
- region->nr_mmaps = sparse->nr_areas;
- region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+ region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);
- for (i = 0; i < region->nr_mmaps; i++) {
- region->mmaps[i].offset = sparse->areas[i].offset;
- region->mmaps[i].size = sparse->areas[i].size;
- trace_vfio_region_sparse_mmap_entry(i, region->mmaps[i].offset,
- region->mmaps[i].offset +
- region->mmaps[i].size);
+ for (i = 0, j = 0; i < sparse->nr_areas; i++) {
+ trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
+ sparse->areas[i].offset +
+ sparse->areas[i].size);
+
+ if (sparse->areas[i].size) {
+ region->mmaps[j].offset = sparse->areas[i].offset;
+ region->mmaps[j].size = sparse->areas[i].size;
+ j++;
+ }
}
+
+ region->nr_mmaps = j;
+ region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));
+
+ return 0;
}
int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
region, name, region->size);
if (!vbasedev->no_mmap &&
- region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
- !(region->size & ~qemu_real_host_page_mask)) {
+ region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
- vfio_setup_region_sparse_mmaps(region, info);
+ ret = vfio_setup_region_sparse_mmaps(region, info);
- if (!region->nr_mmaps) {
+ if (ret) {
region->nr_mmaps = 1;
region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
region->mmaps[0].offset = 0;
name = g_strdup_printf("%s mmaps[%d]",
memory_region_name(region->mem), i);
- memory_region_init_ram_ptr(&region->mmaps[i].mem,
- memory_region_owner(region->mem),
- name, region->mmaps[i].size,
- region->mmaps[i].mmap);
+ memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
+ memory_region_owner(region->mem),
+ name, region->mmaps[i].size,
+ region->mmaps[i].mmap);
g_free(name);
- memory_region_set_skip_dump(&region->mmaps[i].mem);
memory_region_add_subregion(region->mem, region->mmaps[i].offset,
&region->mmaps[i].mem);
.endianness = DEVICE_LITTLE_ENDIAN,
};
+/*
+ * Expand the memory region of a sub-page (size < PAGE_SIZE) MMIO BAR to page
+ * size if the BAR occupies an exclusive page on the host, so that we can map
+ * this BAR into the guest. However, this sub-page BAR may not occupy an
+ * exclusive page in the guest, so we set the priority of the expanded memory
+ * region to zero in case it overlaps with BARs that share the same page with
+ * the sub-page BAR in the guest. In addition, we must restore the original
+ * size of this sub-page BAR when its base address is changed by the guest
+ * and is no longer page aligned.
+ */
+static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
+{
+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIORegion *region = &vdev->bars[bar].region;
+ MemoryRegion *mmap_mr, *mr;
+ PCIIORegion *r;
+ pcibus_t bar_addr;
+ uint64_t size = region->size;
+
+ /* Make sure that the whole region is allowed to be mmapped */
+ if (region->nr_mmaps != 1 || !region->mmaps[0].mmap ||
+ region->mmaps[0].size != region->size) {
+ return;
+ }
+
+ r = &pdev->io_regions[bar];
+ bar_addr = r->addr;
+ mr = region->mem;
+ mmap_mr = &region->mmaps[0].mem;
+
+ /* If BAR is mapped and page aligned, update to fill PAGE_SIZE */
+ if (bar_addr != PCI_BAR_UNMAPPED &&
+ !(bar_addr & ~qemu_real_host_page_mask)) {
+ size = qemu_real_host_page_size;
+ }
+
+ memory_region_transaction_begin();
+
+ memory_region_set_size(mr, size);
+ memory_region_set_size(mmap_mr, size);
+ if (size != region->size && memory_region_is_mapped(mr)) {
+ memory_region_del_subregion(r->address_space, mr);
+ memory_region_add_subregion_overlap(r->address_space,
+ bar_addr, mr, 0);
+ }
+
+ memory_region_transaction_commit();
+}
+
/*
* PCI config space
*/
} else if (was_enabled && !is_enabled) {
vfio_msix_disable(vdev);
}
+ } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
+ range_covers_byte(addr, len, PCI_COMMAND)) {
+ pcibus_t old_addr[PCI_NUM_REGIONS - 1];
+ int bar;
+
+ for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+ old_addr[bar] = pdev->io_regions[bar].addr;
+ }
+
+ pci_default_write_config(pdev, addr, val, len);
+
+ for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+ if (old_addr[bar] != pdev->io_regions[bar].addr &&
+ pdev->io_regions[bar].size > 0 &&
+ pdev->io_regions[bar].size < qemu_real_host_page_size) {
+ vfio_sub_page_bar_update_mapping(pdev, bar);
+ }
+ }
} else {
/* Write everything to QEMU to keep emulated bits correct */
pci_default_write_config(pdev, addr, val, len);
static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
{
Error *err = NULL;
+ int nr;
vfio_intx_enable(vdev, &err);
if (err) {
error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
}
+
+ for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
+ off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr);
+ uint32_t val = 0;
+ uint32_t len = sizeof(val);
+
+ if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) {
+ error_report("%s(%s) reset bar %d failed: %m", __func__,
+ vdev->vbasedev.name, nr);
+ }
+ }
}
static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
}
return !memory_region_is_ram(section->mr) ||
- memory_region_is_skip_dump(section->mr);
+ memory_region_is_ram_device(section->mr);
}
static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa)
# xen backend driver support
-common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o
+common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o xen_pvdev.o
obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_graphics.o xen_pt_msi.o
#include "sysemu/char.h"
#include "qemu/log.h"
#include "hw/xen/xen_backend.h"
+#include "hw/xen/xen_pvdev.h"
#include <xen/grant_table.h>
const char *xen_protocol;
/* private */
-struct xs_dirs {
- char *xs_dir;
- QTAILQ_ENTRY(xs_dirs) list;
-};
-static QTAILQ_HEAD(xs_dirs_head, xs_dirs) xs_cleanup =
- QTAILQ_HEAD_INITIALIZER(xs_cleanup);
-
-static QTAILQ_HEAD(XenDeviceHead, XenDevice) xendevs = QTAILQ_HEAD_INITIALIZER(xendevs);
-static int debug = 0;
-
-/* ------------------------------------------------------------- */
-
-static void xenstore_cleanup_dir(char *dir)
-{
- struct xs_dirs *d;
-
- d = g_malloc(sizeof(*d));
- d->xs_dir = dir;
- QTAILQ_INSERT_TAIL(&xs_cleanup, d, list);
-}
-
-void xen_config_cleanup(void)
-{
- struct xs_dirs *d;
-
- QTAILQ_FOREACH(d, &xs_cleanup, list) {
- xs_rm(xenstore, 0, d->xs_dir);
- }
-}
-
-int xenstore_write_str(const char *base, const char *node, const char *val)
-{
- char abspath[XEN_BUFSIZE];
-
- snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- if (!xs_write(xenstore, 0, abspath, val, strlen(val))) {
- return -1;
- }
- return 0;
-}
-
-char *xenstore_read_str(const char *base, const char *node)
-{
- char abspath[XEN_BUFSIZE];
- unsigned int len;
- char *str, *ret = NULL;
-
- snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- str = xs_read(xenstore, 0, abspath, &len);
- if (str != NULL) {
- /* move to qemu-allocated memory to make sure
- * callers can savely g_free() stuff. */
- ret = g_strdup(str);
- free(str);
- }
- return ret;
-}
-
-int xenstore_mkdir(char *path, int p)
-{
- struct xs_permissions perms[2] = {
- {
- .id = 0, /* set owner: dom0 */
- }, {
- .id = xen_domid,
- .perms = p,
- }
- };
-
- if (!xs_mkdir(xenstore, 0, path)) {
- xen_be_printf(NULL, 0, "xs_mkdir %s: failed\n", path);
- return -1;
- }
- xenstore_cleanup_dir(g_strdup(path));
-
- if (!xs_set_permissions(xenstore, 0, path, perms, 2)) {
- xen_be_printf(NULL, 0, "xs_set_permissions %s: failed\n", path);
- return -1;
- }
- return 0;
-}
-
-int xenstore_write_int(const char *base, const char *node, int ival)
-{
- char val[12];
-
- snprintf(val, sizeof(val), "%d", ival);
- return xenstore_write_str(base, node, val);
-}
-
-int xenstore_write_int64(const char *base, const char *node, int64_t ival)
-{
- char val[21];
-
- snprintf(val, sizeof(val), "%"PRId64, ival);
- return xenstore_write_str(base, node, val);
-}
-
-int xenstore_read_int(const char *base, const char *node, int *ival)
-{
- char *val;
- int rc = -1;
-
- val = xenstore_read_str(base, node);
- if (val && 1 == sscanf(val, "%d", ival)) {
- rc = 0;
- }
- g_free(val);
- return rc;
-}
-
-int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval)
-{
- char *val;
- int rc = -1;
-
- val = xenstore_read_str(base, node);
- if (val && 1 == sscanf(val, "%"SCNu64, uval)) {
- rc = 0;
- }
- g_free(val);
- return rc;
-}
+static int debug;
int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val)
{
return xenstore_read_int(xendev->fe, node, ival);
}
-int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, uint64_t *uval)
+int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node,
+ uint64_t *uval)
{
return xenstore_read_uint64(xendev->fe, node, uval);
}
/* ------------------------------------------------------------- */
-const char *xenbus_strstate(enum xenbus_state state)
-{
- static const char *const name[] = {
- [ XenbusStateUnknown ] = "Unknown",
- [ XenbusStateInitialising ] = "Initialising",
- [ XenbusStateInitWait ] = "InitWait",
- [ XenbusStateInitialised ] = "Initialised",
- [ XenbusStateConnected ] = "Connected",
- [ XenbusStateClosing ] = "Closing",
- [ XenbusStateClosed ] = "Closed",
- };
- return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
-}
-
int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state)
{
int rc;
if (rc < 0) {
return rc;
}
- xen_be_printf(xendev, 1, "backend state: %s -> %s\n",
+ xen_pv_printf(xendev, 1, "backend state: %s -> %s\n",
xenbus_strstate(xendev->be_state), xenbus_strstate(state));
xendev->be_state = state;
return 0;
}
-/* ------------------------------------------------------------- */
-
-struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev)
-{
- struct XenDevice *xendev;
-
- QTAILQ_FOREACH(xendev, &xendevs, next) {
- if (xendev->dom != dom) {
- continue;
- }
- if (xendev->dev != dev) {
- continue;
- }
- if (strcmp(xendev->type, type) != 0) {
- continue;
- }
- return xendev;
- }
- return NULL;
-}
-
/*
* get xen backend device, allocate a new one if it doesn't exist.
*/
{
struct XenDevice *xendev;
- xendev = xen_be_find_xendev(type, dom, dev);
+ xendev = xen_pv_find_xendev(type, dom, dev);
if (xendev) {
return xendev;
}
xendev->evtchndev = xenevtchn_open(NULL, 0);
if (xendev->evtchndev == NULL) {
- xen_be_printf(NULL, 0, "can't open evtchn device\n");
+ xen_pv_printf(NULL, 0, "can't open evtchn device\n");
g_free(xendev);
return NULL;
}
if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
xendev->gnttabdev = xengnttab_open(NULL, 0);
if (xendev->gnttabdev == NULL) {
- xen_be_printf(NULL, 0, "can't open gnttab device\n");
+ xen_pv_printf(NULL, 0, "can't open gnttab device\n");
xenevtchn_close(xendev->evtchndev);
g_free(xendev);
return NULL;
xendev->gnttabdev = NULL;
}
- QTAILQ_INSERT_TAIL(&xendevs, xendev, next);
+ xen_pv_insert_xendev(xendev);
if (xendev->ops->alloc) {
xendev->ops->alloc(xendev);
return xendev;
}
-/*
- * release xen backend device.
- */
-static void xen_be_del_xendev(struct XenDevice *xendev)
-{
- if (xendev->ops->free) {
- xendev->ops->free(xendev);
- }
-
- if (xendev->fe) {
- char token[XEN_BUFSIZE];
- snprintf(token, sizeof(token), "fe:%p", xendev);
- xs_unwatch(xenstore, xendev->fe, token);
- g_free(xendev->fe);
- }
-
- if (xendev->evtchndev != NULL) {
- xenevtchn_close(xendev->evtchndev);
- }
- if (xendev->gnttabdev != NULL) {
- xengnttab_close(xendev->gnttabdev);
- }
-
- QTAILQ_REMOVE(&xendevs, xendev, next);
- g_free(xendev);
-}
/*
* Sync internal data structures on xenstore updates.
}
if (node) {
- xen_be_printf(xendev, 2, "backend update: %s\n", node);
+ xen_pv_printf(xendev, 2, "backend update: %s\n", node);
if (xendev->ops->backend_changed) {
xendev->ops->backend_changed(xendev, node);
}
fe_state = XenbusStateUnknown;
}
if (xendev->fe_state != fe_state) {
- xen_be_printf(xendev, 1, "frontend state: %s -> %s\n",
+ xen_pv_printf(xendev, 1, "frontend state: %s -> %s\n",
xenbus_strstate(xendev->fe_state),
xenbus_strstate(fe_state));
}
g_free(xendev->protocol);
xendev->protocol = xenstore_read_fe_str(xendev, "protocol");
if (xendev->protocol) {
- xen_be_printf(xendev, 1, "frontend protocol: %s\n", xendev->protocol);
+ xen_pv_printf(xendev, 1, "frontend protocol: %s\n",
+ xendev->protocol);
}
}
if (node) {
- xen_be_printf(xendev, 2, "frontend update: %s\n", node);
+ xen_pv_printf(xendev, 2, "frontend update: %s\n", node);
if (xendev->ops->frontend_changed) {
xendev->ops->frontend_changed(xendev, node);
}
int be_state;
if (xenstore_read_be_int(xendev, "state", &be_state) == -1) {
- xen_be_printf(xendev, 0, "reading backend state failed\n");
+ xen_pv_printf(xendev, 0, "reading backend state failed\n");
return -1;
}
if (be_state != XenbusStateInitialising) {
- xen_be_printf(xendev, 0, "initial backend state is wrong (%s)\n",
+ xen_pv_printf(xendev, 0, "initial backend state is wrong (%s)\n",
xenbus_strstate(be_state));
return -1;
}
xendev->fe = xenstore_read_be_str(xendev, "frontend");
if (xendev->fe == NULL) {
- xen_be_printf(xendev, 0, "reading frontend path failed\n");
+ xen_pv_printf(xendev, 0, "reading frontend path failed\n");
return -1;
}
/* setup frontend watch */
snprintf(token, sizeof(token), "fe:%p", xendev);
if (!xs_watch(xenstore, xendev->fe, token)) {
- xen_be_printf(xendev, 0, "watching frontend path (%s) failed\n",
+ xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n",
xendev->fe);
return -1;
}
int rc = 0;
if (!xendev->online) {
- xen_be_printf(xendev, 1, "not online\n");
+ xen_pv_printf(xendev, 1, "not online\n");
return -1;
}
rc = xendev->ops->init(xendev);
}
if (rc != 0) {
- xen_be_printf(xendev, 1, "init() failed\n");
+ xen_pv_printf(xendev, 1, "init() failed\n");
return rc;
}
if (xendev->fe_state != XenbusStateInitialised &&
xendev->fe_state != XenbusStateConnected) {
if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) {
- xen_be_printf(xendev, 2, "frontend not ready, ignoring\n");
+ xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n");
} else {
- xen_be_printf(xendev, 2, "frontend not ready (yet)\n");
+ xen_pv_printf(xendev, 2, "frontend not ready (yet)\n");
return -1;
}
}
rc = xendev->ops->initialise(xendev);
}
if (rc != 0) {
- xen_be_printf(xendev, 0, "initialise() failed\n");
+ xen_pv_printf(xendev, 0, "initialise() failed\n");
return rc;
}
if (xendev->fe_state != XenbusStateConnected) {
if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) {
- xen_be_printf(xendev, 2, "frontend not ready, ignoring\n");
+ xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n");
} else {
- xen_be_printf(xendev, 2, "frontend not ready (yet)\n");
+ xen_pv_printf(xendev, 2, "frontend not ready (yet)\n");
return;
}
}
return -1;
}
- xen_be_printf(xendev, 1, "device reset (for re-connect)\n");
+ xen_pv_printf(xendev, 1, "device reset (for re-connect)\n");
xen_be_set_state(xendev, XenbusStateInitialising);
return 0;
}
snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops);
snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
if (!xs_watch(xenstore, path, token)) {
- xen_be_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", path);
+ xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
+ path);
return -1;
}
return 0;
}
-static void xenstore_update_be(char *watch, char *type, int dom,
- struct XenDevOps *ops)
+void xenstore_update_be(char *watch, char *type, int dom,
+ struct XenDevOps *ops)
{
struct XenDevice *xendev;
char path[XEN_BUFSIZE], *bepath;
if (xendev != NULL) {
bepath = xs_read(xenstore, 0, xendev->be, &len);
if (bepath == NULL) {
- xen_be_del_xendev(xendev);
+ xen_pv_del_xendev(xendev);
} else {
free(bepath);
xen_be_backend_changed(xendev, path);
}
}
-static void xenstore_update_fe(char *watch, struct XenDevice *xendev)
+void xenstore_update_fe(char *watch, struct XenDevice *xendev)
{
char *node;
unsigned int len;
xen_be_frontend_changed(xendev, node);
xen_be_check_state(xendev);
}
-
-static void xenstore_update(void *unused)
-{
- char **vec = NULL;
- intptr_t type, ops, ptr;
- unsigned int dom, count;
-
- vec = xs_read_watch(xenstore, &count);
- if (vec == NULL) {
- goto cleanup;
- }
-
- if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR,
- &type, &dom, &ops) == 3) {
- xenstore_update_be(vec[XS_WATCH_PATH], (void*)type, dom, (void*)ops);
- }
- if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) {
- xenstore_update_fe(vec[XS_WATCH_PATH], (void*)ptr);
- }
-
-cleanup:
- free(vec);
-}
-
-static void xen_be_evtchn_event(void *opaque)
-{
- struct XenDevice *xendev = opaque;
- evtchn_port_t port;
-
- port = xenevtchn_pending(xendev->evtchndev);
- if (port != xendev->local_port) {
- xen_be_printf(xendev, 0,
- "xenevtchn_pending returned %d (expected %d)\n",
- port, xendev->local_port);
- return;
- }
- xenevtchn_unmask(xendev->evtchndev, port);
-
- if (xendev->ops->event) {
- xendev->ops->event(xendev);
- }
-}
-
/* -------------------------------------------------------------------- */
int xen_be_init(void)
{
xenstore = xs_daemon_open();
if (!xenstore) {
- xen_be_printf(NULL, 0, "can't connect to xenstored\n");
+ xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
return -1;
}
xendev->local_port = xenevtchn_bind_interdomain
(xendev->evtchndev, xendev->dom, xendev->remote_port);
if (xendev->local_port == -1) {
- xen_be_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n");
+ xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n");
return -1;
}
- xen_be_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
+ xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev),
- xen_be_evtchn_event, NULL, xendev);
+ xen_pv_evtchn_event, NULL, xendev);
return 0;
}
-void xen_be_unbind_evtchn(struct XenDevice *xendev)
-{
- if (xendev->local_port == -1) {
- return;
- }
- qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
- xenevtchn_unbind(xendev->evtchndev, xendev->local_port);
- xen_be_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
- xendev->local_port = -1;
-}
-
-int xen_be_send_notify(struct XenDevice *xendev)
-{
- return xenevtchn_notify(xendev->evtchndev, xendev->local_port);
-}
-
-/*
- * msg_level:
- * 0 == errors (stderr + logfile).
- * 1 == informative debug messages (logfile only).
- * 2 == noisy debug messages (logfile only).
- * 3 == will flood your log (logfile only).
- */
-void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...)
-{
- va_list args;
-
- if (xendev) {
- if (msg_level > xendev->debug) {
- return;
- }
- qemu_log("xen be: %s: ", xendev->name);
- if (msg_level == 0) {
- fprintf(stderr, "xen be: %s: ", xendev->name);
- }
- } else {
- if (msg_level > debug) {
- return;
- }
- qemu_log("xen be core: ");
- if (msg_level == 0) {
- fprintf(stderr, "xen be core: ");
- }
- }
- va_start(args, fmt);
- qemu_log_vprintf(fmt, args);
- va_end(args);
- if (msg_level == 0) {
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
- }
- qemu_log_flush();
-}
static int xen_sysdev_init(SysBusDevice *dev)
{
const char *filename = qemu_opt_get(disk->opts, "file");
snprintf(device_name, sizeof(device_name), "xvd%c", 'a' + disk->unit);
- xen_be_printf(NULL, 1, "config disk %d [%s]: %s\n",
+ xen_pv_printf(NULL, 1, "config disk %d [%s]: %s\n",
disk->unit, device_name, filename);
xen_config_dev_dirs("vbd", "qdisk", vdev, fe, be, sizeof(fe));
snprintf(mac, sizeof(mac), "%02x:%02x:%02x:%02x:%02x:%02x",
nic->macaddr.a[0], nic->macaddr.a[1], nic->macaddr.a[2],
nic->macaddr.a[3], nic->macaddr.a[4], nic->macaddr.a[5]);
- xen_be_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac);
+ xen_pv_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac);
xen_config_dev_dirs("vif", "qnic", vlan_id, fe, be, sizeof(fe));
/* frontend */
--- /dev/null
+/*
+ * Xen para-virtualization device
+ *
+ * (c) 2008 Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/xen/xen_backend.h"
+#include "hw/xen/xen_pvdev.h"
+
+/* private */
+static int debug;
+
+struct xs_dirs {
+ char *xs_dir;
+ QTAILQ_ENTRY(xs_dirs) list;
+};
+
+static QTAILQ_HEAD(xs_dirs_head, xs_dirs) xs_cleanup =
+ QTAILQ_HEAD_INITIALIZER(xs_cleanup);
+
+static QTAILQ_HEAD(XenDeviceHead, XenDevice) xendevs =
+ QTAILQ_HEAD_INITIALIZER(xendevs);
+
+/* ------------------------------------------------------------- */
+
+/*
+ * Queue @dir on the file-static xs_cleanup list, which xen_config_cleanup()
+ * walks to remove the recorded directories from xenstore.
+ * The pointer is stored as-is; no copy of the string is made here.
+ */
+static void xenstore_cleanup_dir(char *dir)
+{
+    struct xs_dirs *entry = g_malloc(sizeof(*entry));
+
+    entry->xs_dir = dir;
+    QTAILQ_INSERT_TAIL(&xs_cleanup, entry, list);
+}
+
+/*
+ * Ask xenstore to remove every directory recorded on the xs_cleanup list.
+ * The list itself is left untouched: entries are neither unlinked nor freed.
+ */
+void xen_config_cleanup(void)
+{
+    struct xs_dirs *entry;
+
+    for (entry = QTAILQ_FIRST(&xs_cleanup); entry != NULL;
+         entry = QTAILQ_NEXT(entry, list)) {
+        xs_rm(xenstore, 0, entry->xs_dir);
+    }
+}
+
+/*
+ * Create the xenstore directory @path and apply permissions to it.
+ *
+ * Two permission entries are set: the first names domain 0 (the owner),
+ * the second grants @p to the guest domain (xen_domid).  Once the
+ * directory has been created, a copy of @path is queued for removal by
+ * xen_config_cleanup(), even if setting the permissions fails afterwards.
+ *
+ * Returns 0 on success, -1 if either xenstore call fails (an error is
+ * logged through xen_pv_printf() in that case).
+ */
+int xenstore_mkdir(char *path, int p)
+{
+    struct xs_permissions perms[2] = {
+        { .id = 0 },                        /* set owner: dom0 */
+        { .id = xen_domid, .perms = p },
+    };
+
+    if (!xs_mkdir(xenstore, 0, path)) {
+        xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path);
+        return -1;
+    }
+    xenstore_cleanup_dir(g_strdup(path));
+
+    if (xs_set_permissions(xenstore, 0, path, perms, 2)) {
+        return 0;
+    }
+    xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path);
+    return -1;
+}
+
+/*
+ * Write the string @val to the xenstore node "@base/@node".
+ * The path is assembled in a XEN_BUFSIZE-byte stack buffer.
+ *
+ * Returns 0 on success, -1 if xs_write() reports failure.
+ */
+int xenstore_write_str(const char *base, const char *node, const char *val)
+{
+    char path[XEN_BUFSIZE];
+
+    snprintf(path, sizeof(path), "%s/%s", base, node);
+    return xs_write(xenstore, 0, path, val, strlen(val)) ? 0 : -1;
+}
+
+/*
+ * Read the xenstore node "@base/@node" as a string.
+ *
+ * Returns a g_strdup()ed copy that the caller releases with g_free(),
+ * or NULL if xs_read() returned nothing.
+ */
+char *xenstore_read_str(const char *base, const char *node)
+{
+    char path[XEN_BUFSIZE];
+    unsigned int len;
+    char *xs_val;
+    char *copy;
+
+    snprintf(path, sizeof(path), "%s/%s", base, node);
+    xs_val = xs_read(xenstore, 0, path, &len);
+    if (xs_val == NULL) {
+        return NULL;
+    }
+
+    /* Move to qemu-allocated memory so that callers can safely g_free()
+     * the result; the xs_read() buffer itself is released with free(). */
+    copy = g_strdup(xs_val);
+    free(xs_val);
+    return copy;
+}
+
+/*
+ * Write @ival, formatted as a decimal string, to "@base/@node".
+ * Returns whatever xenstore_write_str() returns (0 or -1).
+ */
+int xenstore_write_int(const char *base, const char *node, int ival)
+{
+    char text[12];
+
+    snprintf(text, sizeof(text), "%d", ival);
+    return xenstore_write_str(base, node, text);
+}
+
+/*
+ * Write the 64-bit value @ival, formatted as a signed decimal string,
+ * to "@base/@node".  Returns whatever xenstore_write_str() returns.
+ */
+int xenstore_write_int64(const char *base, const char *node, int64_t ival)
+{
+    char text[21];
+
+    snprintf(text, sizeof(text), "%"PRId64, ival);
+    return xenstore_write_str(base, node, text);
+}
+
+/*
+ * Read "@base/@node" and parse it as a decimal int into *@ival.
+ *
+ * Returns 0 if the node could be read and sscanf() converted a value,
+ * -1 otherwise.
+ */
+int xenstore_read_int(const char *base, const char *node, int *ival)
+{
+    char *text = xenstore_read_str(base, node);
+    int rc;
+
+    if (text == NULL) {
+        return -1;
+    }
+    rc = (sscanf(text, "%d", ival) == 1) ? 0 : -1;
+    g_free(text);
+    return rc;
+}
+
+/*
+ * Read "@base/@node" and parse it as an unsigned 64-bit decimal value
+ * into *@uval.
+ *
+ * Returns 0 if the node could be read and sscanf() converted a value,
+ * -1 otherwise.
+ */
+int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval)
+{
+    char *text = xenstore_read_str(base, node);
+    int rc;
+
+    if (text == NULL) {
+        return -1;
+    }
+    rc = (sscanf(text, "%"SCNu64, uval) == 1) ? 0 : -1;
+    g_free(text);
+    return rc;
+}
+
+/*
+ * Consume one pending xenstore watch event and dispatch it.
+ *
+ * The watch token selects the handler:
+ *   "be:<type ptr>:<dom>:<ops ptr>"  -> xenstore_update_be()
+ *   "fe:<xendev ptr>"                -> xenstore_update_fe()
+ * Tokens matching neither pattern are ignored.  The pointers are parsed
+ * back from their hexadecimal text form (the "fe:" token is produced
+ * with "%p" in xen_pv_del_xendev(); NOTE(review): the "be:" producer is
+ * outside this file -- confirm it uses the same layout).
+ *
+ * @unused: ignored.
+ */
+void xenstore_update(void *unused)
+{
+    char **vec;
+    /* SCNxPTR is the scanf conversion for uintptr_t; PRIxPTR is only
+     * specified for printf and must not be used with sscanf(). */
+    uintptr_t type, ops, ptr;
+    unsigned int count;
+    int dom;    /* scanned with %d and handed on as int */
+
+    vec = xs_read_watch(xenstore, &count);
+    if (vec == NULL) {
+        return;
+    }
+
+    if (sscanf(vec[XS_WATCH_TOKEN], "be:%" SCNxPTR ":%d:%" SCNxPTR,
+               &type, &dom, &ops) == 3) {
+        xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void *)ops);
+    }
+    if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" SCNxPTR, &ptr) == 1) {
+        xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr);
+    }
+
+    /* The vector returned by xs_read_watch() is released with free(). */
+    free(vec);
+}
+
+/*
+ * Map a xenbus state to a printable name.
+ * Values beyond the end of the name table yield "INVALID".
+ */
+const char *xenbus_strstate(enum xenbus_state state)
+{
+    static const char *const state_names[] = {
+        [XenbusStateUnknown]      = "Unknown",
+        [XenbusStateInitialising] = "Initialising",
+        [XenbusStateInitWait]     = "InitWait",
+        [XenbusStateInitialised]  = "Initialised",
+        [XenbusStateConnected]    = "Connected",
+        [XenbusStateClosing]      = "Closing",
+        [XenbusStateClosed]       = "Closed",
+    };
+
+    if (state < ARRAY_SIZE(state_names)) {
+        return state_names[state];
+    }
+    return "INVALID";
+}
+
+/*
+ * printf-style logging helper for Xen PV devices.
+ *
+ * @xendev:    device the message belongs to, or NULL for core messages.
+ * @msg_level: severity of the message:
+ *    0 == errors (stderr + logfile).
+ *    1 == informative debug messages (logfile only).
+ *    2 == noisy debug messages (logfile only).
+ *    3 == will flood your log (logfile only).
+ *
+ * A message is dropped when @msg_level exceeds the device's debug level
+ * (or the file-static debug level when @xendev is NULL).
+ */
+void xen_pv_printf(struct XenDevice *xendev, int msg_level,
+                   const char *fmt, ...)
+{
+    const bool to_stderr = (msg_level == 0);
+    va_list ap;
+
+    if (msg_level > (xendev ? xendev->debug : debug)) {
+        return;
+    }
+
+    /* Prefix: per-device or core, mirrored to stderr for errors. */
+    if (xendev != NULL) {
+        qemu_log("xen be: %s: ", xendev->name);
+        if (to_stderr) {
+            fprintf(stderr, "xen be: %s: ", xendev->name);
+        }
+    } else {
+        qemu_log("xen be core: ");
+        if (to_stderr) {
+            fprintf(stderr, "xen be core: ");
+        }
+    }
+
+    /* The argument list is consumed once per sink, so restart it. */
+    va_start(ap, fmt);
+    qemu_log_vprintf(fmt, ap);
+    va_end(ap);
+    if (to_stderr) {
+        va_start(ap, fmt);
+        vfprintf(stderr, fmt, ap);
+        va_end(ap);
+    }
+    qemu_log_flush();
+}
+
+/*
+ * Event-channel handler (installed as an fd handler by the bind code).
+ *
+ * @opaque: the struct XenDevice the event channel belongs to.
+ *
+ * Fetches the pending port; if it is not the device's local port an
+ * error is logged and nothing else happens.  Otherwise the port is
+ * unmasked and the device's ->event callback, if any, is invoked.
+ */
+void xen_pv_evtchn_event(void *opaque)
+{
+    struct XenDevice *dev = opaque;
+    evtchn_port_t pending = xenevtchn_pending(dev->evtchndev);
+
+    if (pending != dev->local_port) {
+        xen_pv_printf(dev, 0,
+                      "xenevtchn_pending returned %d (expected %d)\n",
+                      pending, dev->local_port);
+        return;
+    }
+    xenevtchn_unmask(dev->evtchndev, pending);
+
+    if (dev->ops->event) {
+        dev->ops->event(dev);
+    }
+}
+
+/*
+ * Tear down the device's event channel binding, if there is one.
+ * A local_port of -1 means "not bound"; it is reset to -1 on exit.
+ */
+void xen_pv_unbind_evtchn(struct XenDevice *xendev)
+{
+    if (xendev->local_port != -1) {
+        /* Stop watching the event-channel fd before unbinding the port. */
+        qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
+        xenevtchn_unbind(xendev->evtchndev, xendev->local_port);
+        xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
+        xendev->local_port = -1;
+    }
+}
+
+/*
+ * Notify the device's local event-channel port.
+ * Returns the result of xenevtchn_notify() unchanged.
+ */
+int xen_pv_send_notify(struct XenDevice *xendev)
+{
+    int rc = xenevtchn_notify(xendev->evtchndev, xendev->local_port);
+
+    return rc;
+}
+
+/* ------------------------------------------------------------- */
+
+/*
+ * Look up a registered device by (@type, @dom, @dev).
+ * Returns the first match on the xendevs list, or NULL if none exists.
+ */
+struct XenDevice *xen_pv_find_xendev(const char *type, int dom, int dev)
+{
+    struct XenDevice *cur;
+
+    QTAILQ_FOREACH(cur, &xendevs, next) {
+        /* Integer fields are compared first, the type string last. */
+        if (cur->dom == dom && cur->dev == dev &&
+            strcmp(cur->type, type) == 0) {
+            return cur;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Release a xen backend device.
+ *
+ * Steps, in order: call the driver's ->free hook (if set), drop the
+ * frontend watch and free the frontend path (if set), close the event
+ * channel and grant table handles (if open), unlink the device from the
+ * xendevs list and free it.  @xendev must not be used afterwards.
+ */
+void xen_pv_del_xendev(struct XenDevice *xendev)
+{
+    if (xendev->ops->free) {
+        xendev->ops->free(xendev);
+    }
+
+    if (xendev->fe) {
+        /* The watch is removed using the "fe:<pointer>" token format. */
+        char watch_token[XEN_BUFSIZE];
+
+        snprintf(watch_token, sizeof(watch_token), "fe:%p", xendev);
+        xs_unwatch(xenstore, xendev->fe, watch_token);
+        g_free(xendev->fe);
+    }
+
+    if (xendev->evtchndev) {
+        xenevtchn_close(xendev->evtchndev);
+    }
+    if (xendev->gnttabdev) {
+        xengnttab_close(xendev->gnttabdev);
+    }
+
+    QTAILQ_REMOVE(&xendevs, xendev, next);
+    g_free(xendev);
+}
+
+/*
+ * Append @xendev to the tail of the file-static xendevs list, the list
+ * that xen_pv_find_xendev() searches and xen_pv_del_xendev() unlinks from.
+ */
+void xen_pv_insert_xendev(struct XenDevice *xendev)
+{
+ QTAILQ_INSERT_TAIL(&xendevs, xendev, next);
+}
void bdrv_close_all(void);
void bdrv_drain(BlockDriverState *bs);
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
#define BDRV_POLL_WHILE(bs, cond) ({ \
/** Block other operations when block job is running */
Error *blocker;
+ /** BlockDriverStates that are involved in this block job */
+ GSList *nodes;
+
/** The opaque value that is passed to the completion function. */
void *opaque;
BlockDriverState *bs, int64_t speed,
BlockCompletionFunc *cb, void *opaque, Error **errp);
+/**
+ * block_job_add_bdrv:
+ * @job: A block job
+ * @bs: A BlockDriverState that is involved in @job
+ *
+ * Add @bs to the list of BlockDriverState that are involved in
+ * @job. This means that all operations will be blocked on @bs while
+ * @job exists.
+ */
+void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs);
+
/**
* block_job_sleep_ns:
* @job: The job that calls the function.
#endif
+/* Called with tb_lock held. */
static inline void tb_add_jump(TranslationBlock *tb, int n,
TranslationBlock *tb_next)
{
#if defined(CONFIG_USER_ONLY)
void mmap_lock(void);
void mmap_unlock(void);
+bool have_mmap_lock(void);
static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
{
void (*destructor)(MemoryRegion *mr);
uint64_t align;
bool terminates;
- bool skip_dump;
+ bool ram_device;
bool enabled;
bool warning_printed; /* For reservations */
uint8_t vga_logging_count;
uint64_t size,
void *ptr);
+/**
+ * memory_region_init_ram_device_ptr: Initialize RAM device memory region from
+ * a user-provided pointer.
+ *
+ * A RAM device represents a mapping to a physical device, such as to a PCI
+ * MMIO BAR of a vfio-pci assigned device. The memory region may be mapped
+ * into the VM address space and access to the region will modify memory
+ * directly. However, the memory region should not be included in a memory
+ * dump (device may not be enabled/mapped at the time of the dump), and
+ * operations incompatible with manipulating MMIO should be avoided. Replaces
+ * skip_dump flag.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: the name of the region.
+ * @size: size of the region.
+ * @ptr: memory to be mapped; must contain at least @size bytes.
+ */
+void memory_region_init_ram_device_ptr(MemoryRegion *mr,
+ struct Object *owner,
+ const char *name,
+ uint64_t size,
+ void *ptr);
+
/**
* memory_region_init_alias: Initialize a memory region that aliases all or a
* part of another memory region.
}
/**
- * memory_region_is_skip_dump: check whether a memory region should not be
- * dumped
- *
- * Returns %true is a memory region should not be dumped(e.g. VFIO BAR MMAP).
+ * memory_region_is_ram_device: check whether a memory region is a ram device
*
- * @mr: the memory region being queried
- */
-bool memory_region_is_skip_dump(MemoryRegion *mr);
-
-/**
- * memory_region_set_skip_dump: Set skip_dump flag, dump will ignore this memory
- * region
+ * Returns %true if a memory region is a device-backed RAM region
*
* @mr: the memory region being queried
*/
-void memory_region_set_skip_dump(MemoryRegion *mr);
+bool memory_region_is_ram_device(MemoryRegion *mr);
/**
* memory_region_is_romd: check whether a memory region is in ROMD mode
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
if (is_write) {
- return memory_region_is_ram(mr) && !mr->readonly;
+ return memory_region_is_ram(mr) &&
+ !mr->readonly && !memory_region_is_ram_device(mr);
} else {
- return memory_region_is_ram(mr) || memory_region_is_romd(mr);
+ return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
+ memory_region_is_romd(mr);
}
}
#include "qemu/bitmap.h"
#include "sysemu/sysemu.h"
#include "hw/pci/pci.h"
-#include "hw/boards.h"
#include "hw/compat.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#define QEMU_HW_XEN_BACKEND_H
#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_pvdev.h"
#include "sysemu/sysemu.h"
#include "net/net.h"
-/* ------------------------------------------------------------- */
-
-#define XEN_BUFSIZE 1024
-
-struct XenDevice;
-
-/* driver uses grant tables -> open gntdev device (xendev->gnttabdev) */
-#define DEVOPS_FLAG_NEED_GNTDEV 1
-/* don't expect frontend doing correct state transitions (aka console quirk) */
-#define DEVOPS_FLAG_IGNORE_STATE 2
-
-struct XenDevOps {
- size_t size;
- uint32_t flags;
- void (*alloc)(struct XenDevice *xendev);
- int (*init)(struct XenDevice *xendev);
- int (*initialise)(struct XenDevice *xendev);
- void (*connected)(struct XenDevice *xendev);
- void (*event)(struct XenDevice *xendev);
- void (*disconnect)(struct XenDevice *xendev);
- int (*free)(struct XenDevice *xendev);
- void (*backend_changed)(struct XenDevice *xendev, const char *node);
- void (*frontend_changed)(struct XenDevice *xendev, const char *node);
- int (*backend_register)(void);
-};
-
-struct XenDevice {
- const char *type;
- int dom;
- int dev;
- char name[64];
- int debug;
-
- enum xenbus_state be_state;
- enum xenbus_state fe_state;
- int online;
- char be[XEN_BUFSIZE];
- char *fe;
- char *protocol;
- int remote_port;
- int local_port;
-
- xenevtchn_handle *evtchndev;
- xengnttab_handle *gnttabdev;
-
- struct XenDevOps *ops;
- QTAILQ_ENTRY(XenDevice) next;
-};
-
-/* ------------------------------------------------------------- */
-
/* variables */
extern xc_interface *xen_xc;
extern xenforeignmemory_handle *xen_fmem;
extern const char *xen_protocol;
extern DeviceState *xen_sysdev;
-/* xenstore helper functions */
int xenstore_mkdir(char *path, int p);
-int xenstore_write_str(const char *base, const char *node, const char *val);
-int xenstore_write_int(const char *base, const char *node, int ival);
-int xenstore_write_int64(const char *base, const char *node, int64_t ival);
-char *xenstore_read_str(const char *base, const char *node);
-int xenstore_read_int(const char *base, const char *node, int *ival);
-
int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val);
int xenstore_write_be_int(struct XenDevice *xendev, const char *node, int ival);
int xenstore_write_be_int64(struct XenDevice *xendev, const char *node, int64_t ival);
char *xenstore_read_be_str(struct XenDevice *xendev, const char *node);
int xenstore_read_be_int(struct XenDevice *xendev, const char *node, int *ival);
+void xenstore_update_fe(char *watch, struct XenDevice *xendev);
+void xenstore_update_be(char *watch, char *type, int dom,
+ struct XenDevOps *ops);
char *xenstore_read_fe_str(struct XenDevice *xendev, const char *node);
int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival);
-int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval);
-int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, uint64_t *uval);
+int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node,
+ uint64_t *uval);
-const char *xenbus_strstate(enum xenbus_state state);
-struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev);
void xen_be_check_state(struct XenDevice *xendev);
/* xen backend driver bits */
int xen_be_register(const char *type, struct XenDevOps *ops);
int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state);
int xen_be_bind_evtchn(struct XenDevice *xendev);
-void xen_be_unbind_evtchn(struct XenDevice *xendev);
-int xen_be_send_notify(struct XenDevice *xendev);
-void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...)
- GCC_FMT_ATTR(3, 4);
/* actual backend drivers */
extern struct XenDevOps xen_console_ops; /* xen_console.c */
--- /dev/null
+#ifndef QEMU_HW_XEN_PVDEV_H
+#define QEMU_HW_XEN_PVDEV_H
+
+#include "hw/xen/xen_common.h"
+/* ------------------------------------------------------------- */
+
+#define XEN_BUFSIZE 1024
+
+struct XenDevice;
+
+/* driver uses grant tables -> open gntdev device (xendev->gnttabdev) */
+#define DEVOPS_FLAG_NEED_GNTDEV 1
+/* don't expect frontend doing correct state transitions (aka console quirk) */
+#define DEVOPS_FLAG_IGNORE_STATE 2
+
+/*
+ * Per-driver callback table for a Xen PV device.
+ * Only the hooks whose use is visible alongside this header are described;
+ * the remaining ones are invoked by code outside this view.
+ */
+struct XenDevOps {
+ size_t size; /* NOTE(review): presumably the allocation size of the driver's device struct -- confirm */
+ uint32_t flags; /* bitmask of DEVOPS_FLAG_* values */
+ void (*alloc)(struct XenDevice *xendev);
+ int (*init)(struct XenDevice *xendev);
+ int (*initialise)(struct XenDevice *xendev);
+ void (*connected)(struct XenDevice *xendev);
+ void (*event)(struct XenDevice *xendev); /* optional; called by xen_pv_evtchn_event() after the port is unmasked */
+ void (*disconnect)(struct XenDevice *xendev);
+ int (*free)(struct XenDevice *xendev); /* optional; called first by xen_pv_del_xendev(), result ignored there */
+ void (*backend_changed)(struct XenDevice *xendev, const char *node);
+ void (*frontend_changed)(struct XenDevice *xendev, const char *node);
+ int (*backend_register)(void);
+};
+
+/*
+ * State of one Xen PV device instance, linked on the xendevs list kept
+ * in xen_pvdev.c.
+ */
+struct XenDevice {
+ const char *type; /* matched with strcmp() by xen_pv_find_xendev() */
+ int dom; /* lookup key in xen_pv_find_xendev() */
+ int dev; /* lookup key in xen_pv_find_xendev() */
+ char name[64]; /* printed in the xen_pv_printf() message prefix */
+ int debug; /* xen_pv_printf() drops messages whose level exceeds this */
+
+ enum xenbus_state be_state;
+ enum xenbus_state fe_state;
+ int online;
+ char be[XEN_BUFSIZE];
+ char *fe; /* if non-NULL: unwatched and g_free()d by xen_pv_del_xendev() */
+ char *protocol;
+ int remote_port; /* passed to xenevtchn_bind_interdomain() when binding */
+ int local_port; /* bound event-channel port; -1 when not bound */
+
+ xenevtchn_handle *evtchndev; /* closed by xen_pv_del_xendev() if non-NULL */
+ xengnttab_handle *gnttabdev; /* closed by xen_pv_del_xendev() if non-NULL */
+
+ struct XenDevOps *ops; /* driver callbacks */
+ QTAILQ_ENTRY(XenDevice) next; /* linkage on the xendevs list */
+};
+
+/* ------------------------------------------------------------- */
+
+/* xenstore helper functions */
+int xenstore_write_str(const char *base, const char *node, const char *val);
+int xenstore_write_int(const char *base, const char *node, int ival);
+int xenstore_write_int64(const char *base, const char *node, int64_t ival);
+char *xenstore_read_str(const char *base, const char *node);
+int xenstore_read_int(const char *base, const char *node, int *ival);
+int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval);
+void xenstore_update(void *unused);
+
+const char *xenbus_strstate(enum xenbus_state state);
+
+void xen_pv_evtchn_event(void *opaque);
+void xen_pv_insert_xendev(struct XenDevice *xendev);
+void xen_pv_del_xendev(struct XenDevice *xendev);
+struct XenDevice *xen_pv_find_xendev(const char *type, int dom, int dev);
+
+void xen_pv_unbind_evtchn(struct XenDevice *xendev);
+int xen_pv_send_notify(struct XenDevice *xendev);
+
+void xen_pv_printf(struct XenDevice *xendev, int msg_level,
+ const char *fmt, ...) GCC_FMT_ATTR(3, 4);
+
+#endif /* QEMU_HW_XEN_PVDEV_H */
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_COLO_H
+#define QEMU_COLO_H
+
+#include "qemu-common.h"
+#include "migration/migration.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+
+bool colo_supported(void);
+void colo_info_init(void);
+
+void migrate_start_colo_process(MigrationState *s);
+bool migration_in_colo_state(void);
+
+/* loadvm */
+bool migration_incoming_enable_colo(void);
+void migration_incoming_exit_colo(void);
+void *colo_process_incoming_thread(void *opaque);
+bool migration_incoming_in_colo_state(void);
+
+COLOMode get_colo_mode(void);
+
+/* failover */
+void colo_do_failover(MigrationState *s);
+#endif
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_FAILOVER_H
+#define QEMU_FAILOVER_H
+
+#include "qemu-common.h"
+#include "qapi-types.h"
+
+void failover_init_state(void);
+FailoverStatus failover_set_state(FailoverStatus old_state,
+ FailoverStatus new_state);
+FailoverStatus failover_get_state(void);
+void failover_request_active(Error **errp);
+bool failover_request_is_active(void);
+
+#endif
#include "migration/vmstate.h"
#include "qapi-types.h"
#include "exec/cpu-common.h"
+#include "qemu/coroutine_int.h"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
QEMUBH *bh;
int state;
+
+ bool have_colo_incoming_thread;
+ QemuThread colo_incoming_thread;
+ /* The coroutine we should enter (back) after failover */
+ Coroutine *migration_incoming_co;
+
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
};
int migrate_use_xbzrle(void);
int64_t migrate_xbzrle_cache_size(void);
+bool migrate_colo_enabled(void);
int64_t xbzrle_cache_resize(int64_t new_size);
extern Monitor *cur_mon;
/* flags for monitor_init */
-#define MONITOR_IS_DEFAULT 0x01
+/* 0x01 unused */
#define MONITOR_USE_READLINE 0x02
#define MONITOR_USE_CONTROL 0x04
#define MONITOR_USE_PRETTY 0x08
InetSocketAddress *inet_parse(const char *str, Error **errp);
int inet_connect(const char *str, Error **errp);
+int inet_connect_saddr(InetSocketAddress *saddr, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
NetworkAddressFamily inet_netfamily(int family);
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
/* work queue */
-typedef void (*run_on_cpu_func)(CPUState *cpu, void *data);
+
+/* The union type allows passing of 64 bit target pointers on 32 bit
+ * hosts in a single parameter.
+ *
+ * It is the argument type of run_on_cpu_func callbacks.  The callback
+ * reads back the member that the caller stored, e.g. data.host_ptr for a
+ * value built with RUN_ON_CPU_HOST_PTR().
+ */
+typedef union {
+ int host_int;
+ unsigned long host_ulong;
+ void *host_ptr;
+ vaddr target_ptr;
+} run_on_cpu_data;
+
+/* Build a run_on_cpu_data compound literal with the named member set. */
+#define RUN_ON_CPU_HOST_PTR(p) ((run_on_cpu_data){.host_ptr = (p)})
+#define RUN_ON_CPU_HOST_INT(i) ((run_on_cpu_data){.host_int = (i)})
+#define RUN_ON_CPU_HOST_ULONG(ul) ((run_on_cpu_data){.host_ulong = (ul)})
+#define RUN_ON_CPU_TARGET_PTR(v) ((run_on_cpu_data){.target_ptr = (v)})
+/* For callbacks that take no argument: a NULL host pointer. */
+#define RUN_ON_CPU_NULL RUN_ON_CPU_HOST_PTR(NULL)
+
+typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
+
struct qemu_work_item;
/**
MemoryRegion *memory;
void *env_ptr; /* CPUArchState */
+
+ /* Writes protected by tb_lock, reads not thread-safe */
struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+
struct GDBRegisterState *gdb_regs;
int gdb_num_regs;
int gdb_num_g_regs;
*
* Used internally in the implementation of run_on_cpu.
*/
-void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
QemuMutex *mutex);
/**
*
* Schedules the function @func for execution on the vCPU @cpu.
*/
-void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
/**
* async_run_on_cpu:
*
* Schedules the function @func for execution on the vCPU @cpu asynchronously.
*/
-void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
/**
* async_safe_run_on_cpu:
* Unlike run_on_cpu and async_run_on_cpu, the function is run outside the
* BQL.
*/
-void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
/**
* qemu_get_cpu:
s->coalesced_flush_in_progress = false;
}
-static void do_kvm_cpu_synchronize_state(CPUState *cpu, void *arg)
+static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
if (!cpu->kvm_vcpu_dirty) {
kvm_arch_get_registers(cpu);
void kvm_cpu_synchronize_state(CPUState *cpu)
{
if (!cpu->kvm_vcpu_dirty) {
- run_on_cpu(cpu, do_kvm_cpu_synchronize_state, NULL);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
-static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, void *arg)
+static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
cpu->kvm_vcpu_dirty = false;
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, NULL);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
-static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, void *arg)
+static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
cpu->kvm_vcpu_dirty = false;
void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, NULL);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
int kvm_cpu_exec(CPUState *cpu)
int err;
};
-static void kvm_invoke_set_guest_debug(CPUState *cpu, void *data)
+static void kvm_invoke_set_guest_debug(CPUState *cpu, run_on_cpu_data data)
{
- struct kvm_set_guest_debug_data *dbg_data = data;
+ struct kvm_set_guest_debug_data *dbg_data =
+ (struct kvm_set_guest_debug_data *) data.host_ptr;
dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG,
&dbg_data->dbg);
}
kvm_arch_update_guest_debug(cpu, &data.dbg);
- run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
+ run_on_cpu(cpu, kvm_invoke_set_guest_debug,
+ RUN_ON_CPU_HOST_PTR(&data));
return data.err;
}
info->pt_dynamic_addr = 0;
#endif
+ mmap_lock();
+
/* Find the maximum size of the image and allocate an appropriate
amount of memory to handle that. */
loaddr = -1, hiaddr = 0;
load_symbols(ehdr, image_fd, load_bias);
}
+ mmap_unlock();
+
close(image_fd);
return;
}
}
+/* Report whether mmap_lock_count is positive, i.e. the lock is held. */
+bool have_mmap_lock(void)
+{
+    return mmap_lock_count > 0;
+}
+
/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
.endianness = DEVICE_NATIVE_ENDIAN,
};
+/*
+ * MMIO read handler for RAM device regions.
+ *
+ * @opaque: the MemoryRegion itself (set up by
+ *          memory_region_init_ram_device_ptr()).
+ * @addr:   byte offset into the region's host mapping.
+ * @size:   access width in bytes.
+ *
+ * Performs one host load of exactly @size bytes for sizes 1, 2, 4 and 8;
+ * any other size yields all-ones.  The access is traced after the load.
+ */
+static uint64_t memory_region_ram_device_read(void *opaque,
+                                              hwaddr addr, unsigned size)
+{
+    MemoryRegion *mr = opaque;
+    uint64_t value;
+
+    switch (size) {
+    case 1:
+        value = *(uint8_t *)(mr->ram_block->host + addr);
+        break;
+    case 2:
+        value = *(uint16_t *)(mr->ram_block->host + addr);
+        break;
+    case 4:
+        value = *(uint32_t *)(mr->ram_block->host + addr);
+        break;
+    case 8:
+        value = *(uint64_t *)(mr->ram_block->host + addr);
+        break;
+    default:
+        value = (uint64_t)~0;
+        break;
+    }
+
+    trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, value, size);
+
+    return value;
+}
+
+/*
+ * MMIO write handler for RAM device regions.
+ *
+ * @opaque: the MemoryRegion itself.
+ * @addr:   byte offset into the region's host mapping.
+ * @data:   value to store; truncated to @size bytes.
+ * @size:   access width in bytes.
+ *
+ * The access is traced first, then one host store of exactly @size bytes
+ * is performed for sizes 1, 2, 4 and 8; other sizes store nothing.
+ */
+static void memory_region_ram_device_write(void *opaque, hwaddr addr,
+                                           uint64_t data, unsigned size)
+{
+    MemoryRegion *mr = opaque;
+
+    trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size);
+
+    if (size == 1) {
+        *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data;
+    } else if (size == 2) {
+        *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data;
+    } else if (size == 4) {
+        *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data;
+    } else if (size == 8) {
+        *(uint64_t *)(mr->ram_block->host + addr) = data;
+    }
+}
+
+/*
+ * Access callbacks installed on RAM device regions by
+ * memory_region_init_ram_device_ptr().  Both the .valid and the .impl
+ * constraints allow 1- to 8-byte accesses and unaligned addresses.
+ * NOTE(review): .valid vs .impl semantics are defined by the memory API,
+ * not by this table -- see the MemoryRegionOps documentation.
+ */
+static const MemoryRegionOps ram_device_mem_ops = {
+ .read = memory_region_ram_device_read,
+ .write = memory_region_ram_device_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+};
+
bool memory_region_access_valid(MemoryRegion *mr,
hwaddr addr,
unsigned size,
mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
}
-void memory_region_set_skip_dump(MemoryRegion *mr)
+void memory_region_init_ram_device_ptr(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ void *ptr)
{
- mr->skip_dump = true;
+ memory_region_init_ram_ptr(mr, owner, name, size, ptr);
+ mr->ram_device = true;
+ mr->ops = &ram_device_mem_ops;
+ mr->opaque = mr;
}
void memory_region_init_alias(MemoryRegion *mr,
return mr->name;
}
-bool memory_region_is_skip_dump(MemoryRegion *mr)
+bool memory_region_is_ram_device(MemoryRegion *mr)
{
- return mr->skip_dump;
+ return mr->ram_device;
}
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
/* we only care about RAM */
if (!memory_region_is_ram(section->mr) ||
- memory_region_is_skip_dump(section->mr)) {
+ memory_region_is_ram_device(section->mr)) {
return;
}
common-obj-y += migration.o socket.o fd.o exec.o
common-obj-y += tls.o
+common-obj-y += colo-comm.o
+common-obj-$(CONFIG_COLO) += colo.o colo-failover.o
common-obj-y += vmstate.o
common-obj-y += qemu-file.o
common-obj-y += qemu-file-channel.o
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <migration/colo.h>
+#include "trace.h"
+
+/* COLO information that is carried in the migration stream. */
+typedef struct {
+ bool colo_requested; /* filled from migrate_colo_enabled() before save */
+} COLOInfo;
+
+static COLOInfo colo_info;
+
+/*
+ * Report which COLO role this process currently plays: primary when the
+ * outgoing migration is in COLO state, secondary when the incoming
+ * migration is, unknown otherwise.
+ */
+COLOMode get_colo_mode(void)
+{
+    if (migration_in_colo_state()) {
+        return COLO_MODE_PRIMARY;
+    }
+    if (migration_incoming_in_colo_state()) {
+        return COLO_MODE_SECONDARY;
+    }
+    return COLO_MODE_UNKNOWN;
+}
+
+/* vmstate pre_save hook: snapshot whether the COLO capability is enabled. */
+static void colo_info_pre_save(void *opaque)
+{
+    COLOInfo *info = opaque;
+    bool requested = migrate_colo_enabled();
+
+    info->colo_requested = requested;
+}
+
+/* vmstate .needed hook: the section is only sent when COLO is enabled. */
+static bool colo_info_need(void *opaque)
+{
+ return migrate_colo_enabled();
+}
+
+/*
+ * Migration description of COLOInfo: a single boolean, emitted only when
+ * colo_info_need() says so and refreshed by colo_info_pre_save().
+ */
+static const VMStateDescription colo_state = {
+ .name = "COLOState",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_save = colo_info_pre_save,
+ .needed = colo_info_need,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(colo_requested, COLOInfo),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* Register the COLO vmstate description, backed by the file-local colo_info. */
+void colo_info_init(void)
+{
+ vmstate_register(NULL, 0, &colo_state, &colo_info);
+}
+
+/* Return the colo_requested flag currently stored in colo_info. */
+bool migration_incoming_enable_colo(void)
+{
+ return colo_info.colo_requested;
+}
+
+/* Clear the colo_requested flag stored in colo_info. */
+void migration_incoming_exit_colo(void)
+{
+ colo_info.colo_requested = false;
+}
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "migration/colo.h"
+#include "migration/failover.h"
+#include "qmp-commands.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+static QEMUBH *failover_bh;
+static FailoverStatus failover_state;
+
+/*
+ * Bottom half scheduled by failover_request_active(): releases itself, moves
+ * the failover state from REQUIRE to ACTIVE and, if that transition
+ * succeeded, performs the failover.
+ */
+static void colo_failover_bh(void *opaque)
+{
+    int prev;
+
+    qemu_bh_delete(failover_bh);
+    failover_bh = NULL;
+
+    prev = failover_set_state(FAILOVER_STATUS_REQUIRE,
+                              FAILOVER_STATUS_ACTIVE);
+    if (prev == FAILOVER_STATUS_REQUIRE) {
+        colo_do_failover(NULL);
+        return;
+    }
+
+    error_report("Unknown error for failover, old_state = %s",
+                 FailoverStatus_lookup[prev]);
+}
+
+/*
+ * Request a failover: switch the state from NONE to REQUIRE and schedule the
+ * bottom half that carries it out.  Sets @errp when the state was not NONE.
+ */
+void failover_request_active(Error **errp)
+{
+    FailoverStatus prev = failover_set_state(FAILOVER_STATUS_NONE,
+                                             FAILOVER_STATUS_REQUIRE);
+
+    if (prev != FAILOVER_STATUS_NONE) {
+        error_setg(errp, "COLO failover is already actived");
+        return;
+    }
+
+    failover_bh = qemu_bh_new(colo_failover_bh, NULL);
+    qemu_bh_schedule(failover_bh);
+}
+
+/* Reset the failover state machine to FAILOVER_STATUS_NONE. */
+void failover_init_state(void)
+{
+ failover_state = FAILOVER_STATUS_NONE;
+}
+
+/*
+ * Compare-and-swap the failover state from @old_state to @new_state.
+ *
+ * Returns the state observed before the operation; the swap took place only
+ * if that value equals @old_state, in which case the new state is traced.
+ */
+FailoverStatus failover_set_state(FailoverStatus old_state,
+                                  FailoverStatus new_state)
+{
+    FailoverStatus seen = atomic_cmpxchg(&failover_state, old_state, new_state);
+
+    if (seen != old_state) {
+        return seen;
+    }
+
+    trace_colo_failover_set_state(FailoverStatus_lookup[new_state]);
+    return seen;
+}
+
+/* Atomically read the current failover state. */
+FailoverStatus failover_get_state(void)
+{
+ return atomic_read(&failover_state);
+}
+
+/*
+ * QMP handler for x-colo-lost-heartbeat: requests a failover when this
+ * process is in a known COLO mode, otherwise reports the feature as
+ * disabled through @errp.
+ */
+void qmp_x_colo_lost_heartbeat(Error **errp)
+{
+    if (get_colo_mode() != COLO_MODE_UNKNOWN) {
+        failover_request_active(errp);
+        return;
+    }
+
+    error_setg(errp, QERR_FEATURE_DISABLED, "colo");
+}
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "sysemu/sysemu.h"
+#include "migration/colo.h"
+#include "io/channel-buffer.h"
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "migration/failover.h"
+
+#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
+
+/* This implementation always reports COLO as supported. */
+bool colo_supported(void)
+{
+ return true;
+}
+
+/* True when the outgoing migration state is MIGRATION_STATUS_COLO. */
+bool migration_in_colo_state(void)
+{
+    return migrate_get_current()->state == MIGRATION_STATUS_COLO;
+}
+
+/*
+ * True when an incoming migration state exists and its status is
+ * MIGRATION_STATUS_COLO.
+ */
+bool migration_incoming_in_colo_state(void)
+{
+    MigrationIncomingState *incoming = migration_incoming_get_current();
+
+    if (!incoming) {
+        return false;
+    }
+    return incoming->state == MIGRATION_STATUS_COLO;
+}
+
+/* True when the run state is RUN_STATE_COLO or the VM is not running. */
+static bool colo_runstate_is_stopped(void)
+{
+    if (runstate_check(RUN_STATE_COLO)) {
+        return true;
+    }
+    return !runstate_is_running();
+}
+
+/*
+ * Failover on the Secondary side: mark the incoming migration completed,
+ * force autostart, finish the failover state machine and re-enter the
+ * incoming migration coroutine if one was recorded.
+ */
+static void secondary_vm_do_failover(void)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    int prev;
+
+    migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
+                      MIGRATION_STATUS_COMPLETED);
+
+    if (!autostart) {
+        error_report("\"-S\" qemu option will be ignored in secondary side");
+        /* recover runstate to normal migration finish state */
+        autostart = true;
+    }
+
+    prev = failover_set_state(FAILOVER_STATUS_ACTIVE,
+                              FAILOVER_STATUS_COMPLETED);
+    if (prev != FAILOVER_STATUS_ACTIVE) {
+        error_report("Incorrect state (%s) while doing failover for "
+                     "secondary VM", FailoverStatus_lookup[prev]);
+        return;
+    }
+
+    /* For Secondary VM, jump to incoming co */
+    if (!mis->migration_incoming_co) {
+        return;
+    }
+    qemu_coroutine_enter(mis->migration_incoming_co);
+}
+
+/*
+ * Failover on the Primary side: mark the outgoing migration completed and
+ * finish the failover state machine, reporting an unexpected prior state.
+ */
+static void primary_vm_do_failover(void)
+{
+    MigrationState *s = migrate_get_current();
+    int prev;
+
+    migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
+                      MIGRATION_STATUS_COMPLETED);
+
+    prev = failover_set_state(FAILOVER_STATUS_ACTIVE,
+                              FAILOVER_STATUS_COMPLETED);
+    if (prev == FAILOVER_STATUS_ACTIVE) {
+        return;
+    }
+
+    error_report("Incorrect state (%s) while doing failover for Primary VM",
+                 FailoverStatus_lookup[prev]);
+}
+
+/*
+ * Perform the failover for whichever side this process is on.  @s is not
+ * used by this function.
+ */
+void colo_do_failover(MigrationState *s)
+{
+    /* Make sure VM stopped while failover happened. */
+    if (!colo_runstate_is_stopped()) {
+        vm_stop_force_state(RUN_STATE_COLO);
+    }
+
+    switch (get_colo_mode()) {
+    case COLO_MODE_PRIMARY:
+        primary_vm_do_failover();
+        break;
+    default:
+        secondary_vm_do_failover();
+        break;
+    }
+}
+
+/*
+ * Send the COLO message @msg as a big-endian 32-bit value on @f and flush.
+ *
+ * Sets @errp for an out-of-range @msg (nothing is written) or when the file
+ * reports an error after the flush.
+ */
+static void colo_send_message(QEMUFile *f, COLOMessage msg,
+                              Error **errp)
+{
+    int err;
+
+    if (msg >= COLO_MESSAGE__MAX) {
+        error_setg(errp, "%s: Invalid message", __func__);
+        return;
+    }
+
+    qemu_put_be32(f, msg);
+    qemu_fflush(f);
+
+    err = qemu_file_get_error(f);
+    if (err < 0) {
+        error_setg_errno(errp, -err, "Can't send COLO message");
+    }
+    trace_colo_send_message(COLOMessage_lookup[msg]);
+}
+
+/*
+ * Send the COLO message @msg followed by a big-endian 64-bit @value.
+ * Errors from either step are reported through @errp.
+ */
+static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
+                                    uint64_t value, Error **errp)
+{
+    Error *err = NULL;
+    int rc;
+
+    colo_send_message(f, msg, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    qemu_put_be64(f, value);
+    qemu_fflush(f);
+
+    rc = qemu_file_get_error(f);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Failed to send value for message:%s",
+                         COLOMessage_lookup[msg]);
+    }
+}
+
+/*
+ * Read one COLO message (big-endian 32-bit value) from @f.
+ *
+ * The value read is always returned; @errp is set when the file reports an
+ * error or the value is not a valid COLOMessage.  Valid messages are traced.
+ */
+static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
+{
+    COLOMessage msg = qemu_get_be32(f);
+    int err = qemu_file_get_error(f);
+
+    if (err < 0) {
+        error_setg_errno(errp, -err, "Can't receive COLO message");
+    } else if (msg >= COLO_MESSAGE__MAX) {
+        error_setg(errp, "%s: Invalid message", __func__);
+    } else {
+        trace_colo_receive_message(COLOMessage_lookup[msg]);
+    }
+    return msg;
+}
+
+/*
+ * Read one COLO message from @f and require it to be @expect_msg.
+ * Sets @errp on a receive failure or on a mismatch.
+ */
+static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
+                                       Error **errp)
+{
+    Error *err = NULL;
+    COLOMessage got = colo_receive_message(f, &err);
+
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    if (got == expect_msg) {
+        return;
+    }
+
+    error_setg(errp, "Unexpected COLO message %d, expected %d",
+               got, expect_msg);
+}
+
+/*
+ * Expect the message @expect_msg on @f and then read its big-endian 64-bit
+ * payload.
+ *
+ * Returns 0 with @errp set when the message check fails; otherwise returns
+ * the value read, with @errp set if the file reports an error afterwards.
+ */
+static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
+                                           Error **errp)
+{
+    Error *err = NULL;
+    uint64_t payload;
+    int rc;
+
+    colo_receive_check_message(f, expect_msg, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return 0;
+    }
+
+    payload = qemu_get_be64(f);
+    rc = qemu_file_get_error(f);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Failed to get value for COLO message: %s",
+                         COLOMessage_lookup[expect_msg]);
+    }
+    return payload;
+}
+
+/*
+ * Run one checkpoint from the Primary side.
+ *
+ * Sequence: request a checkpoint and wait for the reply, stop the VM, save
+ * the VM state into the buffer channel @bioc through @fb, send its size and
+ * contents on s->to_dst_file, wait for the "received" and "loaded"
+ * acknowledgements, then restart the VM.
+ *
+ * Returns 0 on success and a negative value on any failure, or when a
+ * failover request is noticed.  Errors collected in local_err are reported
+ * before returning.
+ */
+static int colo_do_checkpoint_transaction(MigrationState *s,
+                                          QIOChannelBuffer *bioc,
+                                          QEMUFile *fb)
+{
+    Error *local_err = NULL;
+    int ret = -1;
+
+    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
+                      &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    colo_receive_check_message(s->rp_state.from_dst_file,
+                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    /* Reset channel-buffer directly */
+    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
+    bioc->usage = 0;
+
+    qemu_mutex_lock_iothread();
+    if (failover_get_state() != FAILOVER_STATUS_NONE) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+    vm_stop_force_state(RUN_STATE_COLO);
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("run", "stop");
+    /*
+     * The failover request bh could be called after vm_stop_force_state(),
+     * so we need to check the failover state again.
+     */
+    if (failover_get_state() != FAILOVER_STATUS_NONE) {
+        goto out;
+    }
+
+    /* Disable block migration */
+    s->params.blk = 0;
+    s->params.shared = 0;
+    qemu_savevm_state_header(fb);
+    qemu_savevm_state_begin(fb, &s->params);
+    qemu_mutex_lock_iothread();
+    qemu_savevm_state_complete_precopy(fb, false);
+    qemu_mutex_unlock_iothread();
+
+    qemu_fflush(fb);
+
+    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    /*
+     * We need the size of the VMstate data in Secondary side,
+     * With which we can decide how much data should be read.
+     */
+    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
+                            bioc->usage, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
+    qemu_fflush(s->to_dst_file);
+    ret = qemu_file_get_error(s->to_dst_file);
+    if (ret < 0) {
+        goto out;
+    }
+    /*
+     * ret now holds the non-negative result of qemu_file_get_error();
+     * restore the failure default so that the error exits below do not
+     * report success to the caller.
+     */
+    ret = -1;
+
+    colo_receive_check_message(s->rp_state.from_dst_file,
+                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    colo_receive_check_message(s->rp_state.from_dst_file,
+                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    ret = 0;
+
+    qemu_mutex_lock_iothread();
+    vm_start();
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("stop", "run");
+
+out:
+    if (local_err) {
+        error_report_err(local_err);
+    }
+    return ret;
+}
+
+/*
+ * Primary-side COLO main loop.
+ *
+ * Opens the return path from the Secondary, waits for its
+ * "checkpoint-ready" message, starts the VM and then performs a checkpoint
+ * every x_checkpoint_delay milliseconds while the migration stays in COLO
+ * state.  The loop ends on a failover request or on a checkpoint failure.
+ * On exit the buffer file and the return path are closed.
+ */
+static void colo_process_checkpoint(MigrationState *s)
+{
+    QIOChannelBuffer *bioc;
+    QEMUFile *fb = NULL;
+    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    Error *local_err = NULL;
+    int ret;
+
+    failover_init_state();
+
+    s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
+    if (!s->rp_state.from_dst_file) {
+        error_report("Open QEMUFile from_dst_file failed");
+        goto out;
+    }
+
+    /*
+     * Wait for Secondary finish loading VM states and enter COLO
+     * restore.
+     */
+    colo_receive_check_message(s->rp_state.from_dst_file,
+                       COLO_MESSAGE_CHECKPOINT_READY, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
+    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
+    object_unref(OBJECT(bioc));
+
+    qemu_mutex_lock_iothread();
+    vm_start();
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("stop", "run");
+
+    while (s->state == MIGRATION_STATUS_COLO) {
+        if (failover_get_state() != FAILOVER_STATUS_NONE) {
+            error_report("failover request");
+            goto out;
+        }
+
+        /* Sleep for whatever remains of the checkpoint period. */
+        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+        if (current_time - checkpoint_time <
+            s->parameters.x_checkpoint_delay) {
+            int64_t delay_ms;
+
+            delay_ms = s->parameters.x_checkpoint_delay -
+                       (current_time - checkpoint_time);
+            g_usleep(delay_ms * 1000);
+        }
+        ret = colo_do_checkpoint_transaction(s, bioc, fb);
+        if (ret < 0) {
+            goto out;
+        }
+        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    }
+
+out:
+    /* Throw the unreported error message after exited from loop */
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    if (fb) {
+        qemu_fclose(fb);
+    }
+
+    if (s->rp_state.from_dst_file) {
+        qemu_fclose(s->rp_state.from_dst_file);
+        /*
+         * The MigrationState outlives this function; do not leave a
+         * dangling pointer to the closed file behind.
+         */
+        s->rp_state.from_dst_file = NULL;
+    }
+}
+
+/*
+ * Switch the migration from ACTIVE to COLO state and run the checkpoint
+ * loop.  The iothread lock is released for the duration of the loop and
+ * re-taken before returning.
+ */
+void migrate_start_colo_process(MigrationState *s)
+{
+ qemu_mutex_unlock_iothread();
+ migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_COLO);
+ colo_process_checkpoint(s);
+ qemu_mutex_lock_iothread();
+}
+
+/*
+ * Read the next COLO message from @f and classify it.
+ *
+ * *@checkpoint_request is set to 1 for a checkpoint request; any other
+ * message sets it to 0 and reports an error through @errp.  On a receive
+ * failure *@checkpoint_request is left untouched.
+ */
+static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
+                                     Error **errp)
+{
+    Error *err = NULL;
+    COLOMessage msg = colo_receive_message(f, &err);
+
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    if (msg == COLO_MESSAGE_CHECKPOINT_REQUEST) {
+        *checkpoint_request = 1;
+        return;
+    }
+
+    *checkpoint_request = 0;
+    error_setg(errp, "Got unknown COLO message: %d", msg);
+}
+
+/*
+ * Secondary-side COLO thread.
+ *
+ * @opaque is the MigrationIncomingState.  The thread opens the return path
+ * to the Primary, announces "checkpoint-ready" and then, for each
+ * checkpoint request, receives the VM state into a buffer channel, resets
+ * the system and loads that state, acknowledging each step.  It leaves the
+ * loop on any error or failover request, closes its files and clears the
+ * COLO request flag.  Always returns NULL.
+ */
+void *colo_process_incoming_thread(void *opaque)
+{
+    MigrationIncomingState *mis = opaque;
+    QEMUFile *fb = NULL;
+    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
+    uint64_t total_size;
+    uint64_t value;
+    Error *local_err = NULL;
+
+    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+                      MIGRATION_STATUS_COLO);
+
+    failover_init_state();
+
+    mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
+    if (!mis->to_src_file) {
+        error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
+        goto out;
+    }
+    /*
+     * Note: the communication between Primary side and Secondary side
+     * should be sequential, we set the fd to unblocked in migration incoming
+     * coroutine, and here we are in the COLO incoming thread, so it is ok to
+     * set the fd back to blocked.
+     */
+    qemu_file_set_blocking(mis->from_src_file, true);
+
+    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
+    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
+    object_unref(OBJECT(bioc));
+
+    colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
+                      &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    while (mis->state == MIGRATION_STATUS_COLO) {
+        int request = 0;
+
+        colo_wait_handle_message(mis->from_src_file, &request, &local_err);
+        if (local_err) {
+            goto out;
+        }
+        assert(request);
+        if (failover_get_state() != FAILOVER_STATUS_NONE) {
+            error_report("failover request");
+            goto out;
+        }
+
+        /* FIXME: This is unnecessary for periodic checkpoint mode */
+        colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
+                     &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        colo_receive_check_message(mis->from_src_file,
+                           COLO_MESSAGE_VMSTATE_SEND, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        value = colo_receive_message_value(mis->from_src_file,
+                                 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        /*
+         * Read VM device state data into channel buffer,
+         * It's better to re-use the memory allocated.
+         * Here we need to handle the channel buffer directly.
+         */
+        if (value > bioc->capacity) {
+            bioc->capacity = value;
+            bioc->data = g_realloc(bioc->data, bioc->capacity);
+        }
+        total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
+        if (total_size != value) {
+            error_report("Got %" PRIu64 " VMState data, less than expected"
+                        " %" PRIu64, total_size, value);
+            goto out;
+        }
+        bioc->usage = total_size;
+        qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
+
+        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
+                     &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        qemu_mutex_lock_iothread();
+        qemu_system_reset(VMRESET_SILENT);
+        if (qemu_loadvm_state(fb) < 0) {
+            error_report("COLO: loadvm failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+        qemu_mutex_unlock_iothread();
+
+        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
+                     &local_err);
+        if (local_err) {
+            goto out;
+        }
+    }
+
+out:
+    /* Throw the unreported error message after exited from loop */
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    if (fb) {
+        qemu_fclose(fb);
+    }
+
+    if (mis->to_src_file) {
+        qemu_fclose(mis->to_src_file);
+        /*
+         * The incoming state outlives this thread; do not leave a dangling
+         * pointer to the closed file behind.
+         */
+        mis->to_src_file = NULL;
+    }
+    migration_incoming_exit_colo();
+
+    return NULL;
+}
#include "exec/address-spaces.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
+#include "migration/colo.h"
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+/* The delay time (in ms) between two COLO checkpoints
+ * Note: Please change this default value to 10000 when we support hybrid mode.
+ */
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
+
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
.cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
.max_bandwidth = MAX_THROTTLE,
.downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
+ .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
},
};
/* Else if something went wrong then just fall out of the normal exit */
}
+ /* we get COLO info, and know if we are in COLO mode */
+ if (!ret && migration_incoming_enable_colo()) {
+ mis->migration_incoming_co = qemu_coroutine_self();
+ qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
+ colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
+ mis->have_colo_incoming_thread = true;
+ qemu_coroutine_yield();
+
+ /* Wait checkpoint incoming thread exit before free resource */
+ qemu_thread_join(&mis->colo_incoming_thread);
+ }
+
qemu_fclose(f);
free_xbzrle_decoded_buf();
caps = NULL; /* silence compiler warning */
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+ if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) {
+ continue;
+ }
if (head == NULL) {
head = g_malloc0(sizeof(*caps));
caps = head;
params->max_bandwidth = s->parameters.max_bandwidth;
params->has_downtime_limit = true;
params->downtime_limit = s->parameters.downtime_limit;
+ /* Optional QAPI member: flag it as present, like downtime_limit above. */
+ params->has_x_checkpoint_delay = true;
+ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
return params;
}
get_xbzrle_cache_stats(info);
break;
+ case MIGRATION_STATUS_COLO:
+ info->has_status = true;
+ /* TODO: display COLO specific information (checkpoint info etc.) */
+ break;
case MIGRATION_STATUS_COMPLETED:
get_xbzrle_cache_stats(info);
}
for (cap = params; cap; cap = cap->next) {
+ if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) {
+ if (!colo_supported()) {
+ error_setg(errp, "COLO is not currently supported, please"
+ " configure with --enable-colo option in order to"
+ " support COLO feature");
+ continue;
+ }
+ }
s->enabled_capabilities[cap->value->capability] = cap->value->state;
}
"an integer in the range of 0 to 2000000 milliseconds");
return;
}
+    /* Reject a negative checkpoint delay before any parameter is applied. */
+    if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                    "x_checkpoint_delay",
+                    "is invalid, it should be positive");
+        return;
+    }
if (params->has_compress_level) {
s->parameters.compress_level = params->compress_level;
if (params->has_downtime_limit) {
s->parameters.downtime_limit = params->downtime_limit;
}
+
+ if (params->has_x_checkpoint_delay) {
+ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
+ }
}
void migrate_fd_error(MigrationState *s, const Error *error)
{
- trace_migrate_fd_error(error ? error_get_pretty(error) : "");
+ trace_migrate_fd_error(error_get_pretty(error));
assert(s->to_dst_file == NULL);
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
params.shared = has_inc && inc;
if (migration_is_setup_or_active(s->state) ||
- s->state == MIGRATION_STATUS_CANCELLING) {
+ s->state == MIGRATION_STATUS_CANCELLING ||
+ s->state == MIGRATION_STATUS_COLO) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
return;
}
if (!ret) {
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- if (ret >= 0) {
+ /*
+ * Don't mark the image with BDRV_O_INACTIVE flag if
+ * we will go into COLO stage later.
+ */
+ if (ret >= 0 && !migrate_colo_enabled()) {
ret = bdrv_inactivate_all();
}
if (ret >= 0) {
goto fail_invalidate;
}
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_COMPLETED);
+ if (!migrate_colo_enabled()) {
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_COMPLETED);
+ }
+
return;
fail_invalidate:
MIGRATION_STATUS_FAILED);
}
+/* True when the x-colo capability is enabled on the current migration. */
+bool migrate_colo_enabled(void)
+{
+    return migrate_get_current()->enabled_capabilities[
+               MIGRATION_CAPABILITY_X_COLO];
+}
+
/*
* Master migration thread on the source VM.
* It drives the migration and pumps the data down the outgoing channel.
bool entered_postcopy = false;
/* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
+ bool enable_colo = migrate_colo_enabled();
rcu_register_thread();
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
qemu_mutex_lock_iothread();
- qemu_savevm_state_cleanup();
+ /*
+ * The resources allocated by migration will be reused in the COLO
+ * process, so don't release them.
+ */
+ if (!enable_colo) {
+ qemu_savevm_state_cleanup();
+ }
if (s->state == MIGRATION_STATUS_COMPLETED) {
uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
s->total_time = end_time - s->total_time;
}
runstate_set(RUN_STATE_POSTMIGRATE);
} else {
+ if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
+ migrate_start_colo_process(s);
+ qemu_savevm_state_cleanup();
+ /*
+ * Fixme: we will run VM in COLO no matter its old running state.
+ * After exited COLO, we will keep running.
+ */
+ old_vm_running = true;
+ }
if (old_vm_running && !entered_postcopy) {
vm_start();
} else {
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
+#include "migration/colo.h"
#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
return ret;
}
-
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
- * long-running RCU critical section. When rcu-reclaims in the code
- * start to become numerous it will be necessary to reduce the
- * granularity of these critical sections.
- */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_init_globals(void)
{
- RAMBlock *block;
int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
dirty_rate_high_cnt = 0;
migration_bitmap_sync();
qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section. When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+ RAMBlock *block;
+
+ /* migration has already setup the bitmap, reuse it. */
+ if (!migration_in_colo_state()) {
+ if (ram_save_init_globals() < 0) {
+ return -1;
+ }
+ }
+
+ rcu_read_lock();
qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
while (true) {
int pages;
- pages = ram_find_and_save_block(f, true, &bytes_transferred);
+ pages = ram_find_and_save_block(f, !migration_in_colo_state(),
+ &bytes_transferred);
/* no more blocks to sent */
if (pages == 0) {
break;
migration_tls_incoming_handshake_start(void) ""
migration_tls_incoming_handshake_error(const char *err) "err=%s"
migration_tls_incoming_handshake_complete(void) ""
+
+# migration/colo.c
+colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
+colo_send_message(const char *msg) "Send '%s' message"
+colo_receive_message(const char *msg) "Receive '%s' message"
+colo_failover_set_state(const char *new_state) "new state %s"
#include "qapi/qmp/json-streamer.h"
#include "qapi/qmp/json-parser.h"
#include "qom/object_interfaces.h"
-#include "cpu.h"
#include "trace.h"
#include "trace/control.h"
#include "monitor/hmp-target.h"
#include "qapi/qmp-event.h"
#include "qapi-event.h"
#include "qmp-introspect.h"
-#include "sysemu/block-backend.h"
#include "sysemu/qtest.h"
#include "qemu/cutils.h"
#include "qapi/qmp/dispatch.h"
.name = "chardev",
.type = QEMU_OPT_STRING,
},{
- .name = "default",
+ .name = "default", /* deprecated */
.type = QEMU_OPT_BOOL,
},{
.name = "pretty",
ObjectClass parent_class;
} CompareClass;
-typedef struct CompareChardevProps {
- bool is_socket;
-} CompareChardevProps;
-
enum {
PRIMARY_IN = 0,
SECONDARY_IN,
char *chr_name,
Error **errp)
{
- CompareChardevProps props;
-
*chr = qemu_chr_find(chr_name);
if (*chr == NULL) {
error_setg(errp, "Device '%s' not found",
return 1;
}
- memset(&props, 0, sizeof(props));
-
if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
error_setg(errp, "chardev \"%s\" is not reconnectable",
chr_name);
# @watchdog: the watchdog action is configured to pause and has been triggered
#
# @guest-panicked: guest has been panicked as a result of guest OS panic
+#
+# @colo: guest is paused to save/restore VM state under colo checkpoint (since
+# 2.8)
##
{ 'enum': 'RunState',
'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
- 'guest-panicked' ] }
+ 'guest-panicked', 'colo' ] }
##
# @StatusInfo:
#
# @failed: some error occurred during migration process.
#
+# @colo: VM is in the process of fault tolerance. (since 2.8)
+#
# Since: 2.3
#
##
{ 'enum': 'MigrationStatus',
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
- 'active', 'postcopy-active', 'completed', 'failed' ] }
+ 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
##
# @MigrationInfo
# been migrated, pulling the remaining pages along as needed. NOTE: If
# the migration fails during postcopy the VM will fail. (since 2.6)
#
+# @x-colo: If enabled, migration will never end, and the state of the VM on the
+# primary side will be migrated continuously to the VM on secondary
+# side, this process is called COarse-Grain LOck Stepping (COLO) for
+# Non-stop Service. (since 2.8)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram'] }
+ 'compress', 'events', 'postcopy-ram', 'x-colo'] }
##
# @MigrationCapabilityStatus
# @downtime-limit: set maximum tolerated downtime for migration. maximum
# downtime in milliseconds (Since 2.8)
#
+# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in
+# periodic mode. (Since 2.8)
+#
# Since: 2.4
##
{ 'enum': 'MigrationParameter',
'data': ['compress-level', 'compress-threads', 'decompress-threads',
'cpu-throttle-initial', 'cpu-throttle-increment',
'tls-creds', 'tls-hostname', 'max-bandwidth',
- 'downtime-limit'] }
+ 'downtime-limit', 'x-checkpoint-delay' ] }
#
# @migrate-set-parameters
#
# Set various migration parameters. See MigrationParameters for details.
#
+# @x-checkpoint-delay: the delay time between two checkpoints. (Since 2.8)
+#
# Since: 2.4
##
{ 'command': 'migrate-set-parameters', 'boxed': true,
# @downtime-limit: set maximum tolerated downtime for migration. maximum
# downtime in milliseconds (Since 2.8)
#
+# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
+#
# Since: 2.4
##
{ 'struct': 'MigrationParameters',
'*tls-creds': 'str',
'*tls-hostname': 'str',
'*max-bandwidth': 'int',
- '*downtime-limit': 'int'} }
+ '*downtime-limit': 'int',
+ '*x-checkpoint-delay': 'int'} }
##
# @query-migrate-parameters
# Since: 2.5
{ 'command': 'migrate-start-postcopy' }
+##
+# @COLOMessage
+#
+# The message transmission between Primary side and Secondary side.
+#
+# @checkpoint-ready: Secondary VM (SVM) is ready for checkpointing
+#
+# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for checkpointing
+#
+# @checkpoint-reply: SVM gets PVM's checkpoint request
+#
+# @vmstate-send: VM's state will be sent by PVM.
+#
+# @vmstate-size: The total size of VMstate.
+#
+# @vmstate-received: VM's state has been received by SVM.
+#
+# @vmstate-loaded: VM's state has been loaded by SVM.
+#
+# Since: 2.8
+##
+{ 'enum': 'COLOMessage',
+ 'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply',
+ 'vmstate-send', 'vmstate-size', 'vmstate-received',
+ 'vmstate-loaded' ] }
+
+##
+# @COLOMode
+#
+# The colo mode
+#
+# @unknown: unknown mode
+#
+# @primary: master side
+#
+# @secondary: slave side
+#
+# Since: 2.8
+##
+{ 'enum': 'COLOMode',
+ 'data': [ 'unknown', 'primary', 'secondary'] }
+
+##
+# @FailoverStatus
+#
+# An enumeration of COLO failover status
+#
+# @none: no failover has ever happened
+#
+# @require: got failover requirement but not handled
+#
+# @active: in the process of doing failover
+#
+# @completed: finish the process of failover
+#
+# Since: 2.8
+##
+{ 'enum': 'FailoverStatus',
+ 'data': [ 'none', 'require', 'active', 'completed'] }
+
+##
+# @x-colo-lost-heartbeat
+#
+# Tell qemu that heartbeat is lost, request it to do takeover procedures.
+# If this command is sent to the PVM, the Primary side will exit COLO mode.
+# If sent to the Secondary, the Secondary side will run failover work,
+# then takes over server operation to become the service VM.
+#
+# Since: 2.8
+##
+{ 'command': 'x-colo-lost-heartbeat' }
+
##
# @MouseInfo:
#
# with query-block-jobs. The operation can be stopped before it has completed
# using the block-job-cancel command.
#
+# The node that receives the data is called the top image, can be located in
+# any part of the chain (but always above the base image; see below) and can be
+# specified using its device or node name. Earlier qemu versions only allowed
+# 'device' to name the top level node; presence of the 'base-node' parameter
+# during introspection can be used as a witness of the enhanced semantics
+# of 'device'.
+#
# If a base file is specified then sectors are not copied from that base file and
# its backing chain. When streaming completes the image file will have the base
# file as its backing file. This can be used to stream a subset of the backing
# @job-id: #optional identifier for the newly-created block job. If
# omitted, the device name will be used. (Since 2.7)
#
-# @device: the device name or node-name of a root node
+# @device: the device or node name of the top image
+#
+# @base: #optional the common backing file name.
+# It cannot be set if @base-node is also set.
#
-# @base: #optional the common backing file name
+# @base-node: #optional the node name of the backing file.
+# It cannot be set if @base is also set. (Since 2.8)
#
-# @backing-file: #optional The backing file string to write into the active
-# layer. This filename is not validated.
+# @backing-file: #optional The backing file string to write into the top
+# image. This filename is not validated.
#
# If a pathname string is such that it cannot be
# resolved by QEMU, that means that subsequent QMP or
##
{ 'command': 'block-stream',
'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
- '*backing-file': 'str', '*speed': 'int',
+ '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
'*on-error': 'BlockdevOnError' } }
##
#
# @host_device, @host_cdrom: Since 2.1
# @gluster: Since 2.7
-# @nbd: Since 2.8
+# @nbd, @nfs, @replication, @ssh: Since 2.8
#
# Since: 2.0
##
{ 'enum': 'BlockdevDriver',
'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
- 'host_device', 'http', 'https', 'luks', 'nbd', 'null-aio',
+ 'host_device', 'http', 'https', 'luks', 'nbd', 'nfs', 'null-aio',
'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
- 'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'replication', 'ssh', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc',
+ 'vvfat' ] }
##
# @BlockdevOptionsFile
'*vport': 'int',
'*segment': 'str' } }
+##
+# @BlockdevOptionsSsh
+#
+# @server: host address
+#
+# @path: path to the image on the host
+#
+# @user: #optional user as which to connect, defaults to current
+# local user name
+#
+# TODO: Expose the host_key_check option in QMP
+#
+# Since: 2.8
+##
+{ 'struct': 'BlockdevOptionsSsh',
+ 'data': { 'server': 'InetSocketAddress',
+ 'path': 'str',
+ '*user': 'str' } }
+
##
# @BlkdebugEvent
'data': { 'mode': 'ReplicationMode',
'*top-id': 'str' } }
+##
+# @NFSTransport
+#
+# An enumeration of NFS transport types
+#
+# @inet: TCP transport
+#
+# Since: 2.8
+##
+{ 'enum': 'NFSTransport',
+ 'data': [ 'inet' ] }
+
+##
+# @NFSServer
+#
+# Captures the address of the socket
+#
+# @type: transport type used for NFS (only TCP supported)
+#
+# @host: host address for NFS server
+#
+# Since: 2.8
+##
+{ 'struct': 'NFSServer',
+ 'data': { 'type': 'NFSTransport',
+ 'host': 'str' } }
+
+##
+# @BlockdevOptionsNfs
+#
+# Driver specific block device option for NFS
+#
+# @server: host address
+#
+# @path: path of the image on the host
+#
+# @user: #optional UID value to use when talking to the
+# server (defaults to 65534 on Windows and getuid()
+# on unix)
+#
+# @group: #optional GID value to use when talking to the
+# server (defaults to 65534 on Windows and getgid()
+# on unix)
+#
+# @tcp-syn-count: #optional number of SYNs during the session
+# establishment (defaults to libnfs default)
+#
+# @readahead-size: #optional set the readahead size in bytes (defaults
+# to libnfs default)
+#
+# @page-cache-size: #optional set the pagecache size in bytes (defaults
+# to libnfs default)
+#
+# @debug-level: #optional set the NFS debug level (max 2) (defaults
+# to libnfs default)
+#
+# Since: 2.8
+##
+{ 'struct': 'BlockdevOptionsNfs',
+ 'data': { 'server': 'NFSServer',
+ 'path': 'str',
+ '*user': 'int',
+ '*group': 'int',
+ '*tcp-syn-count': 'int',
+ '*readahead-size': 'int',
+ '*page-cache-size': 'int',
+ '*debug-level': 'int' } }
+
##
# @BlockdevOptionsCurl
#
'*export': 'str',
'*tls-creds': 'str' } }
+##
+# @BlockdevOptionsRaw
+#
+# Driver specific block device options for the raw driver.
+#
+# @offset: #optional position where the block device starts
+# @size: #optional the assumed size of the device
+#
+# Since: 2.8
+##
+{ 'struct': 'BlockdevOptionsRaw',
+ 'base': 'BlockdevOptionsGenericFormat',
+ 'data': { '*offset': 'int', '*size': 'int' } }
+
##
# @BlockdevOptions
#
# TODO iscsi: Wait for structured options
'luks': 'BlockdevOptionsLUKS',
'nbd': 'BlockdevOptionsNbd',
-# TODO nfs: Wait for structured options
+ 'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
'parallels': 'BlockdevOptionsGenericFormat',
'qcow': 'BlockdevOptionsGenericCOWFormat',
'qed': 'BlockdevOptionsGenericCOWFormat',
'quorum': 'BlockdevOptionsQuorum',
- 'raw': 'BlockdevOptionsGenericFormat',
+ 'raw': 'BlockdevOptionsRaw',
# TODO rbd: Wait for structured options
'replication':'BlockdevOptionsReplication',
# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
+ 'ssh': 'BlockdevOptionsSsh',
'tftp': 'BlockdevOptionsCurl',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
@end itemize
qemu-ga will read a system configuration file on startup (located at
-q@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining
+@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining
configuration options on the command line. For the same key, the last
option wins, but the lists accumulate (see below for configuration
file format).
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/timer.h"
-#include "sysemu/block-backend.h"
#include "qemu/cutils.h"
#define CMD_NOFILE_OK 0x01
STEXI
@item -set @var{group}.@var{id}.@var{arg}=@var{value}
@findex -set
-Set parameter @var{arg} for item @var{id} of type @var{group}\n"
+Set parameter @var{arg} for item @var{id} of type @var{group}
ETEXI
DEF("global", HAS_ARG, QEMU_OPTION_global,
@example
-chardev stdio,mux=on,id=char0 \
--mon chardev=char0,mode=readline,default \
+-mon chardev=char0,mode=readline \
-serial chardev:char0 \
-serial chardev:char0
@end example
@example
-chardev stdio,mux=on,id=char0 \
--mon chardev=char0,mode=readline,default \
+-mon chardev=char0,mode=readline \
-parallel chardev:char0 \
-chardev tcp,...,mux=on,id=char1 \
-serial chardev:char1 \
ETEXI
DEF("mon", HAS_ARG, QEMU_OPTION_mon, \
- "-mon [chardev=]name[,mode=readline|control][,default]\n", QEMU_ARCH_ALL)
+ "-mon [chardev=]name[,mode=readline|control]\n", QEMU_ARCH_ALL)
STEXI
-@item -mon [chardev=]name[,mode=readline|control][,default]
+@item -mon [chardev=]name[,mode=readline|control]
@findex -mon
Setup monitor on chardev @var{name}.
ETEXI
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
-@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
+@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}]
Dump the network traffic on netdev @var{dev} to the file specified by
@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.
#include "qom/object_interfaces.h"
#include "hw/mem/pc-dimm.h"
#include "hw/acpi/acpi_dev_interface.h"
-#include "qemu/uuid.h"
NameInfo *qmp_query_name(Error **errp)
{
-Subproject commit c5542f226c0d3d61e7bb578b70e591097d575479
+Subproject commit 1dc4f162efc0f00a36126cab8e7b906335f6b706
# the top-level directory.
# Usage:
-# clean-includes [--git subjectprefix] file ...
+# clean-includes [--git subjectprefix] [--check-dup-head] file ...
# or
-# clean-includes [--git subjectprefix] --all
+# clean-includes [--git subjectprefix] [--check-dup-head] --all
#
# If the --git subjectprefix option is given, then after making
# the changes to the files this script will create a git commit
# with the subject line "subjectprefix: Clean up includes"
# and a boilerplate commit message.
#
+# If --check-dup-head is specified, additionally check for duplicate
+# header includes.
+#
# Using --all will cause clean-includes to run on the whole source
# tree (excluding certain directories which are known not to need
# handling).
GIT=no
+DUPHEAD=no
# Extended regular expression defining files to ignore when using --all
XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios|disas/libvixl)'
-if [ $# -ne 0 ] && [ "$1" = "--git" ]; then
- if [ $# -eq 1 ]; then
- echo "--git option requires an argument"
- exit 1
- fi
- GITSUBJ="$2"
- GIT=yes
- shift
- shift
-fi
+while true
+do
+ case $1 in
+ "--git")
+ if [ $# -eq 1 ]; then
+ echo "--git option requires an argument"
+ exit 1
+ fi
+ GITSUBJ="$2"
+ GIT=yes
+ shift
+ shift
+ ;;
+ "--check-dup-head")
+ DUPHEAD=yes
+ shift
+ ;;
+ "--")
+ shift
+ break
+ ;;
+ *)
+ break
+ ;;
+ esac
+done
if [ $# -eq 0 ]; then
- echo "Usage: clean-includes [--git subjectprefix] [--all | foo.c ...]"
+ echo "Usage: clean-includes [--git subjectprefix] [--check-dup-head] [--all | foo.c ...]"
echo "(modifies the files in place)"
exit 1
fi
)
EOT
-
for f in "$@"; do
case "$f" in
*.inc.c)
done
+if [ "$DUPHEAD" = "yes" ]; then
+    # Collect every #include line (blanks stripped) that occurs more than
+    # once.  The exit status of the pipeline is that of awk, which is 0
+    # whether or not anything was printed, so decide on the captured
+    # output rather than on $?.
+    dups=$(egrep "^[[:space:]]*#[[:space:]]*include" "$@" | tr -d '[:blank:]' \
+        | sort | uniq -c | awk '{if ($1 > 1) print $0}')
+    if [ -n "$dups" ]; then
+        printf '%s\n' "$dups"
+        echo "Found duplicate header file includes. Please check the above files manually."
+        exit 1
+    fi
+fi
+
if [ "$GIT" = "yes" ]; then
git add -- "$@"
git commit --signoff -F - <<EOF
;;
STEXI*)
if test $flag -eq 1 ; then
- echo "line $line: syntax error: expected ETEXI, found $str" >&2
+ printf "line %d: syntax error: expected ETEXI, found '%s'\n" "$line" "$str" >&2
exit 1
fi
flag=1
;;
ETEXI*)
if test $flag -ne 1 ; then
- echo "line $line: syntax error: expected STEXI, found $str" >&2
+ printf "line %d: syntax error: expected STEXI, found '%s'\n" "$line" "$str" >&2
exit 1
fi
flag=0
;;
SQMP*|EQMP*)
if test $flag -eq 1 ; then
- echo "line $line: syntax error: expected ETEXI, found $str" >&2
+ printf "line %d: syntax error: expected ETEXI, found '%s'\n" "$line" "$str" >&2
exit 1
fi
;;
DEFHEADING*)
- echo "$(expr "$str" : "DEFHEADING(\(.*\))")"
+ printf '%s\n' "$(expr "$str" : "DEFHEADING(\(.*\))")"
;;
ARCHHEADING*)
- echo "$(expr "$str" : "ARCHHEADING(\(.*\),.*)")"
+ printf '%s\n' "$(expr "$str" : "ARCHHEADING(\(.*\),.*)")"
;;
*)
- test $flag -eq 1 && echo "$str"
+ test $flag -eq 1 && printf '%s\n' "$str"
;;
esac
line=$((line+1))
;;
SQMP*)
if test $flag -eq 1 ; then
- echo "line $line: syntax error: expected EQMP, found $str" >&2
+ printf "line %d: syntax error: expected EQMP, found '%s'\n" "$line" "$str" >&2
exit 1
fi
flag=1
;;
EQMP*)
if test $flag -ne 1 ; then
- echo "line $line: syntax error: expected SQMP, found $str" >&2
+ printf "line %d: syntax error: expected SQMP, found '%s'\n" "$line" "$str" >&2
exit 1
fi
flag=0
;;
STEXI*|ETEXI*)
if test $flag -eq 1 ; then
- echo "line $line: syntax error: expected EQMP, found $str" >&2
+ printf "line %d: syntax error: expected EQMP, found '%s'\n" "$line" "$str" >&2
exit 1
fi
;;
*)
- test $flag -eq 1 && echo "$str"
+ test $flag -eq 1 && printf '%s\n' "$str"
;;
esac
line=$((line+1))
if dirname == "":
return "common"
- return re.sub(r"/|-", "_", dirname)
+ return re.sub(r"[^A-Za-z0-9]", "_", dirname)
def main(args):
global _SCRIPT
stub-obj-y += smbios_type_38.o
stub-obj-y += ipmi.o
stub-obj-y += pc_madt_cpu_entry.o
+stub-obj-y += migration-colo.o
--- /dev/null
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "migration/colo.h"
+#include "qmp-commands.h"
+
+/* Stub: always reports that COLO is not supported. */
+bool colo_supported(void)
+{
+ return false;
+}
+
+/* Stub: always returns false. */
+bool migration_in_colo_state(void)
+{
+ return false;
+}
+
+/* Stub: always returns false. */
+bool migration_incoming_in_colo_state(void)
+{
+ return false;
+}
+
+/* Stub: does nothing; @s is not used. */
+void migrate_start_colo_process(MigrationState *s)
+{
+}
+
+/* Stub: ignores @opaque and returns NULL. */
+void *colo_process_incoming_thread(void *opaque)
+{
+ return NULL;
+}
+
+/*
+ * Stub: always fails, setting an error in @errp that tells the user to
+ * rerun configure with --enable-colo.
+ */
+void qmp_x_colo_lost_heartbeat(Error **errp)
+{
+ error_setg(errp, "COLO is not supported, please rerun configure"
+ " with --enable-colo option in order to support"
+ " COLO feature");
+}
obj-y += gdbstub.o
obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
obj-y += crypto_helper.o
-obj-y += arm-powerctl.o
+obj-$(CONFIG_SOFTMMU) += arm-powerctl.o
/* Start the new CPU at the requested address */
cpu_set_pc(target_cpu_state, entry);
+ qemu_cpu_kick(target_cpu_state);
+
/* We are good to go */
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
*/
static GList *plus_features, *minus_features;
+/*
+ * Compare two strings with g_strcmp0(); used as the comparison callback
+ * for g_list_find_custom() when looking up feature names.
+ */
+static gint compare_string(gconstpointer a, gconstpointer b)
+{
+ return g_strcmp0(a, b);
+}
+
/* Parse "+feature,-feature,feature=foo" CPU feature string
*/
static void x86_cpu_parse_featurestr(const char *typename, char *features,
char *featurestr; /* Single 'key=value" string being parsed */
Error *local_err = NULL;
static bool cpu_globals_initialized;
+ bool ambiguous = false;
if (cpu_globals_initialized) {
return;
feat2prop(featurestr);
name = featurestr;
+ if (g_list_find_custom(plus_features, name, compare_string)) {
+ error_report("warning: Ambiguous CPU model string. "
+ "Don't mix both \"+%s\" and \"%s=%s\"",
+ name, name, val);
+ ambiguous = true;
+ }
+ if (g_list_find_custom(minus_features, name, compare_string)) {
+ error_report("warning: Ambiguous CPU model string. "
+ "Don't mix both \"-%s\" and \"%s=%s\"",
+ name, name, val);
+ ambiguous = true;
+ }
+
/* Special case: */
if (!strcmp(name, "tsc-freq")) {
int64_t tsc_freq;
qdev_prop_register_global(prop);
}
+ if (ambiguous) {
+ error_report("warning: Compatibility of ambiguous CPU model "
+ "strings won't be kept on future QEMU versions");
+ }
+
if (local_err) {
error_propagate(errp, local_err);
}
int flags;
} MCEInjectionParams;
-static void do_inject_x86_mce(CPUState *cs, void *data)
+static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
{
- MCEInjectionParams *params = data;
+ MCEInjectionParams *params = data.host_ptr;
X86CPU *cpu = X86_CPU(cs);
CPUX86State *cenv = &cpu->env;
uint64_t *banks = cenv->mce_banks + 4 * params->bank;
return;
}
- run_on_cpu(cs, do_inject_x86_mce, ¶ms);
+ run_on_cpu(cs, do_inject_x86_mce, RUN_ON_CPU_HOST_PTR(¶ms));
if (flags & MCE_INJECT_BROADCAST) {
CPUState *other_cs;
if (other_cs == cs) {
continue;
}
- run_on_cpu(other_cs, do_inject_x86_mce, ¶ms);
+ run_on_cpu(other_cs, do_inject_x86_mce, RUN_ON_CPU_HOST_PTR(¶ms));
}
}
}
return 0;
}
-static inline void do_kvm_synchronize_tsc(CPUState *cpu, void *arg)
+static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg)
{
kvm_get_tsc(cpu);
}
if (kvm_enabled()) {
CPU_FOREACH(cpu) {
- run_on_cpu(cpu, do_kvm_synchronize_tsc, NULL);
+ run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
}
}
#include "hw/i386/pc.h"
#include "hw/isa/isa.h"
#include "migration/cpu.h"
-#include "exec/exec-all.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "exec/log.h"
-#define DISAS_LM32 1
-#if DISAS_LM32
-# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
-#else
-# define LOG_DIS(...) do { } while (0)
-#endif
+#define DISAS_LM32 0
+
+#define LOG_DIS(...) \
+ do { \
+ if (DISAS_LM32) { \
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__); \
+ } \
+ } while (0)
#define EXTRACT_FIELD(src, start, end) \
(((src) >> start) & ((1 << (end - start + 1)) - 1))
static void dec_andhi(DisasContext *dc)
{
- LOG_DIS("andhi r%d, r%d, %d\n", dc->r2, dc->r0, dc->imm16);
+ LOG_DIS("andhi r%d, r%d, %d\n", dc->r1, dc->r0, dc->imm16);
tcg_gen_andi_tl(cpu_R[dc->r1], cpu_R[dc->r0], (dc->imm16 << 16));
}
static void dec_be(DisasContext *dc)
{
- LOG_DIS("be r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("be r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_EQ);
static void dec_bg(DisasContext *dc)
{
- LOG_DIS("bg r%d, r%d, %d\n", dc->r0, dc->r1,
- sign_extend(dc->imm16, 16 * 4));
+ /* Log the branch offset as the sign-extended 16-bit immediate times 4,
+  * in the same form as the other conditional branch decoders. */
+ LOG_DIS("bg r%d, r%d, %d\n", dc->r1, dc->r0,
+ sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_GT);
static void dec_bge(DisasContext *dc)
{
- LOG_DIS("bge r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("bge r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_GE);
static void dec_bgeu(DisasContext *dc)
{
- LOG_DIS("bgeu r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("bgeu r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_GEU);
static void dec_bgu(DisasContext *dc)
{
- LOG_DIS("bgu r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("bgu r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_GTU);
static void dec_bne(DisasContext *dc)
{
- LOG_DIS("bne r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("bne r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16) * 4);
gen_cond_branch(dc, TCG_COND_NE);
static inline void gen_compare(DisasContext *dc, int cond)
{
- int rX = (dc->format == OP_FMT_RR) ? dc->r2 : dc->r1;
- int rY = (dc->format == OP_FMT_RR) ? dc->r0 : dc->r0;
- int rZ = (dc->format == OP_FMT_RR) ? dc->r1 : -1;
int i;
if (dc->format == OP_FMT_RI) {
break;
}
- tcg_gen_setcondi_tl(cond, cpu_R[rX], cpu_R[rY], i);
+ tcg_gen_setcondi_tl(cond, cpu_R[dc->r1], cpu_R[dc->r0], i);
} else {
- tcg_gen_setcond_tl(cond, cpu_R[rX], cpu_R[rY], cpu_R[rZ]);
+ tcg_gen_setcond_tl(cond, cpu_R[dc->r2], cpu_R[dc->r0], cpu_R[dc->r1]);
}
}
static void dec_cmpe(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpei r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpei r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpe r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_cmpg(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpgi r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpgi r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpg r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_cmpge(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpgei r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpgei r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpge r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_cmpgeu(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpgeui r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpgeui r%d, r%d, %d\n", dc->r1, dc->r0,
zero_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpgeu r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_cmpgu(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpgui r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpgui r%d, r%d, %d\n", dc->r1, dc->r0,
zero_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpgu r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_cmpne(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("cmpnei r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("cmpnei r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16));
} else {
LOG_DIS("cmpne r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_mul(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("muli r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("muli r%d, r%d, %d\n", dc->r1, dc->r0,
sign_extend(dc->imm16, 16));
} else {
LOG_DIS("mul r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
static void dec_nor(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("nori r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("nori r%d, r%d, %d\n", dc->r1, dc->r0,
zero_extend(dc->imm16, 16));
} else {
LOG_DIS("nor r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
{
int no;
- LOG_DIS("wcsr r%d, %d\n", dc->r1, dc->csr);
+ LOG_DIS("wcsr %d, r%d\n", dc->csr, dc->r1);
switch (dc->csr) {
case CSR_IE:
static void dec_xnor(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("xnori r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("xnori r%d, r%d, %d\n", dc->r1, dc->r0,
zero_extend(dc->imm16, 16));
} else {
if (dc->r1 == R_R0) {
static void dec_xor(DisasContext *dc)
{
if (dc->format == OP_FMT_RI) {
- LOG_DIS("xori r%d, r%d, %d\n", dc->r0, dc->r1,
+ LOG_DIS("xori r%d, r%d, %d\n", dc->r1, dc->r0,
zero_extend(dc->imm16, 16));
} else {
LOG_DIS("xor r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1);
#include "qemu-common.h"
#include "cpu.h"
#include "hw/hw.h"
-#include "cpu.h"
#include "migration/cpu.h"
static int cpu_post_load(void *opaque, int version_id)
#include "helper_regs.h"
#include "mmu-hash64.h"
#include "migration/cpu.h"
-#include "exec/exec-all.h"
static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
{
#include "exec/helper-proto.h"
#include "helper_regs.h"
-#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
//#define DEBUG_OP
{
S390CPU *cpu = opaque;
- run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, NULL);
+ run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
}
#endif
s390_cpu_gdb_init(cs);
qemu_init_vcpu(cs);
#if !defined(CONFIG_USER_ONLY)
- run_on_cpu(cs, s390_do_cpu_full_reset, NULL);
+ run_on_cpu(cs, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
#else
cpu_reset(cs);
#endif
#define decode_basedisp_rs decode_basedisp_s
/* helper functions for run_on_cpu() */
-static inline void s390_do_cpu_reset(CPUState *cs, void *arg)
+static inline void s390_do_cpu_reset(CPUState *cs, run_on_cpu_data arg)
{
S390CPUClass *scc = S390_CPU_GET_CLASS(cs);
scc->cpu_reset(cs);
}
-static inline void s390_do_cpu_full_reset(CPUState *cs, void *arg)
+static inline void s390_do_cpu_full_reset(CPUState *cs, run_on_cpu_data arg)
{
cpu_reset(cs);
}
{
SigpInfo si = {};
- run_on_cpu(CPU(cpu), sigp_restart, &si);
+ run_on_cpu(CPU(cpu), sigp_restart, RUN_ON_CPU_HOST_PTR(&si));
DPRINTF("DONE: KVM cpu restart: %p\n", &cpu->env);
return 0;
}
switch (order) {
case SIGP_START:
- run_on_cpu(CPU(dst_cpu), sigp_start, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_start, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_STOP:
- run_on_cpu(CPU(dst_cpu), sigp_stop, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_stop, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_RESTART:
- run_on_cpu(CPU(dst_cpu), sigp_restart, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_restart, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_STOP_STORE_STATUS:
- run_on_cpu(CPU(dst_cpu), sigp_stop_and_store_status, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_stop_and_store_status, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_STORE_STATUS_ADDR:
- run_on_cpu(CPU(dst_cpu), sigp_store_status_at_address, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_store_status_at_address, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_STORE_ADTL_STATUS:
- run_on_cpu(CPU(dst_cpu), sigp_store_adtl_status, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_store_adtl_status, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_SET_PREFIX:
- run_on_cpu(CPU(dst_cpu), sigp_set_prefix, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_set_prefix, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_INITIAL_CPU_RESET:
- run_on_cpu(CPU(dst_cpu), sigp_initial_cpu_reset, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_initial_cpu_reset, RUN_ON_CPU_HOST_PTR(&si));
break;
case SIGP_CPU_RESET:
- run_on_cpu(CPU(dst_cpu), sigp_cpu_reset, &si);
+ run_on_cpu(CPU(dst_cpu), sigp_cpu_reset, RUN_ON_CPU_HOST_PTR(&si));
break;
default:
DPRINTF("KVM: unknown SIGP: 0x%x\n", order);
pause_all_vcpus();
cpu_synchronize_all_states();
CPU_FOREACH(t) {
- run_on_cpu(t, s390_do_cpu_full_reset, NULL);
+ run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL);
}
s390_cmma_reset();
subsystem_reset();
pause_all_vcpus();
cpu_synchronize_all_states();
CPU_FOREACH(t) {
- run_on_cpu(t, s390_do_cpu_reset, NULL);
+ run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL);
}
s390_cmma_reset();
subsystem_reset();
#include "hw/boards.h"
#include "qemu/timer.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
#include "migration/cpu.h"
-#include "exec/exec-all.h"
#ifdef TARGET_SPARC64
static const VMStateDescription vmstate_cpu_timer = {
#include "tcg-op.h"
#include "qemu/log.h"
#include "sysemu/sysemu.h"
-#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/semihost.h"
/* pool based memory allocation */
+/* tb_lock must be held for tcg_malloc_internal. */
void *tcg_malloc_internal(TCGContext *s, int size);
void tcg_pool_reset(TCGContext *s);
void tb_unlock(void);
void tb_lock_reset(void);
+/* Called with tb_lock held. */
static inline void *tcg_malloc(int size)
{
TCGContext *s = &tcg_ctx;
#include <gnutls/gnutls.h>
#include <gnutls/x509.h>
-#include <gnutls/gnutls.h>
-#include <gnutls/x509.h>
-
#if !(defined WIN32) && \
defined(CONFIG_TASN1) && \
(LIBGNUTLS_VERSION_NUMBER >= 0x020600)
prepare_blkdebug_script(debug_path, "flush_to_disk");
ide_test_start(
- "-vnc none "
"-drive file=blkdebug:%s:%s,if=ide,cache=writeback,format=raw,"
"rerror=stop,werror=stop",
debug_path, tmp_path);
/* Run the tests */
g_test_init(&argc, &argv, NULL);
- cmdline = g_strdup_printf("-vnc none"
+ cmdline = g_strdup_printf(
" -chardev socket,id=ipmi0,host=localhost,port=%d,reconnect=10"
" -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0"
" -device isa-ipmi-bt,bmc=bmc0", emu_port);
/* Run the tests */
g_test_init(&argc, &argv, NULL);
- cmdline = g_strdup_printf("-vnc none -device ipmi-bmc-sim,id=bmc0"
+ cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0"
" -device isa-ipmi-kcs,bmc=bmc0");
qtest_start(cmdline);
qtest_irq_intercept_in(global_qtest, "ioapic");
qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
qemu_io('-f', 'raw', '-c', 'write -P 0x1 0 512', backing_img)
qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x1 524288 512', mid_img)
- self.vm = iotests.VM().add_drive("blkdebug::" + test_img)
+ self.vm = iotests.VM().add_drive("blkdebug::" + test_img, "backing.node-name=mid")
self.vm.launch()
def tearDown(self):
qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img),
'image file map does not match backing file after streaming')
+ def test_stream_intermediate(self):
+ self.assert_no_active_block_jobs()
+
+ self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img),
+ 'image file map matches backing file before streaming')
+
+ result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid')
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_until_completed(drive='stream-mid')
+
+ self.assert_no_active_block_jobs()
+ self.vm.shutdown()
+
+ self.assertEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img),
+ 'image file map does not match backing file after streaming')
+
def test_stream_pause(self):
self.assert_no_active_block_jobs()
self.assert_qmp(result, 'error/class', 'GenericError')
+class TestParallelOps(iotests.QMPTestCase):
+ num_ops = 4 # Number of parallel block-stream operations
+ num_imgs = num_ops * 2 + 1
+ image_len = num_ops * 1024 * 1024
+ imgs = []
+
+ def setUp(self):
+     # Build a chain of num_imgs images (img-0 is the base, the last one
+     # is the active layer), give every node a name, and start a VM with
+     # the active layer attached.
+     opts = []
+     self.imgs = []
+
+     # Initialize file names and command-line options
+     for i in range(self.num_imgs):
+         img_depth = self.num_imgs - i - 1
+         opts.append("backing." * img_depth + "node-name=node%d" % i)
+         self.imgs.append(os.path.join(iotests.test_dir, 'img-%d.img' % i))
+
+     # Create all images
+     iotests.create_image(self.imgs[0], self.image_len)
+     for i in range(1, self.num_imgs):
+         qemu_img('create', '-f', iotests.imgfmt,
+                  '-o', 'backing_file=%s' % self.imgs[i-1], self.imgs[i])
+
+     # Put data into the images we are copying data from.
+     # Use floor division so range() gets an integer under both
+     # classic and true division.
+     for i in range(self.num_imgs // 2):
+         img_index = i * 2 + 1
+         # Alternate between 512k and 1M.
+         # This way jobs will not finish in the same order they were created
+         num_kb = 512 + 512 * (i % 2)
+         qemu_io('-f', iotests.imgfmt,
+                 '-c', 'write -P %d %d %d' % (i, i*1024*1024, num_kb * 1024),
+                 self.imgs[img_index])
+
+     # Attach the drive to the VM
+     self.vm = iotests.VM()
+     self.vm.add_drive(self.imgs[-1], ','.join(opts))
+     self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ for img in self.imgs:
+ os.remove(img)
+
+ # Test that it's possible to run several block-stream operations
+ # in parallel in the same snapshot chain
+ def test_stream_parallel(self):
+ self.assert_no_active_block_jobs()
+
+ # Check that the maps don't match before the streaming operations
+ for i in range(2, self.num_imgs, 2):
+ self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]),
+ 'image file map matches backing file before streaming')
+
+ # Create all streaming jobs
+ pending_jobs = []
+ for i in range(2, self.num_imgs, 2):
+ node_name = 'node%d' % i
+ job_id = 'stream-%s' % node_name
+ pending_jobs.append(job_id)
+ result = self.vm.qmp('block-stream', device=node_name, job_id=job_id, base=self.imgs[i-2], speed=512*1024)
+ self.assert_qmp(result, 'return', {})
+
+ # Wait for all jobs to be finished.
+ while len(pending_jobs) > 0:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_COMPLETED':
+ job_id = self.dictpath(event, 'data/device')
+ self.assertTrue(job_id in pending_jobs)
+ self.assert_qmp_absent(event, 'data/error')
+ pending_jobs.remove(job_id)
+
+ self.assert_no_active_block_jobs()
+ self.vm.shutdown()
+
+ # Check that all maps match now
+ for i in range(2, self.num_imgs, 2):
+ self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]),
+ 'image file map does not match backing file after streaming')
+
+ # Test that it's not possible to perform two block-stream
+ # operations if there are nodes involved in both.
+ def test_overlapping_1(self):
+ self.assert_no_active_block_jobs()
+
+ # Set a speed limit to make sure that this job blocks the rest
+ result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4', base=self.imgs[1], speed=1024*1024)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-stream', device='node5', job_id='stream-node5', base=self.imgs[2])
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3', base=self.imgs[2])
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4-v2')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ # block-commit should also fail if it touches nodes used by the stream job
+ result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[4], job_id='commit-node4')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[1], top=self.imgs[3], job_id='commit-node1')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ # This fails because it needs to modify the backing string in node2, which is blocked
+ result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[0], top=self.imgs[1], job_id='commit-node0')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ self.wait_until_completed(drive='stream-node4')
+ self.assert_no_active_block_jobs()
+
+ # Similar to test_overlapping_1, but with block-commit
+ # blocking the other jobs
+ def test_overlapping_2(self):
+ self.assertLessEqual(9, self.num_imgs)
+ self.assert_no_active_block_jobs()
+
+ # Set a speed limit to make sure that this job blocks the rest
+ result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[5], base=self.imgs[3], job_id='commit-node3', speed=1024*1024)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-stream', device='node6', base=self.imgs[2], job_id='stream-node6')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], job_id='stream-node4')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('block-stream', device='node6', base=self.imgs[4], job_id='stream-node6-v2')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ # This fails because block-commit needs to block node6, the overlay of the 'top' image
+ result = self.vm.qmp('block-stream', device='node7', base=self.imgs[5], job_id='stream-node6-v3')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ # This fails because block-commit currently blocks the active layer even if it's not used
+ result = self.vm.qmp('block-stream', device='drive0', base=self.imgs[5], job_id='stream-drive0')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ self.wait_until_completed(drive='commit-node3')
+
+ # Similar to test_overlapping_2, but here block-commit doesn't use the 'top' parameter.
+ # Internally this uses a mirror block job, hence the separate test case.
+ def test_overlapping_3(self):
+ self.assertLessEqual(8, self.num_imgs)
+ self.assert_no_active_block_jobs()
+
+ # Set a speed limit to make sure that this job blocks the rest
+ result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3], job_id='commit-drive0', speed=1024*1024)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-stream', device='node5', base=self.imgs[3], job_id='stream-node6')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.vm.get_qmp_event(wait=True)
+ self.assertEqual(event['event'], 'BLOCK_JOB_READY')
+ self.assert_qmp(event, 'data/device', 'commit-drive0')
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp_absent(event, 'data/error')
+
+ result = self.vm.qmp('block-job-complete', device='commit-drive0')
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_until_completed(drive='commit-drive0')
+
+ # Test a block-stream and a block-commit job in parallel
+ def test_stream_commit(self):
+ self.assertLessEqual(8, self.num_imgs)
+ self.assert_no_active_block_jobs()
+
+ # Stream from node0 into node2
+ result = self.vm.qmp('block-stream', device='node2', job_id='node2')
+ self.assert_qmp(result, 'return', {})
+
+ # Commit from the active layer into node3
+ result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3])
+ self.assert_qmp(result, 'return', {})
+
+ # Wait for all jobs to be finished.
+ pending_jobs = ['node2', 'drive0']
+ while len(pending_jobs) > 0:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_COMPLETED':
+ node_name = self.dictpath(event, 'data/device')
+ self.assertTrue(node_name in pending_jobs)
+ self.assert_qmp_absent(event, 'data/error')
+ pending_jobs.remove(node_name)
+ if event['event'] == 'BLOCK_JOB_READY':
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp_absent(event, 'data/error')
+ self.assertTrue('drive0' in pending_jobs)
+ self.vm.qmp('block-job-complete', device='drive0')
+
+ self.assert_no_active_block_jobs()
+
+ # Test the base_node parameter
+ def test_stream_base_node_name(self):
+     # Exercise the 'base_node' argument of block-stream: four invalid
+     # requests must fail with GenericError, then a valid one must
+     # complete and leave node4's image with the same map as its
+     # backing file.
+     self.assert_no_active_block_jobs()
+
+     self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]),
+                         qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]),
+                         'image file map matches backing file before streaming')
+
+     # Error: the base node does not exist
+     result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream')
+     self.assert_qmp(result, 'error/class', 'GenericError')
+
+     # Error: the base node is not a backing file of the top node
+     result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream')
+     self.assert_qmp(result, 'error/class', 'GenericError')
+
+     # Error: the base node is the same as the top node
+     result = self.vm.qmp('block-stream', device='node4', base_node='node4', job_id='stream')
+     self.assert_qmp(result, 'error/class', 'GenericError')
+
+     # Error: cannot specify 'base' and 'base-node' at the same time
+     result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], base_node='node2', job_id='stream')
+     self.assert_qmp(result, 'error/class', 'GenericError')
+
+     # Success: the base node is a backing file of the top node
+     result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='stream')
+     self.assert_qmp(result, 'return', {})
+
+     self.wait_until_completed(drive='stream')
+
+     self.assert_no_active_block_jobs()
+     self.vm.shutdown()
+
+     # The message describes the failure case: the maps are expected to
+     # be equal here, so a failure means they do not match.
+     self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]),
+                      qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]),
+                      'image file map does not match backing file after streaming')
+
+class TestQuorum(iotests.QMPTestCase):
+ num_children = 3
+ children = []
+ backing = []
+
+ def setUp(self):
+     # Create num_children (backing image, child image) pairs and start a
+     # VM whose drive is a quorum node over the child images.
+     opts = ['driver=quorum', 'vote-threshold=2']
+
+     # Use per-instance lists: appending to the class-level lists would
+     # accumulate entries across test methods, and tearDown() would then
+     # try to remove the same files more than once.
+     self.children = []
+     self.backing = []
+
+     # Initialize file names and command-line options
+     for i in range(self.num_children):
+         child_img = os.path.join(iotests.test_dir, 'img-%d.img' % i)
+         backing_img = os.path.join(iotests.test_dir, 'backing-%d.img' % i)
+         self.children.append(child_img)
+         self.backing.append(backing_img)
+         qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M')
+         qemu_io('-f', iotests.imgfmt,
+                 '-c', 'write -P 0x55 0 1024', backing_img)
+         qemu_img('create', '-f', iotests.imgfmt,
+                  '-o', 'backing_file=%s' % backing_img, child_img)
+         opts.append("children.%d.file.filename=%s" % (i, child_img))
+         opts.append("children.%d.node-name=node%d" % (i, i))
+
+     # Attach the drive to the VM
+     self.vm = iotests.VM()
+     self.vm.add_drive(path = None, opts = ','.join(opts))
+     self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ for img in self.children:
+ os.remove(img)
+ for img in self.backing:
+ os.remove(img)
+
+ def test_stream_quorum(self):
+ if not iotests.supports_quorum():
+ return
+
+ self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]),
+ 'image file map matches backing file before streaming')
+
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('block-stream', device='node0', job_id='stream-node0')
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_until_completed(drive='stream-node0')
+
+ self.assert_no_active_block_jobs()
+ self.vm.shutdown()
+
+ self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]),
+ qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]),
+ 'image file map does not match backing file after streaming')
+
class TestSmallerBackingFile(iotests.QMPTestCase):
backing_len = 1 * 1024 * 1024 # MB
image_len = 2 * backing_len
-..............
+......................
----------------------------------------------------------------------
-Ran 14 tests
+Ran 22 tests
OK
image_len = 1 * 1024 * 1024 # MB
IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
- def has_quorum(self):
- return 'quorum' in iotests.qemu_img_pipe('--help')
-
def setUp(self):
self.vm = iotests.VM()
#assemble the quorum block device from the individual files
args = { "driver": "quorum", "node-name": "quorum0",
"vote-threshold": 2, "children": [ "img0", "img1", "img2" ] }
- if self.has_quorum():
+ if iotests.supports_quorum():
result = self.vm.qmp("blockdev-add", **args)
self.assert_qmp(result, 'return', {})
pass
def test_complete(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
self.assert_no_active_block_jobs()
'target image does not match source after mirroring')
def test_cancel(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
self.assert_no_active_block_jobs()
self.vm.shutdown()
def test_cancel_after_ready(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
self.assert_no_active_block_jobs()
'target image does not match source after mirroring')
def test_pause(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
self.assert_no_active_block_jobs()
'target image does not match source after mirroring')
def test_medium_not_found(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
if iotests.qemu_default_machine != 'pc':
self.assert_qmp(result, 'error/class', 'GenericError')
def test_image_not_found(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
self.assert_qmp(result, 'error/class', 'GenericError')
def test_device_not_found(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('drive-mirror', job_id='job0',
self.assert_qmp(result, 'error/class', 'GenericError')
def test_wrong_sync_mode(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0',
self.assert_qmp(result, 'error/class', 'GenericError')
def test_no_node_name(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
self.assert_qmp(result, 'error/class', 'GenericError')
def test_nonexistent_replaces(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
self.assert_qmp(result, 'error/class', 'GenericError')
def test_after_a_quorum_snapshot(self):
- if not self.has_quorum():
+ if not iotests.supports_quorum():
return
result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
self.checkBlockDriverState('node1', False)
def testQuorum(self):
- if not 'quorum' in iotests.qemu_img_pipe('--help'):
+ if not iotests.supports_quorum():
return
+
self.addQuorum('quorum0', 'node0', 'node1')
# We cannot remove the children of a Quorum device
self.delBlockDriverState('node0', expect_error = True)
--- /dev/null
+#!/bin/bash
+#
+# Test 'offset' and 'size' options of the raw driver. Make sure we can't
+# (or can) read and write outside of the image size.
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=tgolembi@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+
+# Cleanup hook: delegates to _cleanup_test_img.
+_cleanup()
+{
+ _cleanup_test_img
+}
+# Run _cleanup and exit with $status on normal exit (0) and on signals
+# 1, 2, 3 and 15.  The \$ defers expansion of $status until the trap fires,
+# so the value assigned at the end of the script is the one reported.
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt raw
+_supported_proto file
+_supported_os Linux
+
+
+# Create JSON with options
+#
+# Prints (without a trailing newline) a "json:{...}" image specification
+# that opens $TEST_IMG through the raw driver on top of the file driver.
+# Reads the globals:
+#   img_offset - emitted as the raw driver's "offset" option
+#   img_size   - emitted as the "size" option unless it is -1
+img_json() {
+ echo -n 'json:{"driver":"raw", '
+ echo -n "\"offset\":\"$img_offset\", "
+ # -1 means "no explicit size": leave the option out entirely
+ if [ "$img_size" -ne -1 ] ; then
+ echo -n "\"size\":\"$img_size\", "
+ fi
+ echo -n '"file": {'
+ echo -n '"driver":"file", '
+ echo -n "\"filename\":\"$TEST_IMG\" "
+ echo -n "} }"
+}
+
+# Run the common set of checks against the window described by the globals
+# size, img_offset and img_size.
+#
+# Commands given "$(img_json)" go through the raw driver with the
+# offset/size options; commands given $TEST_IMG access the underlying file
+# directly and are used to inspect what actually landed on disk.
+# All qemu-io output is passed through _filter_qemu_io.
+do_general_test() {
+ # Length of the exposed window: the explicit size if one was given,
+ # otherwise everything from img_offset up to the end of the file.
+ if [ "$img_size" -ge 0 ] ; then
+ test_size=$img_size
+ else
+ test_size=$((size-img_offset))
+ fi
+
+ echo
+ echo "write to image"
+ $QEMU_IO -c "write -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "read the image"
+ $QEMU_IO -c "read -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "check that offset is respected"
+ # Direct read of the file, 2 bytes before and 2 bytes after img_offset
+ $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "write before image boundary"
+ $QEMU_IO -c "write $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "write across image boundary"
+ $QEMU_IO -c "write $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "write at image boundary"
+ $QEMU_IO -c "write $test_size 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "write after image boundary"
+ $QEMU_IO -c "write $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "writev before/after image boundary"
+ $QEMU_IO -c "writev $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "read before image boundary"
+ $QEMU_IO -c "read $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "read across image boundary"
+ $QEMU_IO -c "read $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "read at image boundary"
+ $QEMU_IO -c "read $test_size 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "read after image boundary"
+ $QEMU_IO -c "read $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "readv before/after image boundary"
+ $QEMU_IO -c "readv $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "fill image with pattern"
+ # Written directly to the file, covering all $size bytes
+ $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "write zeroes and check"
+ $QEMU_IO -c "write -z 0 512" "$(img_json)" | _filter_qemu_io
+ $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "write zeroes across image boundary"
+ $QEMU_IO -c "write -z $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "write zeroes at image boundary and check"
+ $QEMU_IO -c "write -z $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io
+ # Direct reads of the last 2 bytes of the window and the 2 bytes after it
+ $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io
+ $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "fill image with pattern"
+ $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "discard and check"
+ $QEMU_IO -c "discard 0 512" "$(img_json)" | _filter_qemu_io
+ $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io
+
+ echo
+ echo "discard across image boundary"
+ $QEMU_IO -c "discard $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io
+
+ echo
+ echo "discard at image boundary and check"
+ $QEMU_IO -c "discard $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io
+ # Direct reads of the last 2 bytes of the window and the 2 bytes after it
+ $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io
+ $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io
+}
+
+# Each run below sets the globals read by img_json and do_general_test,
+# creates a fresh $size-byte test image, runs the checks and removes it.
+echo
+echo "== test 'offset' option =="
+size=4096
+img_offset=512
+# -1: do not pass a "size" option; the window is size-img_offset bytes long
+img_size=-1
+_make_test_img $size
+do_general_test
+_cleanup_test_img
+
+echo
+echo "== test 'offset' and 'size' options =="
+size=4096
+img_offset=512
+img_size=2048
+_make_test_img $size
+do_general_test
+_cleanup_test_img
+
+echo
+echo "== test misaligned 'offset' =="
+size=4096
+# 10 is not a multiple of 512
+img_offset=10
+img_size=2048
+_make_test_img $size
+do_general_test
+_cleanup_test_img
+
+# Reopen test: the image is first opened with offset=512,size=512 (file
+# bytes 512..1023) and then reopened with offset=1536,size=1024 (file bytes
+# 1536..2559).  For each window the script writes the whole window, its
+# last byte, and then one byte starting exactly at the window size.
+echo
+echo "== test reopen =="
+size=4096
+img_offset=512
+img_size=512
+_make_test_img $size
+(
+$QEMU_IO "$(img_json)" <<EOT
+write -P 0x0a 0 512
+write -P 0x0a 511 1
+write -P 0x0a 512 1
+reopen -o driver=raw,offset=1536,size=1024
+write -P 0x0a 0 1024
+write -P 0x0a 1023 1
+write -P 0x0a 1024 1
+EOT
+) | _filter_qemu_io
+echo "checking boundaries"
+# Direct 4-byte reads of the file straddling the start and the end of each
+# of the two windows (file offsets 512, 1024, 1536 and 2560).
+$QEMU_IO -c "read -v 510 4" $TEST_IMG | _filter_qemu_io
+$QEMU_IO -c "read -v 1022 4" $TEST_IMG | _filter_qemu_io
+$QEMU_IO -c "read -v 1534 4" $TEST_IMG | _filter_qemu_io
+$QEMU_IO -c "read -v 2558 4" $TEST_IMG | _filter_qemu_io
+_cleanup_test_img
+
+# success, all done
+echo
+echo "*** done"
+rm -f $seq.full
+status=0
--- /dev/null
+QA output created by 171
+
+== test 'offset' option ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4096
+
+write to image
+wrote 3584/3584 bytes at offset 0
+3.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read the image
+read 3584/3584 bytes at offset 0
+3.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+check that offset is respected
+000001fe: 00 00 0a 0a ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write before image boundary
+wrote 1/1 bytes at offset 3583
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write across image boundary
+write failed: Input/output error
+
+write at image boundary
+write failed: Input/output error
+
+write after image boundary
+write failed: Input/output error
+
+writev before/after image boundary
+writev failed: Input/output error
+
+read before image boundary
+read 1/1 bytes at offset 3583
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read across image boundary
+read failed: Input/output error
+
+read at image boundary
+read failed: Input/output error
+
+read after image boundary
+read failed: Input/output error
+
+readv before/after image boundary
+readv failed: Input/output error
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes and check
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000001fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes across image boundary
+write failed: Input/output error
+
+write zeroes at image boundary and check
+wrote 2/2 bytes at offset 3582
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000ffe: 00 00 ..
+read 2/2 bytes at offset 4094
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Input/output error
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard and check
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000001fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard across image boundary
+discard failed: Input/output error
+
+discard at image boundary and check
+discard 2/2 bytes at offset 3582
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000ffe: 00 00 ..
+read 2/2 bytes at offset 4094
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Input/output error
+
+== test 'offset' and 'size' options ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4096
+
+write to image
+wrote 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read the image
+read 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+check that offset is respected
+000001fe: 00 00 0a 0a ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write before image boundary
+wrote 1/1 bytes at offset 2047
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write across image boundary
+write failed: Input/output error
+
+write at image boundary
+write failed: Input/output error
+
+write after image boundary
+write failed: Input/output error
+
+writev before/after image boundary
+writev failed: Input/output error
+
+read before image boundary
+read 1/1 bytes at offset 2047
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read across image boundary
+read failed: Input/output error
+
+read at image boundary
+read failed: Input/output error
+
+read after image boundary
+read failed: Input/output error
+
+readv before/after image boundary
+readv failed: Input/output error
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes and check
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000001fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes across image boundary
+write failed: Input/output error
+
+write zeroes at image boundary and check
+wrote 2/2 bytes at offset 2046
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000009fe: 00 00 ..
+read 2/2 bytes at offset 2558
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000a00: 0a 0a ..
+read 2/2 bytes at offset 2560
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard and check
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000001fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard across image boundary
+discard failed: Input/output error
+
+discard at image boundary and check
+discard 2/2 bytes at offset 2046
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000009fe: 00 00 ..
+read 2/2 bytes at offset 2558
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000a00: 0a 0a ..
+read 2/2 bytes at offset 2560
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== test misaligned 'offset' ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4096
+
+write to image
+wrote 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read the image
+read 2048/2048 bytes at offset 0
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+check that offset is respected
+00000008: 00 00 0a 0a ....
+read 4/4 bytes at offset 8
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write before image boundary
+wrote 1/1 bytes at offset 2047
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write across image boundary
+write failed: Input/output error
+
+write at image boundary
+write failed: Input/output error
+
+write after image boundary
+write failed: Input/output error
+
+writev before/after image boundary
+writev failed: Input/output error
+
+read before image boundary
+read 1/1 bytes at offset 2047
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read across image boundary
+read failed: Input/output error
+
+read at image boundary
+read failed: Input/output error
+
+read after image boundary
+read failed: Input/output error
+
+readv before/after image boundary
+readv failed: Input/output error
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes and check
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000008: 0a 0a 00 00 ....
+read 4/4 bytes at offset 8
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write zeroes across image boundary
+write failed: Input/output error
+
+write zeroes at image boundary and check
+wrote 2/2 bytes at offset 2046
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000808: 00 00 ..
+read 2/2 bytes at offset 2056
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+0000080a: 0a 0a ..
+read 2/2 bytes at offset 2058
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+fill image with pattern
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard and check
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000008: 0a 0a 00 00 ....
+read 4/4 bytes at offset 8
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+discard across image boundary
+discard failed: Input/output error
+
+discard at image boundary and check
+discard 2/2 bytes at offset 2046
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+00000808: 00 00 ..
+read 2/2 bytes at offset 2056
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+0000080a: 0a 0a ..
+read 2/2 bytes at offset 2058
+2 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== test reopen ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4096
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1/1 bytes at offset 511
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+wrote 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1/1 bytes at offset 1023
+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+checking boundaries
+000001fe: 00 00 0a 0a ....
+read 4/4 bytes at offset 510
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000003fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 1022
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000005fe: 00 00 0a 0a ....
+read 4/4 bytes at offset 1534
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+000009fe: 0a 0a 00 00 ....
+read 4/4 bytes at offset 2558
+4 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+*** done
160 rw auto quick
162 auto quick
170 rw auto quick
+171 rw auto quick
172 auto
if True not in [sys.platform.startswith(x) for x in supported_oses]:
notrun('not suitable for this OS: %s' % sys.platform)
+def supports_quorum():
+ '''Return True if 'quorum' appears in the output of qemu-img --help'''
+ return 'quorum' in qemu_img_pipe('--help')
+
def verify_quorum():
'''Skip test suite if quorum support is not available'''
- if 'quorum' not in qemu_img_pipe('--help'):
+ if not supports_quorum():
notrun('quorum support missing')
def main(supported_fmts=[], supported_oses=['linux']):
return ret;
}
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
static bool qom_get_bool(const char *path, const char *prop)
{
QBool *value = qobject_to_qbool(qom_get(path, prop));
QDECREF(value);
return b;
}
+#endif
typedef struct CpuidTestArgs {
const char *cmdline;
qtest_add_data_func(name, args, test_cpuid_prop);
}
-static void test_plus_minus(void)
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+static void test_plus_minus_subprocess(void)
{
char *path;
* 3) Old feature names with underscores (e.g. "sse4_2")
* should keep working
*
- * Note: rules 1 and 2 are planned to be removed soon, but we
- * need to keep compatibility for a while until we start
- * warning users about it.
+ * Note: rules 1 and 2 are planned to be removed soon, and
+ * should generate a warning.
*/
qtest_start("-cpu pentium,-fpu,+fpu,-mce,mce=on,+cx8,cx8=off,+sse4_1,sse4_2=on");
path = get_cpu0_qom_path();
g_free(path);
}
+/* Parent half of the plus/minus parsing test.
+ *
+ * Runs the "/x86/cpuid/parsing-plus-minus/subprocess" test case in a
+ * subprocess and checks that it passed, that its stderr matches the
+ * "Ambiguous CPU model string" warning patterns for both the mce and the
+ * cx8 feature, and that it printed nothing on stdout.
+ */
+static void test_plus_minus(void)
+{
+ g_test_trap_subprocess("/x86/cpuid/parsing-plus-minus/subprocess", 0, 0);
+ g_test_trap_assert_passed();
+ g_test_trap_assert_stderr("*Ambiguous CPU model string. "
+ "Don't mix both \"-mce\" and \"mce=on\"*");
+ g_test_trap_assert_stderr("*Ambiguous CPU model string. "
+ "Don't mix both \"+cx8\" and \"cx8=off\"*");
+ g_test_trap_assert_stdout("");
+}
+#endif
+
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
- qtest_add_func("x86/cpuid/parsing-plus-minus", test_plus_minus);
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+ g_test_add_func("/x86/cpuid/parsing-plus-minus/subprocess",
+ test_plus_minus_subprocess);
+ g_test_add_func("/x86/cpuid/parsing-plus-minus", test_plus_minus);
+#endif
/* Original level values for CPU models: */
add_cpuid_test("x86/cpuid/phenom/level",
#include "libqos/virtio-pci.h"
#include "qapi/error.h"
-#include "libqos/pci-pc.h"
-#include "libqos/virtio-pci.h"
#include "libqos/malloc-pc.h"
#include "hw/virtio/virtio-net.h"
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
### Guest events, keep at bottom
#include "tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
+#include "exec/exec-all.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#include "qemu/timer.h"
#include "exec/log.h"
-//#define DEBUG_TB_INVALIDATE
-//#define DEBUG_FLUSH
+/* #define DEBUG_TB_INVALIDATE */
+/* #define DEBUG_TB_FLUSH */
+/* #define DEBUG_LOCKING */
/* make various TB consistency checks */
-//#define DEBUG_TB_CHECK
+/* #define DEBUG_TB_CHECK */
#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation. */
#undef DEBUG_TB_CHECK
#endif
+/* Access to the various translation structures needs to be serialised
+ * via locks for consistency. This is automatic for SoftMMU based system
+ * emulation due to its single threaded nature. In user-mode emulation
+ * access to the memory related structures is protected with the
+ * mmap_lock.
+ */
+#ifdef DEBUG_LOCKING
+#define DEBUG_MEM_LOCKS 1
+#else
+#define DEBUG_MEM_LOCKS 0
+#endif
+
+/* assert_memory_lock(): expands to nothing in CONFIG_SOFTMMU builds.
+ * Otherwise it asserts have_mmap_lock(), but only when DEBUG_LOCKING is
+ * defined (DEBUG_MEM_LOCKS == 1).
+ */
+#ifdef CONFIG_SOFTMMU
+#define assert_memory_lock() do { /* nothing */ } while (0)
+#else
+#define assert_memory_lock() do { \
+ if (DEBUG_MEM_LOCKS) { \
+ g_assert(have_mmap_lock()); \
+ } \
+ } while (0)
+#endif
+
#define SMC_BITMAP_USE_THRESHOLD 10
typedef struct PageDesc {
#endif
}
+/* assert_tb_lock(): expands to nothing in CONFIG_SOFTMMU builds.
+ * Otherwise it asserts have_tb_lock, but only when DEBUG_LOCKING is
+ * defined (DEBUG_TB_LOCKS == 1).
+ */
+#ifdef DEBUG_LOCKING
+#define DEBUG_TB_LOCKS 1
+#else
+#define DEBUG_TB_LOCKS 0
+#endif
+
+#ifdef CONFIG_SOFTMMU
+#define assert_tb_lock() do { /* nothing */ } while (0)
+#else
+#define assert_tb_lock() do { \
+ if (DEBUG_TB_LOCKS) { \
+ g_assert(have_tb_lock); \
+ } \
+ } while (0)
+#endif
+
+
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
void cpu_gen_init(void)
return p - block;
}
-/* The cpu state corresponding to 'searched_pc' is restored. */
+/* The cpu state corresponding to 'searched_pc' is restored.
+ * Called with tb_lock held.
+ */
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t searched_pc)
{
bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
{
TranslationBlock *tb;
+ bool r = false;
+ tb_lock();
tb = tb_find_pc(retaddr);
if (tb) {
cpu_restore_state_from_tb(cpu, tb, retaddr);
tb_phys_invalidate(tb, -1);
tb_free(tb);
}
- return true;
+ r = true;
}
- return false;
+ tb_unlock();
+
+ return r;
}
void page_size_init(void)
}
/* If alloc=1:
+ * Called with tb_lock held for system emulation.
* Called with mmap_lock held for user-mode emulation.
*/
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
void **lp;
int i;
+ if (alloc) {
+ assert_memory_lock();
+ }
+
/* Level 1. Always allocated. */
lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
return tcg_ctx.code_gen_buffer != NULL;
}
-/* Allocate a new translation block. Flush the translation buffer if
- too many translation blocks or too much generated code. */
+/*
+ * Allocate a new translation block. Flush the translation buffer if
+ * too many translation blocks or too much generated code.
+ *
+ * Called with tb_lock held.
+ */
static TranslationBlock *tb_alloc(target_ulong pc)
{
TranslationBlock *tb;
+ assert_tb_lock();
+
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
return NULL;
}
return tb;
}
+/* Called with tb_lock held. */
void tb_free(TranslationBlock *tb)
{
+ assert_tb_lock();
+
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
be the last one generated. */
}
/* flush all the translation blocks */
-static void do_tb_flush(CPUState *cpu, void *data)
+static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
{
- unsigned tb_flush_req = (unsigned) (uintptr_t) data;
-
tb_lock();
- /* If it's already been done on request of another CPU,
+ /* If it has already been done on request of another CPU,
* just retry.
*/
- if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_req) {
+ if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_count.host_int) {
goto done;
}
-#if defined(DEBUG_FLUSH)
+#if defined(DEBUG_TB_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
(unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
void tb_flush(CPUState *cpu)
{
if (tcg_enabled()) {
- uintptr_t tb_flush_req = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count);
- async_safe_run_on_cpu(cpu, do_tb_flush, (void *) tb_flush_req);
+ unsigned tb_flush_count = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count);
+ async_safe_run_on_cpu(cpu, do_tb_flush,
+ RUN_ON_CPU_HOST_INT(tb_flush_count));
}
}
}
}
+/* verify that all the pages have correct rights for code
+ *
+ * Called with tb_lock held.
+ */
static void tb_invalidate_check(target_ulong address)
{
address &= TARGET_PAGE_MASK;
}
}
-/* invalidate one TB */
+/* invalidate one TB
+ *
+ * Called with tb_lock held.
+ */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
CPUState *cpu;
uint32_t h;
tb_page_addr_t phys_pc;
+ assert_tb_lock();
+
atomic_set(&tb->invalid, true);
/* remove the TB from the hash list */
tb_end = tb_start + tb->size;
if (tb_end > TARGET_PAGE_SIZE) {
tb_end = TARGET_PAGE_SIZE;
- }
+ }
} else {
tb_start = 0;
tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
bool page_already_protected;
#endif
+ assert_memory_lock();
+
tb->page_addr[n] = page_addr;
p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
tb->page_next[n] = p->first_tb;
{
uint32_t h;
+ assert_memory_lock();
+
/* add in the page list */
tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
if (phys_page2 != -1) {
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
+ assert_memory_lock();
phys_pc = get_page_addr_code(env, pc);
if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) {
* access: the virtual CPU will exit the current TB if code is modified inside
* this TB.
*
- * Called with mmap_lock held for user-mode emulation
+ * Called with mmap_lock held for user-mode emulation, grabs tb_lock
+ * Called with tb_lock held for system-mode emulation
*/
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
{
while (start < end) {
tb_invalidate_phys_page_range(start, end, 0);
}
}
+/* Public wrappers around tb_invalidate_phys_range_1(); they differ only in
+ * the locking they expect and perform.
+ *
+ * System emulation: the caller must already hold tb_lock.
+ * NOTE(review): assert_tb_lock() appears to expand to nothing in
+ * CONFIG_SOFTMMU builds, so here it documents the requirement rather than
+ * enforcing it.
+ *
+ * User-mode emulation: the caller must hold mmap_lock; tb_lock is taken
+ * and released here around the invalidation.
+ */
+#ifdef CONFIG_SOFTMMU
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+{
+ assert_tb_lock();
+ tb_invalidate_phys_range_1(start, end);
+}
+#else
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+{
+ assert_memory_lock();
+ tb_lock();
+ tb_invalidate_phys_range_1(start, end);
+ tb_unlock();
+}
+#endif
/*
* Invalidate all TBs which intersect with the target physical address range
* [start;end[. NOTE: start and end must refer to the *same* physical page.
* access: the virtual CPU will exit the current TB if code is modified inside
* this TB.
*
- * Called with mmap_lock held for user-mode emulation
+ * Called with tb_lock/mmap_lock held for user-mode emulation
+ * Called with tb_lock held for system-mode emulation
*/
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
int is_cpu_write_access)
uint32_t current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */
+ assert_memory_lock();
+ assert_tb_lock();
+
p = page_find(start >> TARGET_PAGE_BITS);
if (!p) {
return;
}
#ifdef CONFIG_SOFTMMU
-/* len must be <= 8 and start must be a multiple of len */
+/* len must be <= 8 and start must be a multiple of len.
+ * Called via softmmu_template.h when code areas are written to with
+ * tb_lock held.
+ */
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
PageDesc *p;
(intptr_t)cpu_single_env->segs[R_CS].base);
}
#endif
+ assert_memory_lock();
+
p = page_find(start >> TARGET_PAGE_BITS);
if (!p) {
return;
}
if (!p->code_bitmap &&
++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
- /* build code bitmap */
+ /* build code bitmap. FIXME: writes should be protected by
+ * tb_lock, reads by tb_lock or RCU.
+ */
build_page_bitmap(p);
}
if (p->code_bitmap) {
uint32_t current_flags = 0;
#endif
+ assert_memory_lock();
+
addr &= TARGET_PAGE_MASK;
p = page_find(addr >> TARGET_PAGE_BITS);
if (!p) {
return false;
}
+
+ tb_lock();
tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
if (tb && pc != 0) {
modifying the memory. It will ensure that it cannot modify
itself */
tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+ /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
+ * back into the cpu_exec loop. */
return true;
}
#endif
+ tb_unlock();
+
return false;
}
#endif
return;
}
ram_addr = memory_region_get_ram_addr(mr) + addr;
+ tb_lock();
tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+ tb_unlock();
rcu_read_unlock();
}
#endif /* !defined(CONFIG_USER_ONLY) */
+/* Called with tb_lock held. */
void tb_check_watchpoint(CPUState *cpu)
{
TranslationBlock *tb;
target_ulong pc, cs_base;
uint32_t flags;
+ tb_lock();
tb = tb_find_pc(retaddr);
if (!tb) {
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
/* FIXME: In theory this could raise an exception. In practice
we have already translated the block once so it's probably ok. */
tb_gen_code(cpu, pc, cs_base, flags, cflags);
+
/* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
- the first in the TB) then we end up generating a whole new TB and
- repeating the fault, which is horribly inefficient.
- Better would be to execute just this insn uncached, or generate a
- second new TB. */
+ * the first in the TB) then we end up generating a whole new TB and
+ * repeating the fault, which is horribly inefficient.
+ * Better would be to execute just this insn uncached, or generate a
+ * second new TB.
+ *
+ * cpu_loop_exit_noexc will longjmp back to cpu_exec where the
+ * tb_lock gets reset.
+ */
cpu_loop_exit_noexc(cpu);
}
TranslationBlock *tb;
struct qht_stats hst;
+ tb_lock();
+
target_code_size = 0;
max_target_code_size = 0;
cross_page = 0;
tcg_ctx.tb_ctx.tb_phys_invalidate_count);
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
tcg_dump_info(f, cpu_fprintf);
+
+ tb_unlock();
}
void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
assert(end < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
assert(start < end);
+ assert_memory_lock();
start = start & TARGET_PAGE_MASK;
end = TARGET_PAGE_ALIGN(end);
#include "qemu/osdep.h"
#include <termios.h>
-#include <termios.h>
#include <glib/gprintf.h>
* function succeeds, callback will be called when the connection
* completes, with the file descriptor on success, or -1 on error.
*/
-static int inet_connect_saddr(InetSocketAddress *saddr, Error **errp,
- NonBlockingConnectHandler *callback, void *opaque)
+int inet_connect_saddr(InetSocketAddress *saddr, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque)
{
Error *local_err = NULL;
struct addrinfo *res, *e;
#include "audio/audio.h"
#include "migration/migration.h"
#include "sysemu/cpus.h"
+#include "migration/colo.h"
#include "sysemu/kvm.h"
#include "qapi/qmp/qjson.h"
#include "qemu/option.h"
#include "trace.h"
#include "trace/control.h"
#include "qemu/queue.h"
-#include "sysemu/cpus.h"
#include "sysemu/arch_init.h"
#include "ui/qemu-spice.h"
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
+ { RUN_STATE_INMIGRATE, RUN_STATE_COLO },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_PAUSED, RUN_STATE_COLO},
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO},
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_COLO, RUN_STATE_RUNNING },
+
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
+ { RUN_STATE_RUNNING, RUN_STATE_COLO},
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_SUSPENDED, RUN_STATE_COLO},
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_WATCHDOG, RUN_STATE_COLO},
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
if (qemu_opt_get_bool(opts, "pretty", 0))
flags |= MONITOR_USE_PRETTY;
- if (qemu_opt_get_bool(opts, "default", 0))
- flags |= MONITOR_IS_DEFAULT;
+ if (qemu_opt_get_bool(opts, "default", 0)) {
+ error_report("option 'default' does nothing and is deprecated");
+ }
chardev = qemu_opt_get(opts, "chardev");
chr = qemu_chr_find(chardev);
QemuOpts *opts;
const char *p;
char label[32];
- int def = 0;
if (strstart(optarg, "chardev:", &p)) {
snprintf(label, sizeof(label), "%s", p);
} else {
snprintf(label, sizeof(label), "compat_monitor%d",
monitor_device_index);
- if (monitor_device_index == 0) {
- def = 1;
- }
opts = qemu_chr_parse_compat(label, optarg);
if (!opts) {
error_report("parse error: %s", optarg);
qemu_opt_set(opts, "mode", mode, &error_abort);
qemu_opt_set(opts, "chardev", label, &error_abort);
qemu_opt_set_bool(opts, "pretty", pretty, &error_abort);
- if (def)
- qemu_opt_set(opts, "default", "on", &error_abort);
monitor_device_index++;
}
#endif
}
+ colo_info_init();
+
if (net_init_clients() < 0) {
exit(1);
}
{
xen_xc = xc_interface_open(0, 0, 0);
if (xen_xc == NULL) {
- xen_be_printf(NULL, 0, "can't open xen interface\n");
+ xen_pv_printf(NULL, 0, "can't open xen interface\n");
return -1;
}
xen_fmem = xenforeignmemory_open(0, 0);
if (xen_fmem == NULL) {
- xen_be_printf(NULL, 0, "can't open xen fmem interface\n");
+ xen_pv_printf(NULL, 0, "can't open xen fmem interface\n");
xc_interface_close(xen_xc);
return -1;
}