#include "sysemu/qtest.h"
#include "options.h"
#include "sysemu/dirtylimit.h"
+#include "qemu/sockets.h"
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;
-static GSList *migration_blockers;
+static GSList *migration_blockers[MIG_MODE__MAX];
static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
int new_state);
static void migrate_fd_cancel(MigrationState *s);
-static int close_return_path_on_source(MigrationState *s);
+static bool close_return_path_on_source(MigrationState *s);
static void migration_downtime_start(MigrationState *s)
{
+ trace_vmstate_downtime_checkpoint("src-downtime-start");
s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
}
if (!s->downtime) {
s->downtime = now - s->downtime_start;
}
+
+ trace_vmstate_downtime_checkpoint("src-downtime-end");
}
static bool migration_needs_multiple_sockets(void)
return migrate_multifd() || migrate_postcopy_preempt();
}
-static bool uri_supports_multi_channels(const char *uri)
+static bool transport_supports_multi_channels(SocketAddress *saddr)
{
- return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
- strstart(uri, "vsock:", NULL);
+ return saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK;
}
static bool
-migration_channels_and_uri_compatible(const char *uri, Error **errp)
+migration_channels_and_transport_compatible(MigrationAddress *addr,
+ Error **errp)
{
if (migration_needs_multiple_sockets() &&
- !uri_supports_multi_channels(uri)) {
+ (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) &&
+ !transport_supports_multi_channels(&addr->u.socket)) {
error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
return false;
}
int migration_stop_vm(RunState state)
{
- return vm_stop_force_state(state);
+ int ret = vm_stop_force_state(state);
+
+ trace_vmstate_downtime_checkpoint("src-vm-stopped");
+
+ return ret;
}
void migration_object_init(void)
QAPI_CLONE(SocketAddress, address));
}
-static void qemu_start_incoming_migration(const char *uri, Error **errp)
+bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
+ Error **errp)
+{
+ g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
+ g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
+ InetSocketAddress *isock = &addr->u.rdma;
+ strList **tail = &addr->u.exec.args;
+
+ if (strstart(uri, "exec:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
+#ifdef WIN32
+ QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
+ QAPI_LIST_APPEND(tail, g_strdup("/c"));
+#else
+ QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
+ QAPI_LIST_APPEND(tail, g_strdup("-c"));
+#endif
+ QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
+ } else if (strstart(uri, "rdma:", NULL)) {
+ if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
+ qapi_free_InetSocketAddress(isock);
+ return false;
+ }
+ addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
+ } else if (strstart(uri, "tcp:", NULL) ||
+ strstart(uri, "unix:", NULL) ||
+ strstart(uri, "vsock:", NULL) ||
+ strstart(uri, "fd:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
+ SocketAddress *saddr = socket_parse(uri, errp);
+ if (!saddr) {
+ return false;
+ }
+ addr->u.socket.type = saddr->type;
+ addr->u.socket.u = saddr->u;
+ /* Don't free the objects inside; their ownership moved to "addr" */
+ g_free(saddr);
+ } else if (strstart(uri, "file:", NULL)) {
+ addr->transport = MIGRATION_ADDRESS_TYPE_FILE;
+ addr->u.file.filename = g_strdup(uri + strlen("file:"));
+ if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset,
+ errp)) {
+ return false;
+ }
+ } else {
+ error_setg(errp, "unknown migration protocol: %s", uri);
+ return false;
+ }
+
+ val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
+ val->addr = g_steal_pointer(&addr);
+ *channel = g_steal_pointer(&val);
+ return true;
+}
+
+static void qemu_start_incoming_migration(const char *uri, bool has_channels,
+ MigrationChannelList *channels,
+ Error **errp)
{
- const char *p = NULL;
+ g_autoptr(MigrationChannel) channel = NULL;
+ MigrationAddress *addr = NULL;
MigrationIncomingState *mis = migration_incoming_get_current();
- /* URI is not suitable for migration? */
- if (!migration_channels_and_uri_compatible(uri, errp)) {
+ /*
+ * Having preliminary checks for uri and channel
+ */
+ if (!uri == !channels) {
+ error_setg(errp, "need either 'uri' or 'channels' argument");
+ return;
+ }
+
+ if (channels) {
+ /* To verify that Migrate channel list has only item */
+ if (channels->next) {
+ error_setg(errp, "Channel list has more than one entries");
+ return;
+ }
+ addr = channels->value->addr;
+ }
+
+ if (uri) {
+ /* caller uses the old URI syntax */
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return;
+ }
+ addr = channel->addr;
+ }
+
+ /* transport mechanism not suitable for migration? */
+ if (!migration_channels_and_transport_compatible(addr, errp)) {
return;
}
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_SETUP);
- if (strstart(uri, "tcp:", &p) ||
- strstart(uri, "unix:", NULL) ||
- strstart(uri, "vsock:", NULL)) {
- socket_start_incoming_migration(p ? p : uri, errp);
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+ SocketAddress *saddr = &addr->u.socket;
+ if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+ socket_start_incoming_migration(saddr, errp);
+ } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+ fd_start_incoming_migration(saddr->u.fd.str, errp);
+ }
#ifdef CONFIG_RDMA
- } else if (strstart(uri, "rdma:", &p)) {
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
if (migrate_compress()) {
error_setg(errp, "RDMA and compression can't be used together");
return;
error_setg(errp, "RDMA and multifd can't be used together");
return;
}
- rdma_start_incoming_migration(p, errp);
+ rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
- } else if (strstart(uri, "exec:", &p)) {
- exec_start_incoming_migration(p, errp);
- } else if (strstart(uri, "fd:", &p)) {
- fd_start_incoming_migration(p, errp);
- } else if (strstart(uri, "file:", &p)) {
- file_start_incoming_migration(p, errp);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+ exec_start_incoming_migration(addr->u.exec.args, errp);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ file_start_incoming_migration(&addr->u.file, errp);
} else {
error_setg(errp, "unknown migration protocol: %s", uri);
}
Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
+
/* If capability late_block_activate is set:
* Only fire up the block code now if we're going to restart the
* VM, else 'cont' will do it.
*/
if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING))) {
+ runstate_is_live(global_state_get_runstate())))) {
/* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
bdrv_activate_all(&local_err);
*/
qemu_announce_self(&mis->announce_timer, migrate_announce_params());
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");
+
multifd_load_shutdown();
dirty_bitmap_mig_before_vm_start();
if (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING) {
+ runstate_is_live(global_state_get_runstate())) {
if (autostart) {
vm_start();
} else {
} else {
runstate_set(global_state_get_runstate());
}
+ trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
/*
* This must happen after any state changes since as soon as an external
* observer sees this event they might start to prod at the VM assuming
MIGRATION_STATUS_COMPLETED);
qemu_bh_delete(mis->bh);
migration_incoming_state_destroy();
+ object_unref(OBJECT(migrate_get_current()));
}
static void coroutine_fn
ret = qemu_loadvm_state(mis->from_src_file);
mis->loadvm_co = NULL;
+ trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");
+
ps = postcopy_state_get();
trace_process_incoming_migration_co_end(ret, ps);
if (ps != POSTCOPY_INCOMING_NONE) {
}
if (ret < 0) {
+ MigrationState *s = migrate_get_current();
+
+ if (migrate_has_error(s)) {
+ WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
+ error_report_err(s->error);
+ }
+ }
error_report("load of migration failed: %s", strerror(-ret));
goto fail;
}
}
mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
+ object_ref(OBJECT(migrate_get_current()));
qemu_bh_schedule(mis->bh);
return;
fail:
/**
* migration_incoming_setup: Setup incoming migration
* @f: file for main migration channel
- * @errp: where to put errors
- *
- * Returns: %true on success, %false on error.
*/
-static bool migration_incoming_setup(QEMUFile *f, Error **errp)
+static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
mis->from_src_file = f;
}
qemu_file_set_blocking(f, false);
- return true;
}
void migration_incoming_process(void)
return false;
}
-void migration_fd_process_incoming(QEMUFile *f, Error **errp)
+void migration_fd_process_incoming(QEMUFile *f)
{
- if (!migration_incoming_setup(f, errp)) {
- return;
- }
+ migration_incoming_setup(f);
if (postcopy_try_recover()) {
return;
}
* issue is not possible.
*/
ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
- sizeof(channel_magic), &local_err);
+ sizeof(channel_magic), errp);
if (ret != 0) {
- error_propagate(errp, local_err);
return;
}
}
if (multifd_load_setup(errp) != 0) {
- error_setg(errp, "Failed to setup multifd channels");
return;
}
if (default_channel) {
f = qemu_file_new_input(ioc);
-
- if (!migration_incoming_setup(f, errp)) {
- return;
- }
+ migration_incoming_setup(f);
} else {
/* Multiple connections */
assert(migration_needs_multiple_sockets());
{
MigrationState *s = migrate_get_current();
int state = qatomic_read(&s->state);
- GSList *cur_blocker = migration_blockers;
+ GSList *cur_blocker = migration_blockers[migrate_mode()];
info->blocked_reasons = NULL;
QEMUFile *tmp;
trace_migrate_fd_cleanup();
- qemu_mutex_unlock_iothread();
+ bql_unlock();
if (s->migration_thread_running) {
qemu_thread_join(&s->thread);
s->migration_thread_running = false;
}
- qemu_mutex_lock_iothread();
+ bql_lock();
multifd_save_cleanup();
qemu_mutex_lock(&s->qemu_file_lock);
}
}
+bool migration_postcopy_is_alive(int state)
+{
+ switch (state) {
+ case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+ case MIGRATION_STATUS_POSTCOPY_RECOVER:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool migration_in_postcopy_after_devices(MigrationState *s)
{
return migration_in_postcopy() && s->postcopy_after_devices;
s->to_dst_file = NULL;
s->state = MIGRATION_STATUS_NONE;
s->rp_state.from_dst_file = NULL;
- s->rp_state.error = false;
s->mbps = 0.0;
s->pages_per_second = 0.0;
s->downtime = 0;
s->migration_thread_running = false;
error_free(s->error);
s->error = NULL;
- s->hostname = NULL;
s->vmdesc = NULL;
migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
return 0;
}
-int migrate_add_blocker_internal(Error **reasonp, Error **errp)
+static bool is_busy(Error **reasonp, Error **errp)
{
+ ERRP_GUARD();
+
/* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
error_propagate_prepend(errp, *reasonp,
"disallowing migration blocker "
"(migration/snapshot in progress) for: ");
*reasonp = NULL;
- return -EBUSY;
+ return true;
}
-
- migration_blockers = g_slist_prepend(migration_blockers, *reasonp);
- return 0;
+ return false;
}
-int migrate_add_blocker(Error **reasonp, Error **errp)
+static bool is_only_migratable(Error **reasonp, Error **errp, int modes)
{
- if (only_migratable) {
+ ERRP_GUARD();
+
+ if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
error_propagate_prepend(errp, *reasonp,
"disallowing migration blocker "
"(--only-migratable) for: ");
*reasonp = NULL;
+ return true;
+ }
+ return false;
+}
+
+static int get_modes(MigMode mode, va_list ap)
+{
+ int modes = 0;
+
+ while (mode != -1 && mode != MIG_MODE_ALL) {
+ assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX);
+ modes |= BIT(mode);
+ mode = va_arg(ap, MigMode);
+ }
+ if (mode == MIG_MODE_ALL) {
+ modes = BIT(MIG_MODE__MAX) - 1;
+ }
+ return modes;
+}
+
+static int add_blockers(Error **reasonp, Error **errp, int modes)
+{
+ for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
+ if (modes & BIT(mode)) {
+ migration_blockers[mode] = g_slist_prepend(migration_blockers[mode],
+ *reasonp);
+ }
+ }
+ return 0;
+}
+
+int migrate_add_blocker(Error **reasonp, Error **errp)
+{
+ return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL);
+}
+
+int migrate_add_blocker_normal(Error **reasonp, Error **errp)
+{
+ return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1);
+}
+
+int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...)
+{
+ int modes;
+ va_list ap;
+
+ va_start(ap, mode);
+ modes = get_modes(mode, ap);
+ va_end(ap);
+
+ if (is_only_migratable(reasonp, errp, modes)) {
return -EACCES;
+ } else if (is_busy(reasonp, errp)) {
+ return -EBUSY;
}
+ return add_blockers(reasonp, errp, modes);
+}
+
+int migrate_add_blocker_internal(Error **reasonp, Error **errp)
+{
+ int modes = BIT(MIG_MODE__MAX) - 1;
- return migrate_add_blocker_internal(reasonp, errp);
+ if (is_busy(reasonp, errp)) {
+ return -EBUSY;
+ }
+ return add_blockers(reasonp, errp, modes);
}
void migrate_del_blocker(Error **reasonp)
{
if (*reasonp) {
- migration_blockers = g_slist_remove(migration_blockers, *reasonp);
+ for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
+ migration_blockers[mode] = g_slist_remove(migration_blockers[mode],
+ *reasonp);
+ }
error_free(*reasonp);
*reasonp = NULL;
}
}
-void qmp_migrate_incoming(const char *uri, Error **errp)
+void qmp_migrate_incoming(const char *uri, bool has_channels,
+ MigrationChannelList *channels, Error **errp)
{
Error *local_err = NULL;
static bool once = true;
return;
}
- qemu_start_incoming_migration(uri, &local_err);
+ qemu_start_incoming_migration(uri, has_channels, channels, &local_err);
if (local_err) {
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
* only re-setup the migration stream and poke existing migration
* to continue using that newly established channel.
*/
- qemu_start_incoming_migration(uri, errp);
+ qemu_start_incoming_migration(uri, false, NULL, errp);
}
void qmp_migrate_pause(Error **errp)
MigrationIncomingState *mis = migration_incoming_get_current();
int ret = 0;
- if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ if (migration_postcopy_is_alive(ms->state)) {
/* Source side, during postcopy */
+ Error *error = NULL;
+
+ /* Tell the core migration that we're pausing */
+ error_setg(&error, "Postcopy migration is paused by the user");
+ migrate_set_error(ms, error);
+ error_free(error);
+
qemu_mutex_lock(&ms->qemu_file_lock);
if (ms->to_dst_file) {
ret = qemu_file_shutdown(ms->to_dst_file);
if (ret) {
error_setg(errp, "Failed to pause source migration");
}
+
+ /*
+ * Kick the migration thread out of any waiting windows (on behalf
+ * of the rp thread).
+ */
+ migration_rp_kick(ms);
+
return;
}
- if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ if (migration_postcopy_is_alive(mis->state)) {
ret = qemu_file_shutdown(mis->from_src_file);
if (ret) {
error_setg(errp, "Failed to pause destination migration");
}
error_setg(errp, "migrate-pause is currently only supported "
- "during postcopy-active state");
+ "during postcopy-active or postcopy-recover state");
}
bool migration_is_blocked(Error **errp)
{
+ GSList *blockers = migration_blockers[migrate_mode()];
+
if (qemu_savevm_state_blocked(errp)) {
return true;
}
- if (migration_blockers) {
- error_propagate(errp, error_copy(migration_blockers->data));
+ if (blockers) {
+ error_propagate(errp, error_copy(blockers->data));
return true;
}
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
bool resume, Error **errp)
{
- Error *local_err = NULL;
-
if (blk_inc) {
warn_report("parameter 'inc' is deprecated;"
" use blockdev-mirror with NBD instead");
"current migration capabilities");
return false;
}
- if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) {
- error_propagate(errp, local_err);
+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, errp)) {
return false;
}
s->must_remove_block_options = true;
return true;
}
-void qmp_migrate(const char *uri, bool has_blk, bool blk,
+void qmp_migrate(const char *uri, bool has_channels,
+ MigrationChannelList *channels, bool has_blk, bool blk,
bool has_inc, bool inc, bool has_detach, bool detach,
bool has_resume, bool resume, Error **errp)
{
bool resume_requested;
Error *local_err = NULL;
MigrationState *s = migrate_get_current();
- const char *p = NULL;
+ g_autoptr(MigrationChannel) channel = NULL;
+ MigrationAddress *addr = NULL;
- /* URI is not suitable for migration? */
- if (!migration_channels_and_uri_compatible(uri, errp)) {
+ /*
+ * Having preliminary checks for uri and channel
+ */
+ if (!uri == !channels) {
+ error_setg(errp, "need either 'uri' or 'channels' argument");
+ return;
+ }
+
+ if (channels) {
+ /* To verify that Migrate channel list has only item */
+ if (channels->next) {
+ error_setg(errp, "Channel list has more than one entries");
+ return;
+ }
+ addr = channels->value->addr;
+ }
+
+ if (uri) {
+ /* caller uses the old URI syntax */
+ if (!migrate_uri_parse(uri, &channel, errp)) {
+ return;
+ }
+ addr = channel->addr;
+ }
+
+ /* transport mechanism not suitable for migration? */
+ if (!migration_channels_and_transport_compatible(addr, errp)) {
return;
}
}
}
- if (strstart(uri, "tcp:", &p) ||
- strstart(uri, "unix:", NULL) ||
- strstart(uri, "vsock:", NULL)) {
- socket_start_outgoing_migration(s, p ? p : uri, &local_err);
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
+ SocketAddress *saddr = &addr->u.socket;
+ if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
+ saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
+ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
+ socket_start_outgoing_migration(s, saddr, &local_err);
+ } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
+ fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err);
+ }
#ifdef CONFIG_RDMA
- } else if (strstart(uri, "rdma:", &p)) {
- rdma_start_outgoing_migration(s, p, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+ rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err);
#endif
- } else if (strstart(uri, "exec:", &p)) {
- exec_start_outgoing_migration(s, p, &local_err);
- } else if (strstart(uri, "fd:", &p)) {
- fd_start_outgoing_migration(s, p, &local_err);
- } else if (strstart(uri, "file:", &p)) {
- file_start_outgoing_migration(s, p, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
+ exec_start_outgoing_migration(s, addr->u.exec.args, &local_err);
+ } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ file_start_outgoing_migration(s, &addr->u.file, &local_err);
} else {
error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
qemu_sem_post(&s->pause_sem);
}
-/* migration thread support */
-/*
- * Something bad happened to the RP stream, mark an error
- * The caller shall print or trace something to indicate why
- */
-static void mark_source_rp_bad(MigrationState *s)
+int migration_rp_wait(MigrationState *s)
{
- s->rp_state.error = true;
-}
+ /* If migration has failure already, ignore the wait */
+ if (migrate_has_error(s)) {
+ return -1;
+ }
-void migration_rp_wait(MigrationState *s)
-{
qemu_sem_wait(&s->rp_state.rp_sem);
+
+ /* After wait, double check that there's no failure */
+ if (migrate_has_error(s)) {
+ return -1;
+ }
+
+ return 0;
}
void migration_rp_kick(MigrationState *s)
* We're allowed to send more than requested (e.g. to round to our page size)
* and we don't need to send pages that have already been sent.
*/
-static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
- ram_addr_t start, size_t len)
+static void
+migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
+ ram_addr_t start, size_t len, Error **errp)
{
long our_host_ps = qemu_real_host_page_size();
*/
if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
!QEMU_IS_ALIGNED(len, our_host_ps)) {
- error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
- " len: %zd", __func__, start, len);
- mark_source_rp_bad(ms);
+ error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:"
+ RAM_ADDR_FMT " len: %zd", start, len);
return;
}
- if (ram_save_queue_pages(rbname, start, len)) {
- mark_source_rp_bad(ms);
- }
+ ram_save_queue_pages(rbname, start, len, errp);
}
-static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
+static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name,
+ Error **errp)
{
RAMBlock *block = qemu_ram_block_by_name(block_name);
if (!block) {
- error_report("%s: invalid block name '%s'", __func__, block_name);
- return -EINVAL;
+ error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'",
+ block_name);
+ return false;
}
/* Fetch the received bitmap and refresh the dirty bitmap */
- return ram_dirty_bitmap_reload(s, block);
+ return ram_dirty_bitmap_reload(s, block, errp);
}
-static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
+static bool migrate_handle_rp_resume_ack(MigrationState *s,
+ uint32_t value, Error **errp)
{
trace_source_return_path_thread_resume_ack(value);
if (value != MIGRATION_RESUME_ACK_VALUE) {
- error_report("%s: illegal resume_ack value %"PRIu32,
- __func__, value);
- return -1;
+ error_setg(errp, "illegal resume_ack value %"PRIu32, value);
+ return false;
}
/* Now both sides are active. */
/* Notify send thread that time to continue send pages */
migration_rp_kick(s);
- return 0;
+ return true;
}
/*
uint32_t tmp32, sibling_error;
ram_addr_t start = 0; /* =0 to silence warning */
size_t len = 0, expected_len;
+ Error *err = NULL;
int res;
trace_source_return_path_thread_entry();
rcu_register_thread();
- while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
- migration_is_setup_or_active(ms->state)) {
+ while (migration_is_setup_or_active(ms->state)) {
trace_source_return_path_thread_loop_top();
+
header_type = qemu_get_be16(rp);
header_len = qemu_get_be16(rp);
if (qemu_file_get_error(rp)) {
- mark_source_rp_bad(ms);
+ qemu_file_get_error_obj(rp, &err);
goto out;
}
if (header_type >= MIG_RP_MSG_MAX ||
header_type == MIG_RP_MSG_INVALID) {
- error_report("RP: Received invalid message 0x%04x length 0x%04x",
- header_type, header_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
+ header_type, header_len);
goto out;
}
if ((rp_cmd_args[header_type].len != -1 &&
header_len != rp_cmd_args[header_type].len) ||
header_len > sizeof(buf)) {
- error_report("RP: Received '%s' message (0x%04x) with"
- "incorrect length %d expecting %zu",
- rp_cmd_args[header_type].name, header_type, header_len,
- (size_t)rp_cmd_args[header_type].len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Received '%s' message (0x%04x) with"
+ "incorrect length %d expecting %zu",
+ rp_cmd_args[header_type].name, header_type, header_len,
+ (size_t)rp_cmd_args[header_type].len);
goto out;
}
/* We know we've got a valid header by this point */
res = qemu_get_buffer(rp, buf, header_len);
if (res != header_len) {
- error_report("RP: Failed reading data for message 0x%04x"
- " read %d expected %d",
- header_type, res, header_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Failed reading data for message 0x%04x"
+ " read %d expected %d",
+ header_type, res, header_len);
goto out;
}
sibling_error = ldl_be_p(buf);
trace_source_return_path_thread_shut(sibling_error);
if (sibling_error) {
- error_report("RP: Sibling indicated error %d", sibling_error);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Sibling indicated error %d", sibling_error);
}
/*
* We'll let the main thread deal with closing the RP
case MIG_RP_MSG_REQ_PAGES:
start = ldq_be_p(buf);
len = ldl_be_p(buf + 8);
- migrate_handle_rp_req_pages(ms, NULL, start, len);
+ migrate_handle_rp_req_pages(ms, NULL, start, len, &err);
+ if (err) {
+ goto out;
+ }
break;
case MIG_RP_MSG_REQ_PAGES_ID:
expected_len += tmp32;
}
if (header_len != expected_len) {
- error_report("RP: Req_Page_id with length %d expecting %zd",
- header_len, expected_len);
- mark_source_rp_bad(ms);
+ error_setg(&err, "Req_Page_id with length %d expecting %zd",
+ header_len, expected_len);
+ goto out;
+ }
+ migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len,
+ &err);
+ if (err) {
goto out;
}
- migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
break;
case MIG_RP_MSG_RECV_BITMAP:
if (header_len < 1) {
- error_report("%s: missing block name", __func__);
- mark_source_rp_bad(ms);
+ error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name");
goto out;
}
/* Format: len (1B) + idstr (<255B). This ends the idstr. */
buf[buf[0] + 1] = '\0';
- if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
- mark_source_rp_bad(ms);
+ if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) {
goto out;
}
break;
case MIG_RP_MSG_RESUME_ACK:
tmp32 = ldl_be_p(buf);
- if (migrate_handle_rp_resume_ack(ms, tmp32)) {
- mark_source_rp_bad(ms);
+ if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) {
goto out;
}
break;
}
out:
- if (qemu_file_get_error(rp)) {
+ if (err) {
+ migrate_set_error(ms, err);
+ error_free(err);
trace_source_return_path_thread_bad_end();
- mark_source_rp_bad(ms);
+ }
+
+ if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
+ /*
+ * this will be extremely unlikely: that we got yet another network
+ * issue during recovering of the 1st network failure.. during this
+ * period the main migration thread can be waiting on rp_sem for
+ * this thread to sync with the other side.
+ *
+ * When this happens, explicitly kick the migration thread out of
+ * RECOVER stage and back to PAUSED, so the admin can try
+ * everything again.
+ */
+ migration_rp_kick(ms);
}
trace_source_return_path_thread_end();
rcu_unregister_thread();
+
return NULL;
}
return 0;
}
-static int close_return_path_on_source(MigrationState *ms)
+/* Return true if error detected, or false otherwise */
+static bool close_return_path_on_source(MigrationState *ms)
{
- int ret;
-
if (!ms->rp_state.rp_thread_created) {
- return 0;
+ return false;
}
trace_migration_return_path_end_before();
}
}
- trace_await_return_path_close_on_source_joining();
qemu_thread_join(&ms->rp_state.rp_thread);
ms->rp_state.rp_thread_created = false;
- trace_await_return_path_close_on_source_close();
-
- ret = ms->rp_state.error;
- ms->rp_state.error = false;
-
migration_release_dst_files(ms);
+ trace_migration_return_path_end_after();
- trace_migration_return_path_end_after(ret);
- return ret;
+ /* Return path will persist the error in MigrationState when quit */
+ return migrate_has_error(ms);
}
static inline void
}
trace_postcopy_start();
- qemu_mutex_lock_iothread();
+ bql_lock();
trace_postcopy_start_set_run();
migration_downtime_start(ms);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
global_state_store();
ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE);
if (ret < 0) {
migration_downtime_end(ms);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
if (migrate_postcopy_ram()) {
/*
error_report_err(local_err);
}
}
- qemu_mutex_unlock_iothread();
+ bql_unlock();
return -1;
}
/**
* migration_maybe_pause: Pause if required to by
- * migrate_pause_before_switchover called with the iothread locked
+ * migrate_pause_before_switchover called with the BQL locked
* Returns: 0 on success
*/
static int migration_maybe_pause(MigrationState *s,
* wait for the 'pause_sem' semaphore.
*/
if (s->state != MIGRATION_STATUS_CANCELLING) {
- qemu_mutex_unlock_iothread();
+ bql_unlock();
migrate_set_state(&s->state, *current_active_state,
MIGRATION_STATUS_PRE_SWITCHOVER);
qemu_sem_wait(&s->pause_sem);
migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
new_state);
*current_active_state = new_state;
- qemu_mutex_lock_iothread();
+ bql_lock();
}
return s->state == new_state ? 0 : -EINVAL;
{
int ret;
- qemu_mutex_lock_iothread();
+ bql_lock();
migration_downtime_start(s);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
s->vm_old_state = runstate_get();
global_state_store();
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
s->block_inactive);
out_unlock:
- qemu_mutex_unlock_iothread();
+ bql_unlock();
return ret;
}
{
trace_migration_completion_postcopy_end();
- qemu_mutex_lock_iothread();
+ bql_lock();
qemu_savevm_state_complete_postcopy(s->to_dst_file);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
/*
* Shutdown the postcopy fast path thread. This is only needed when dest
*/
Error *local_err = NULL;
- qemu_mutex_lock_iothread();
+ bql_lock();
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
s->block_inactive = false;
}
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
migrate_set_state(&s->state, current_active_state,
qemu_savevm_send_postcopy_resume(s->to_dst_file);
while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
- migration_rp_wait(s);
+ if (migration_rp_wait(s)) {
+ return -1;
+ }
}
if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
/* If we enabled cpu throttling for auto-converge, turn it off. */
cpu_throttle_stop();
- qemu_mutex_lock_iothread();
+ bql_lock();
switch (s->state) {
case MIGRATION_STATUS_COMPLETED:
migration_calculate_complete(s);
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_CANCELLING:
- if (s->vm_old_state == RUN_STATE_RUNNING) {
+ if (runstate_is_live(s->vm_old_state)) {
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
vm_start();
}
break;
}
migrate_fd_cleanup_schedule(s);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
static void bg_migration_iteration_finish(MigrationState *s)
*/
ram_write_tracking_stop();
- qemu_mutex_lock_iothread();
+ bql_lock();
switch (s->state) {
case MIGRATION_STATUS_COMPLETED:
migration_calculate_complete(s);
}
migrate_fd_cleanup_schedule(s);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
/*
object_ref(OBJECT(s));
update_iteration_initial_status(s);
- qemu_mutex_lock_iothread();
+ bql_lock();
qemu_savevm_state_header(s->to_dst_file);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
/*
* If we opened the return path, we need to make sure dst has it
qemu_savevm_send_colo_enable(s->to_dst_file);
}
- qemu_mutex_lock_iothread();
+ bql_lock();
qemu_savevm_state_setup(s->to_dst_file);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
qemu_bh_delete(s->vm_start_bh);
s->vm_start_bh = NULL;
- vm_start();
+ vm_resume(s->vm_old_state);
migration_downtime_end(s);
}
ram_write_tracking_prepare();
#endif
- qemu_mutex_lock_iothread();
+ bql_lock();
qemu_savevm_state_header(s->to_dst_file);
qemu_savevm_state_setup(s->to_dst_file);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
trace_migration_thread_setup_complete();
migration_downtime_start(s);
- qemu_mutex_lock_iothread();
+ bql_lock();
- /*
- * If VM is currently in suspended state, then, to make a valid runstate
- * transition in vm_stop_force_state() we need to wakeup it up.
- */
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
s->vm_old_state = runstate_get();
global_state_store();
s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
qemu_bh_schedule(s->vm_start_bh);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
while (migration_is_active(s)) {
MigIterateState iter_state = bg_migration_iteration_run(s);
if (early_fail) {
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
bg_migration_iteration_finish(s);