#include "net/net.h"
#include "migration.h"
#include "migration/snapshot.h"
+#include "migration-stats.h"
#include "migration/vmstate.h"
#include "migration/misc.h"
#include "migration/register.h"
#include "migration/global_state.h"
+#include "migration/channel-block.h"
#include "ram.h"
-#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "savevm.h"
#include "postcopy-ram.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/json-writer.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/qmp/qerror.h"
#include "exec/target_page.h"
#include "trace.h"
#include "qemu/iov.h"
+#include "qemu/job.h"
#include "qemu/main-loop.h"
#include "block/snapshot.h"
#include "qemu/cutils.h"
#include "net/announce.h"
#include "qemu/yank.h"
#include "yank_functions.h"
+#include "sysemu/qtest.h"
+#include "options.h"
const unsigned int postcopy_ram_discard_version;
/***********************************************************/
/* savevm/loadvm support */
-static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
- int64_t pos, Error **errp)
-{
- int ret;
- QEMUIOVector qiov;
-
- qemu_iovec_init_external(&qiov, iov, iovcnt);
- ret = bdrv_writev_vmstate(opaque, &qiov, pos);
- if (ret < 0) {
- return ret;
- }
-
- return qiov.size;
-}
-
-static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
- size_t size, Error **errp)
-{
- return bdrv_load_vmstate(opaque, buf, pos, size);
-}
-
-static int bdrv_fclose(void *opaque, Error **errp)
-{
- return bdrv_flush(opaque);
-}
-
-static const QEMUFileOps bdrv_read_ops = {
- .get_buffer = block_get_buffer,
- .close = bdrv_fclose
-};
-
-static const QEMUFileOps bdrv_write_ops = {
- .writev_buffer = block_writev_buffer,
- .close = bdrv_fclose
-};
-
static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
+    /*
+     * Open the vmstate area of @bs as a QEMUFile.  The old QEMUFileOps
+     * callbacks are replaced by a QIOChannelBlock wrapping @bs; direction
+     * (input vs output) is selected by @is_writable.
+     */
    if (is_writable) {
-        return qemu_fopen_ops(bs, &bdrv_write_ops, false);
+        return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
+    } else {
+        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
    }
-    return qemu_fopen_ops(bs, &bdrv_read_ops, false);
}
uint32_t result = 0;
int i;
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ if (should_validate_capability(i) && s->capabilities[i]) {
result++;
}
}
state->capabilities = g_renew(MigrationCapability, state->capabilities,
state->caps_count);
for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
- if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ if (should_validate_capability(i) && s->capabilities[i]) {
state->capabilities[j++] = i;
}
}
continue;
}
source_state = test_bit(i, source_caps_bm);
- target_state = s->enabled_capabilities[i];
+ target_state = s->capabilities[i];
if (source_state != target_state) {
error_report("Capability %s is %s, but received capability is %s",
MigrationCapability_str(i),
field->version_id);
fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
field->field_exists ? "true" : "false");
+ if (field->flags & VMS_ARRAY) {
+ fprintf(out_file, "%*s\"num\": %d,\n", indent, "", field->num);
+ }
fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
if (field->vmsd != NULL) {
fprintf(out_file, ",\n");
field++;
first = false;
}
+ assert(field->flags == VMS_END);
fprintf(out_file, "\n%*s]", indent, "");
}
if (vmsd->subsections != NULL) {
}
}
+/*
+ * Perform some basic checks on vmsd's at registration
+ * time.  Recurses into struct-typed fields and into all
+ * subsections; aborts via g_assert_not_reached() on a
+ * malformed description rather than returning an error.
+ */
+static void vmstate_check(const VMStateDescription *vmsd)
+{
+    const VMStateField *field = vmsd->fields;
+    const VMStateDescription **subsection = vmsd->subsections;
+
+    if (field) {
+        while (field->name) {
+            if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
+                /*
+                 * Recurse to sub structures.
+                 * NOTE(review): assumes field->vmsd is non-NULL for every
+                 * VMS_STRUCT/VMS_VSTRUCT field — TODO confirm for all users.
+                 */
+                vmstate_check(field->vmsd);
+            }
+            /* Carry on */
+            field++;
+        }
+        /* Check for the end of field list canary */
+        if (field->flags != VMS_END) {
+            error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
+            g_assert_not_reached();
+        }
+    }
+
+    while (subsection && *subsection) {
+        /*
+         * The name of a subsection should start with the name of the
+         * current object.
+         */
+        assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
+        vmstate_check(*subsection);
+        subsection++;
+    }
+}
+
int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
const VMStateDescription *vmsd,
void *opaque, int alias_id,
} else {
se->instance_id = instance_id;
}
+
+ /* Perform a recursive sanity check during the test runs */
+ if (qtest_enabled()) {
+ vmstate_check(vmsd);
+ }
assert(!se->compat || se->instance_id == 0);
savevm_state_handler_insert(se);
return 0;
+/*
+ * Save a section through the legacy SaveVMHandlers::save_state callback.
+ * The section size recorded in @vmdesc (when non-NULL) is measured as the
+ * difference in bytes transferred on @f before and after the callback.
+ */
static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   JSONWriter *vmdesc)
{
-    int64_t old_offset, size;
-
-    old_offset = qemu_ftell_fast(f);
+    uint64_t old_offset = qemu_file_transferred_fast(f);
    se->ops->save_state(f, se->opaque);
-    size = qemu_ftell_fast(f) - old_offset;
+    uint64_t size = qemu_file_transferred_fast(f) - old_offset;
    if (vmdesc) {
        json_writer_int64(vmdesc, "size", size);
    }
}
-static int vmstate_save(QEMUFile *f, SaveStateEntry *se,
- JSONWriter *vmdesc)
-{
- trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
- if (!se->vmsd) {
- vmstate_save_old_style(f, se, vmdesc);
- return 0;
- }
- return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
-}
-
/*
* Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
*/
}
}
+/*
+ * Write one complete QEMU_VM_SECTION_FULL section for @se: section
+ * header, device state (vmsd-based or legacy save_state), and section
+ * footer.  When @vmdesc is non-NULL, a JSON object describing the
+ * section is appended to it.
+ *
+ * Returns 0 on success (including when the section is skipped because
+ * the device has no state or vmstate_save_needed() says no), or the
+ * negative error from vmstate_save_state().
+ * NOTE(review): on error the started @vmdesc JSON object is not closed
+ * and no footer is written — callers are expected to abandon the
+ * stream; confirm against callers.
+ */
+static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
+{
+    int ret;
+
+    /* Nothing to save: neither a vmsd nor a legacy save_state callback. */
+    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
+        return 0;
+    }
+    if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+        trace_savevm_section_skip(se->idstr, se->section_id);
+        return 0;
+    }
+
+    trace_savevm_section_start(se->idstr, se->section_id);
+    save_section_header(f, se, QEMU_VM_SECTION_FULL);
+    if (vmdesc) {
+        json_writer_start_object(vmdesc, NULL);
+        json_writer_str(vmdesc, "name", se->idstr);
+        json_writer_int64(vmdesc, "instance_id", se->instance_id);
+    }
+
+    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
+    if (!se->vmsd) {
+        /* Legacy handler: size is measured and logged inside. */
+        vmstate_save_old_style(f, se, vmdesc);
+    } else {
+        ret = vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    trace_savevm_section_end(se->idstr, se->section_id, 0);
+    save_section_footer(f, se);
+    if (vmdesc) {
+        json_writer_end_object(vmdesc);
+    }
+    return 0;
+}
/**
* qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
* command and associated data.
void qemu_savevm_state_setup(QEMUFile *f)
{
+ MigrationState *ms = migrate_get_current();
SaveStateEntry *se;
Error *local_err = NULL;
int ret;
+ ms->vmdesc = json_writer_new(false);
+ json_writer_start_object(ms->vmdesc, NULL);
+ json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
+ json_writer_start_array(ms->vmdesc, "devices");
+
trace_savevm_state_setup();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (se->vmsd && se->vmsd->early_setup) {
+ ret = vmstate_save(f, se, ms->vmdesc);
+ if (ret) {
+ qemu_file_set_error(f, ret);
+ break;
+ }
+ continue;
+ }
+
if (!se->ops || !se->ops->save_setup) {
continue;
}
!(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
continue;
}
- if (qemu_file_rate_limit(f)) {
+ if (migration_rate_exceeded(f)) {
return 0;
}
trace_savevm_section_start(se->idstr, se->section_id);
bool in_postcopy,
bool inactivate_disks)
{
- g_autoptr(JSONWriter) vmdesc = NULL;
+ MigrationState *ms = migrate_get_current();
+ JSONWriter *vmdesc = ms->vmdesc;
int vmdesc_len;
SaveStateEntry *se;
int ret;
- vmdesc = json_writer_new(false);
- json_writer_start_object(vmdesc, NULL);
- json_writer_int64(vmdesc, "page_size", qemu_target_page_size());
- json_writer_start_array(vmdesc, "devices");
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-
- if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
+ if (se->vmsd && se->vmsd->early_setup) {
+ /* Already saved during qemu_savevm_state_setup(). */
continue;
}
- if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
- trace_savevm_section_skip(se->idstr, se->section_id);
- continue;
- }
-
- trace_savevm_section_start(se->idstr, se->section_id);
-
- json_writer_start_object(vmdesc, NULL);
- json_writer_str(vmdesc, "name", se->idstr);
- json_writer_int64(vmdesc, "instance_id", se->instance_id);
- save_section_header(f, se, QEMU_VM_SECTION_FULL);
ret = vmstate_save(f, se, vmdesc);
if (ret) {
qemu_file_set_error(f, ret);
return ret;
}
- trace_savevm_section_end(se->idstr, se->section_id, 0);
- save_section_footer(f, se);
-
- json_writer_end_object(vmdesc);
}
if (inactivate_disks) {
qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
}
+ /* Free it now to detect any inconsistencies. */
+ json_writer_free(vmdesc);
+ ms->vmdesc = NULL;
+
return 0;
}
* the result is split into the amount for units that can and
* for units that can't do postcopy.
*/
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
-                               uint64_t *res_precopy_only,
-                               uint64_t *res_compatible,
-                               uint64_t *res_postcopy_only)
+void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
+                                        uint64_t *can_postcopy)
{
    SaveStateEntry *se;

-    *res_precopy_only = 0;
-    *res_compatible = 0;
-    *res_postcopy_only = 0;
+    *must_precopy = 0;
+    *can_postcopy = 0;
+
+    /* Sum each active handler's estimated pending bytes into both buckets. */
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->state_pending_estimate) {
+            continue;
+        }
+        /* Skip handlers that declare themselves inactive. */
+        if (se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+        se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
+    }
+}
+
+void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+{
+ SaveStateEntry *se;
+
+ *must_precopy = 0;
+ *can_postcopy = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
- if (!se->ops || !se->ops->save_live_pending) {
+ if (!se->ops || !se->ops->state_pending_exact) {
continue;
}
if (se->ops->is_active) {
continue;
}
}
- se->ops->save_live_pending(f, se->opaque, threshold_size,
- res_precopy_only, res_compatible,
- res_postcopy_only);
+ se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
}
}
return -EINVAL;
}
- if (migrate_use_block()) {
+ if (migrate_block()) {
error_setg(errp, "Block migration and snapshots are incompatible");
return -EINVAL;
}
migrate_init(ms);
- memset(&ram_counters, 0, sizeof(ram_counters));
+ memset(&mig_stats, 0, sizeof(mig_stats));
memset(&compression_counters, 0, sizeof(compression_counters));
+ reset_vfio_bytes_transferred();
ms->to_dst_file = f;
qemu_mutex_unlock_iothread();
if (se->is_ram) {
continue;
}
- if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
- continue;
- }
- if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
- continue;
- }
-
- save_section_header(f, se, QEMU_VM_SECTION_FULL);
-
ret = vmstate_save(f, se, NULL);
if (ret) {
return ret;
}
-
- save_section_footer(f, se);
}
qemu_put_byte(f, QEMU_VM_EOF);
return -EINVAL;
}
- if (!postcopy_ram_supported_by_host(mis)) {
+ if (!postcopy_ram_supported_by_host(mis, &local_err)) {
+ error_report_err(local_err);
postcopy_state_set(POSTCOPY_INCOMING_NONE);
return -1;
}
*/
qemu_sem_post(&mis->postcopy_pause_sem_fault);
+ if (migrate_postcopy_preempt()) {
+ /*
+ * The preempt channel will be created in async manner, now let's
+ * wait for it and make sure it's created.
+ */
+ qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
+ assert(mis->postcopy_qemufile_dst);
+ /* Kick the fast ram load thread too */
+ qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
+ }
+
return 0;
}
bioc->usage += length;
trace_loadvm_handle_cmd_packaged_received(ret);
- QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
+ QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
ret = qemu_loadvm_state_main(packf, mis);
trace_loadvm_handle_cmd_packaged_main(ret);
error_report("CMD_OPEN_RETURN_PATH failed");
return -1;
}
+
+ /*
+ * Switchover ack is enabled but no device uses it, so send an ACK to
+ * source that it's OK to switchover. Do it here, after return path has
+ * been created.
+ */
+ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
+ int ret = migrate_send_rp_switchover_ack(mis);
+ if (ret) {
+ error_report(
+ "Could not send switchover ack RP MSG, err %d (%s)", ret,
+ strerror(-ret));
+ return ret;
+ }
+ }
break;
case MIG_CMD_PING:
return 0;
}
+/*
+ * Count the devices whose load handler requires an explicit switchover
+ * ack, recording the total in mis->switchover_ack_pending_num.  Each
+ * such device later decrements the count via
+ * qemu_loadvm_approve_switchover().
+ */
+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
+{
+    SaveStateEntry *se;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->switchover_ack_needed) {
+            continue;
+        }
+
+        if (se->ops->switchover_ack_needed(se->opaque)) {
+            mis->switchover_ack_pending_num++;
+        }
+    }
+
+    trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
+}
+
static int qemu_loadvm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
{
int i;
- /*
- * If network is interrupted, any temp page we received will be useless
- * because we didn't mark them as "received" in receivedmap. After a
- * proper recovery later (which will sync src dirty bitmap with receivedmap
- * on dest) these cached small pages will be resent again.
- */
- for (i = 0; i < mis->postcopy_channels; i++) {
- postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
- }
-
trace_postcopy_pause_incoming();
assert(migrate_postcopy_ram());
mis->to_src_file = NULL;
qemu_mutex_unlock(&mis->rp_mutex);
+ /*
+ * NOTE: this must happen before reset the PostcopyTmpPages below,
+ * otherwise it's racy to reset those fields when the fast load thread
+ * can be accessing it in parallel.
+ */
+ if (mis->postcopy_qemufile_dst) {
+ qemu_file_shutdown(mis->postcopy_qemufile_dst);
+ /* Take the mutex to make sure the fast ram load thread halted */
+ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+ migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
+ qemu_fclose(mis->postcopy_qemufile_dst);
+ mis->postcopy_qemufile_dst = NULL;
+ qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+ }
+
migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
MIGRATION_STATUS_POSTCOPY_PAUSED);
/* Notify the fault thread for the invalidated file handle */
postcopy_fault_thread_notify(mis);
+ /*
+ * If network is interrupted, any temp page we received will be useless
+ * because we didn't mark them as "received" in receivedmap. After a
+ * proper recovery later (which will sync src dirty bitmap with receivedmap
+ * on dest) these cached small pages will be resent again.
+ */
+ for (i = 0; i < mis->postcopy_channels; i++) {
+ postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
+ }
+
error_report("Detected IO failure for postcopy. "
"Migration paused.");
while (true) {
section_type = qemu_get_byte(f);
- if (qemu_file_get_error(f)) {
- ret = qemu_file_get_error(f);
+ ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
+ if (ret) {
break;
}
return -EINVAL;
}
+ if (migrate_switchover_ack()) {
+ qemu_loadvm_state_switchover_ack_needed(mis);
+ }
+
cpu_synchronize_all_pre_loadvm();
ret = qemu_loadvm_state_main(f, mis);
return 0;
}
+/*
+ * Record one device's approval of the switchover.  When the last
+ * outstanding approval arrives, notify the source (over the return
+ * path) that it may switch over.
+ *
+ * Returns -EINVAL if no approvals were pending, 0 while approvals are
+ * still outstanding, otherwise the result of
+ * migrate_send_rp_switchover_ack().
+ */
+int qemu_loadvm_approve_switchover(void)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+
+    if (!mis->switchover_ack_pending_num) {
+        return -EINVAL;
+    }
+
+    mis->switchover_ack_pending_num--;
+    trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
+
+    if (mis->switchover_ack_pending_num) {
+        return 0;
+    }
+
+    return migrate_send_rp_switchover_ack(mis);
+}
+
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
bool has_devices, strList *devices, Error **errp)
{
saved_vm_running = runstate_is_running();
- ret = global_state_store();
- if (ret) {
- error_setg(errp, "Error saving global state");
- return false;
- }
+ global_state_store();
vm_stop(RUN_STATE_SAVE_VM);
bdrv_drain_all_begin();
goto the_end;
}
ret = qemu_savevm_state(f, errp);
- vm_state_size = qemu_ftell(f);
+ vm_state_size = qemu_file_transferred(f);
ret2 = qemu_fclose(f);
if (ret < 0) {
goto the_end;
goto the_end;
}
qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
- f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
+ f = qemu_file_new_output(QIO_CHANNEL(ioc));
object_unref(OBJECT(ioc));
ret = qemu_save_device_state(f);
if (ret < 0 || qemu_fclose(f) < 0) {
return;
}
qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
- f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
+ f = qemu_file_new_input(QIO_CHANNEL(ioc));
object_unref(OBJECT(ioc));
ret = qemu_loadvm_state(f);
goto err_drain;
}
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+ qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
mis->from_src_file = f;
if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {