* The format of arguments depends on the postcopy mode:
* - postcopy RAM only
* uint64_t host page size
- * uint64_t taget page size
+ * uint64_t target page size
*
* - postcopy RAM and postcopy dirty bitmaps
* format is the same as for postcopy RAM only
.global_section_id = 0,
};
+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
+
static bool should_validate_capability(int capability)
{
assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
.version_id = 1,
.minimum_version_id = 1,
.needed = vmstate_target_page_bits_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(target_page_bits, SaveState),
VMSTATE_END_OF_LIST()
}
.version_id = 1,
.minimum_version_id = 1,
.needed = vmstate_capabilites_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32_V(caps_count, SaveState, 1),
VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
vmstate_info_capability,
static int vmstate_uuid_post_load(void *opaque, int version_id)
{
SaveState *state = opaque;
- char uuid_src[UUID_FMT_LEN + 1];
- char uuid_dst[UUID_FMT_LEN + 1];
+ char uuid_src[UUID_STR_LEN];
+ char uuid_dst[UUID_STR_LEN];
if (!qemu_uuid_set) {
/*
.minimum_version_id = 1,
.needed = vmstate_uuid_needed,
.post_load = vmstate_uuid_post_load,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
VMSTATE_END_OF_LIST()
}
.post_load = configuration_post_load,
.pre_save = configuration_pre_save,
.post_save = configuration_post_save,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(len, SaveState),
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
- .subsections = (const VMStateDescription *[]) {
+ .subsections = (const VMStateDescription * const []) {
&vmstate_target_page_bits,
&vmstate_capabilites,
&vmstate_uuid,
}
static void dump_vmstate_vmss(FILE *out_file,
- const VMStateDescription **subsection,
+ const VMStateDescription *subsection,
int indent)
{ /* Dump a single subsection description through dump_vmstate_vmsd(). */
- if (*subsection != NULL) {
- dump_vmstate_vmsd(out_file, *subsection, indent, true);
+ if (subsection != NULL) { /* nothing is written for a NULL subsection */
+ dump_vmstate_vmsd(out_file, subsection, indent, true);
}
}
fprintf(out_file, "\n%*s]", indent, "");
}
if (vmsd->subsections != NULL) {
- const VMStateDescription **subsection = vmsd->subsections;
+ const VMStateDescription * const *subsection = vmsd->subsections;
bool first;
fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
if (!first) {
fprintf(out_file, ",\n");
}
- dump_vmstate_vmss(out_file, subsection, indent + 2);
+ dump_vmstate_vmss(out_file, *subsection, indent + 2);
subsection++;
first = false;
}
assert(priority <= MIG_PRI_MAX);
+ /*
+ * This should never happen; otherwise migration will probably fail
+ * silently somewhere, because we could wrongly apply one object's
+ * properties to another one. Bail out ASAP.
+ */
+ if (find_se(nse->idstr, nse->instance_id)) {
+ error_report("%s: Detected duplicate SaveStateEntry: "
+ "id=%s, instance_id=0x%"PRIx32, __func__,
+ nse->idstr, nse->instance_id);
+ exit(EXIT_FAILURE);
+ }
+
for (i = priority - 1; i >= 0; i--) {
se = savevm_state.handler_pri_head[i];
if (se != NULL) {
static void vmstate_check(const VMStateDescription *vmsd)
{
const VMStateField *field = vmsd->fields;
- const VMStateDescription **subsection = vmsd->subsections;
+ const VMStateDescription * const *subsection = vmsd->subsections;
if (field) {
while (field->name) {
}
}
+/*
+ * Replace any existing registration for (vmsd->name, instance_id):
+ * look the entry up with find_se(), hand it to
+ * savevm_state_handler_remove() if one exists, then register @vmsd
+ * again and return whatever vmstate_register() returns.
+ *
+ * NOTE(review): the entry taken off the list is not freed in this
+ * function -- confirm who owns it afterwards.
+ *
+ * See comment in hw/intc/xics.c:icp_realize()
+ *
+ * This function can be removed when
+ * pre_2_10_vmstate_register_dummy_icp() is removed.
+ */
+int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
+ const VMStateDescription *vmsd,
+ void *opaque)
+{
+ SaveStateEntry *se = find_se(vmsd->name, instance_id);
+
+ if (se) {
+ savevm_state_handler_remove(se);
+ }
+ return vmstate_register(obj, instance_id, vmsd, opaque);
+}
+
int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
const VMStateDescription *vmsd,
void *opaque, int alias_id,
static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
JSONWriter *vmdesc)
{
- uint64_t old_offset = qemu_file_total_transferred_fast(f);
+ uint64_t old_offset = qemu_file_transferred(f);
se->ops->save_state(f, se->opaque);
- uint64_t size = qemu_file_total_transferred_fast(f) - old_offset;
+ uint64_t size = qemu_file_transferred(f) - old_offset;
if (vmdesc) {
json_writer_int64(vmdesc, "size", size);
static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
{
int ret;
+ Error *local_err = NULL;
+ MigrationState *s = migrate_get_current();
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
return 0;
}
- if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+ if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
trace_savevm_section_skip(se->idstr, se->section_id);
return 0;
}
if (!se->vmsd) {
vmstate_save_old_style(f, se, vmdesc);
} else {
- ret = vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
+ ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc, &local_err);
if (ret) {
+ migrate_set_error(s, local_err);
+ error_report_err(local_err);
return ret;
}
}
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
uint32_t tmp;
+ MigrationState *ms = migrate_get_current();
+ Error *local_err = NULL;
if (len > MAX_VM_CMD_PACKAGED_SIZE) {
- error_report("%s: Unreasonably large packaged state: %zu",
+ error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
__func__, len);
+ migrate_set_error(ms, local_err);
+ error_report_err(local_err);
return -1;
}
void qemu_savevm_state_header(QEMUFile *f)
{ /* Write the stream magic/version and, if enabled, the configuration section. */
+ MigrationState *s = migrate_get_current();
+
+ s->vmdesc = json_writer_new(false); /* JSON writer stored on the migration state */
+
trace_savevm_state_header();
qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
qemu_put_be32(f, QEMU_VM_FILE_VERSION);
- if (migrate_get_current()->send_configuration) {
+ if (s->send_configuration) {
qemu_put_byte(f, QEMU_VM_CONFIGURATION);
- vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+
+ /*
+ * This starts the main json object and is paired with the
+ * json_writer_end_object in
+ * qemu_savevm_state_complete_precopy_non_iterable
+ *
+ * NOTE(review): the main object is opened only inside this
+ * send_configuration branch, while this same change drops the
+ * unconditional json_writer_start_object() from
+ * qemu_savevm_state_setup(). Confirm that later vmdesc writers
+ * cope when send_configuration is false.
+ */
+ json_writer_start_object(s->vmdesc, NULL);
+
+ json_writer_start_object(s->vmdesc, "configuration");
+ vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
+ json_writer_end_object(s->vmdesc); /* closes "configuration" only */
}
}
return false;
}
+int qemu_savevm_state_prepare(Error **errp)
+{ /* Call each registered handler's save_prepare hook; stop at the first failure. */
+ SaveStateEntry *se;
+ int ret;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->ops || !se->ops->save_prepare) {
+ continue; /* no save_prepare hook to call */
+ }
+ if (se->ops->is_active) {
+ if (!se->ops->is_active(se->opaque)) {
+ continue; /* handler reports itself inactive */
+ }
+ }
+
+ ret = se->ops->save_prepare(se->opaque, errp); /* errp is passed straight to the hook */
+ if (ret < 0) {
+ return ret; /* propagate the hook's negative return unchanged */
+ }
+ }
+
+ return 0; /* no hook returned a negative value */
+}
+
void qemu_savevm_state_setup(QEMUFile *f)
{
MigrationState *ms = migrate_get_current();
Error *local_err = NULL;
int ret;
- ms->vmdesc = json_writer_new(false);
- json_writer_start_object(ms->vmdesc, NULL);
json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
json_writer_start_array(ms->vmdesc, "devices");
!(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
continue;
}
- if (qemu_file_rate_limit(f)) {
+ if (migration_rate_exceeded(f)) {
return 0;
}
trace_savevm_section_start(se->idstr, se->section_id);
static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
+ int64_t start_ts_each, end_ts_each;
SaveStateEntry *se;
int ret;
continue;
}
}
+
+ start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_savevm_section_start(se->idstr, se->section_id);
save_section_header(f, se, QEMU_VM_SECTION_END);
qemu_file_set_error(f, ret);
return -1;
}
+ end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+ end_ts_each - start_ts_each);
}
+ trace_vmstate_downtime_checkpoint("src-iterable-saved");
+
return 0;
}
bool inactivate_disks)
{
MigrationState *ms = migrate_get_current();
+ int64_t start_ts_each, end_ts_each;
JSONWriter *vmdesc = ms->vmdesc;
int vmdesc_len;
SaveStateEntry *se;
continue;
}
+ start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+
ret = vmstate_save(f, se, vmdesc);
if (ret) {
qemu_file_set_error(f, ret);
return ret;
}
+
+ end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
+ end_ts_each - start_ts_each);
}
if (inactivate_disks) {
* bdrv_activate_all() on the other end won't fail. */
ret = bdrv_inactivate_all();
if (ret) {
- error_report("%s: bdrv_inactivate_all() failed (%d)",
- __func__, ret);
+ Error *local_err = NULL;
+ error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
+ __func__, ret);
+ migrate_set_error(ms, local_err);
+ error_report_err(local_err);
qemu_file_set_error(f, ret);
return ret;
}
json_writer_free(vmdesc);
ms->vmdesc = NULL;
+ trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
+
return 0;
}
}
flush:
- qemu_fflush(f);
- return 0;
+ return qemu_fflush(f);
}
/* Give an estimate of the amount left to be transferred,
return -EINVAL;
}
- migrate_init(ms);
- memset(&mig_stats, 0, sizeof(mig_stats));
- memset(&compression_counters, 0, sizeof(compression_counters));
+ ret = migrate_init(ms, errp);
+ if (ret) {
+ return ret;
+ }
ms->to_dst_file = f;
- qemu_mutex_unlock_iothread();
qemu_savevm_state_header(f);
qemu_savevm_state_setup(f);
- qemu_mutex_lock_iothread();
while (qemu_file_get_error(f) == 0) {
if (qemu_savevm_state_iterate(f, false) > 0) {
Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
- trace_loadvm_postcopy_handle_run_bh("enter");
+ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
/* TODO we should move all of this lot into postcopy_ram.c or a shared code
* in migration.c
*/
cpu_synchronize_all_post_init();
- trace_loadvm_postcopy_handle_run_bh("after cpu sync");
+ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
qemu_announce_self(&mis->announce_timer, migrate_announce_params());
- trace_loadvm_postcopy_handle_run_bh("after announce");
+ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
/* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
autostart = false;
}
- trace_loadvm_postcopy_handle_run_bh("after invalidate cache");
+ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
dirty_bitmap_mig_before_vm_start();
qemu_bh_delete(mis->bh);
- trace_loadvm_postcopy_handle_run_bh("return");
+ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
+ object_unref(OBJECT(migration_get_current()));
}
/* After all discards we can start running and asking for pages */
postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
+ object_ref(OBJECT(migration_get_current()));
qemu_bh_schedule(mis->bh);
/* We need to finish reading the stream from the package
error_report("CMD_OPEN_RETURN_PATH failed");
return -1;
}
+
+ /*
+ * Switchover ack is enabled but no device uses it, so send an ACK to
+ * source that it's OK to switchover. Do it here, after return path has
+ * been created.
+ */
+ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
+ int ret = migrate_send_rp_switchover_ack(mis);
+ if (ret) {
+ error_report(
+ "Could not send switchover ack RP MSG, err %d (%s)", ret,
+ strerror(-ret));
+ return ret;
+ }
+ }
break;
case MIG_CMD_PING:
}
static int
-qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
+ uint8_t type)
{
+ bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
uint32_t instance_id, version_id, section_id;
+ int64_t start_ts, end_ts;
SaveStateEntry *se;
char idstr[256];
int ret;
return -EINVAL;
}
+ if (trace_downtime) {
+ start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ }
+
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state for instance 0x%"PRIx32" of"
" device '%s'", instance_id, idstr);
return ret;
}
+
+ if (trace_downtime) {
+ end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_load("non-iterable", se->idstr,
+ se->instance_id, end_ts - start_ts);
+ }
+
if (!check_section_footer(f, se)) {
return -EINVAL;
}
}
static int
-qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
+ uint8_t type)
{
+ bool trace_downtime = (type == QEMU_VM_SECTION_END);
+ int64_t start_ts, end_ts;
uint32_t section_id;
SaveStateEntry *se;
int ret;
return -EINVAL;
}
+ if (trace_downtime) {
+ start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ }
+
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state section id %d(%s)",
section_id, se->idstr);
return ret;
}
+
+ if (trace_downtime) {
+ end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_load("iterable", se->idstr,
+ se->instance_id, end_ts - start_ts);
+ }
+
if (!check_section_footer(f, se)) {
return -EINVAL;
}
return 0;
}
+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
+{ /* Count the handlers whose switchover_ack_needed hook returns true. */
+ SaveStateEntry *se;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->ops || !se->ops->switchover_ack_needed) {
+ continue; /* handler has no switchover_ack_needed hook */
+ }
+
+ if (se->ops->switchover_ack_needed(se->opaque)) {
+ mis->switchover_ack_pending_num++; /* added to the current count; not reset here */
+ }
+ }
+
+ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
+}
+
static int qemu_loadvm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
}
- migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+ /* Current state can be either ACTIVE or RECOVER */
+ migrate_set_state(&mis->state, mis->state,
MIGRATION_STATUS_POSTCOPY_PAUSED);
/* Notify the fault thread for the invalidated file handle */
switch (section_type) {
case QEMU_VM_SECTION_START:
case QEMU_VM_SECTION_FULL:
- ret = qemu_loadvm_section_start_full(f, mis);
+ ret = qemu_loadvm_section_start_full(f, mis, section_type);
if (ret < 0) {
goto out;
}
break;
case QEMU_VM_SECTION_PART:
case QEMU_VM_SECTION_END:
- ret = qemu_loadvm_section_part_end(f, mis);
+ ret = qemu_loadvm_section_part_end(f, mis, section_type);
if (ret < 0) {
goto out;
}
return -EINVAL;
}
+ if (migrate_switchover_ack()) {
+ qemu_loadvm_state_switchover_ack_needed(mis);
+ }
+
cpu_synchronize_all_pre_loadvm();
ret = qemu_loadvm_state_main(f, mis);
return 0;
}
+int qemu_loadvm_approve_switchover(void)
+{ /* Consume one pending switchover ack; send the ACK once none remain. */
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
+ if (!mis->switchover_ack_pending_num) {
+ return -EINVAL; /* no approval is pending */
+ }
+
+ mis->switchover_ack_pending_num--;
+ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
+
+ if (mis->switchover_ack_pending_num) {
+ return 0; /* other approvals are still outstanding */
+ }
+
+ return migrate_send_rp_switchover_ack(mis); /* count reached zero: return the send result */
+}
+
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
bool has_devices, strList *devices, Error **errp)
{
QEMUSnapshotInfo sn1, *sn = &sn1;
int ret = -1, ret2;
QEMUFile *f;
- int saved_vm_running;
+ RunState saved_state = runstate_get();
uint64_t vm_state_size;
g_autoptr(GDateTime) now = g_date_time_new_now_local();
- AioContext *aio_context;
GLOBAL_STATE_CODE();
if (bs == NULL) {
return false;
}
- aio_context = bdrv_get_aio_context(bs);
- saved_vm_running = runstate_is_running();
-
- ret = global_state_store();
- if (ret) {
- error_setg(errp, "Error saving global state");
- return false;
- }
+ global_state_store();
vm_stop(RUN_STATE_SAVE_VM);
bdrv_drain_all_begin();
- aio_context_acquire(aio_context);
-
memset(sn, 0, sizeof(*sn));
/* fill auxiliary fields */
goto the_end;
}
ret = qemu_savevm_state(f, errp);
- vm_state_size = qemu_file_total_transferred(f);
+ vm_state_size = qemu_file_transferred(f);
ret2 = qemu_fclose(f);
if (ret < 0) {
goto the_end;
goto the_end;
}
- /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
- * for itself. BDRV_POLL_WHILE() does not support nested locking because
- * it only releases the lock once. Therefore synchronous I/O will deadlock
- * unless we release the AioContext before bdrv_all_create_snapshot().
- */
- aio_context_release(aio_context);
- aio_context = NULL;
-
ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
has_devices, devices, errp);
if (ret < 0) {
ret = 0;
the_end:
- if (aio_context) {
- aio_context_release(aio_context);
- }
-
bdrv_drain_all_end();
- if (saved_vm_running) {
- vm_start();
- }
+ vm_resume(saved_state);
return ret == 0;
}
QEMUSnapshotInfo sn;
QEMUFile *f;
int ret;
- AioContext *aio_context;
MigrationIncomingState *mis = migration_incoming_get_current();
if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
if (!bs_vm_state) {
return false;
}
- aio_context = bdrv_get_aio_context(bs_vm_state);
/* Don't even try to load empty VM states */
- aio_context_acquire(aio_context);
ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
- aio_context_release(aio_context);
if (ret < 0) {
return false;
} else if (sn.vm_state_size == 0) {
ret = -EINVAL;
goto err_drain;
}
- aio_context_acquire(aio_context);
ret = qemu_loadvm_state(f);
migration_incoming_state_destroy();
- aio_context_release(aio_context);
bdrv_drain_all_end();
return false;
}
+void load_snapshot_resume(RunState state)
+{ /* Resume via vm_resume(state), then request a wakeup if still suspended. */
+ vm_resume(state);
+ if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, &error_abort); /* RUNNING was requested but the runstate is SUSPENDED */
+ }
+}
+
bool delete_snapshot(const char *name, bool has_devices,
strList *devices, Error **errp)
{
{
Job *job = opaque;
SnapshotJob *s = container_of(job, SnapshotJob, common);
- int orig_vm_running;
+ RunState orig_state = runstate_get();
job_progress_set_remaining(&s->common, 1);
- orig_vm_running = runstate_is_running();
vm_stop(RUN_STATE_RESTORE_VM);
s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
- if (s->ret && orig_vm_running) {
- vm_start();
+ if (s->ret) {
+ load_snapshot_resume(orig_state);
}
job_progress_update(&s->common, 1);