migration: Reference migration state around loadvm_postcopy_handle_run_bh

[mirror_qemu.git] / migration / savevm.c
diff --git a/migration/savevm.c b/migration/savevm.c

index 032044b1d505c36bb874175ffdff926d21bf4190..93387350c7f54634027286a771cb9c5c3876e644 100644 (file)
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -117,7 +117,7 @@ static struct mig_cmd_args {
   * The format of arguments is depending on postcopy mode:
   * - postcopy RAM only
   *   uint64_t host page size
- *   uint64_t taget page size
+ *   uint64_t target page size
   *
   * - postcopy RAM and postcopy dirty bitmaps
   *   format is the same as for postcopy RAM only
@@ -237,6 +237,8 @@ static SaveState savevm_state = {
      .global_section_id = 0,
  };
  
+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
+
  static bool should_validate_capability(int capability)
  {
      assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
@@ -436,7 +438,7 @@ static const VMStateDescription vmstate_target_page_bits = {
      .version_id = 1,
      .minimum_version_id = 1,
      .needed = vmstate_target_page_bits_needed,
-    .fields = (VMStateField[]) {
+    .fields = (const VMStateField[]) {
          VMSTATE_UINT32(target_page_bits, SaveState),
          VMSTATE_END_OF_LIST()
      }
@@ -452,7 +454,7 @@ static const VMStateDescription vmstate_capabilites = {
      .version_id = 1,
      .minimum_version_id = 1,
      .needed = vmstate_capabilites_needed,
-    .fields = (VMStateField[]) {
+    .fields = (const VMStateField[]) {
          VMSTATE_UINT32_V(caps_count, SaveState, 1),
          VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                      vmstate_info_capability,
@@ -469,8 +471,8 @@ static bool vmstate_uuid_needed(void *opaque)
  static int vmstate_uuid_post_load(void *opaque, int version_id)
  {
      SaveState *state = opaque;
-    char uuid_src[UUID_FMT_LEN + 1];
-    char uuid_dst[UUID_FMT_LEN + 1];
+    char uuid_src[UUID_STR_LEN];
+    char uuid_dst[UUID_STR_LEN];
  
      if (!qemu_uuid_set) {
          /*
@@ -497,7 +499,7 @@ static const VMStateDescription vmstate_uuid = {
      .minimum_version_id = 1,
      .needed = vmstate_uuid_needed,
      .post_load = vmstate_uuid_post_load,
-    .fields = (VMStateField[]) {
+    .fields = (const VMStateField[]) {
          VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
          VMSTATE_END_OF_LIST()
      }
@@ -510,12 +512,12 @@ static const VMStateDescription vmstate_configuration = {
      .post_load = configuration_post_load,
      .pre_save = configuration_pre_save,
      .post_save = configuration_post_save,
-    .fields = (VMStateField[]) {
+    .fields = (const VMStateField[]) {
          VMSTATE_UINT32(len, SaveState),
          VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
          VMSTATE_END_OF_LIST()
      },
-    .subsections = (const VMStateDescription *[]) {
+    .subsections = (const VMStateDescription * const []) {
          &vmstate_target_page_bits,
          &vmstate_capabilites,
          &vmstate_uuid,
@@ -549,11 +551,11 @@ static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
  }
  
  static void dump_vmstate_vmss(FILE *out_file,
-                              const VMStateDescription **subsection,
+                              const VMStateDescription *subsection,
                                int indent)
  {
-    if (*subsection != NULL) {
-        dump_vmstate_vmsd(out_file, *subsection, indent, true);
+    if (subsection != NULL) {
+        dump_vmstate_vmsd(out_file, subsection, indent, true);
      }
  }
  
@@ -595,7 +597,7 @@ static void dump_vmstate_vmsd(FILE *out_file,
          fprintf(out_file, "\n%*s]", indent, "");
      }
      if (vmsd->subsections != NULL) {
-        const VMStateDescription **subsection = vmsd->subsections;
+        const VMStateDescription * const *subsection = vmsd->subsections;
          bool first;
  
          fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
@@ -604,7 +606,7 @@ static void dump_vmstate_vmsd(FILE *out_file,
              if (!first) {
                  fprintf(out_file, ",\n");
              }
-            dump_vmstate_vmss(out_file, subsection, indent + 2);
+            dump_vmstate_vmss(out_file, *subsection, indent + 2);
              subsection++;
              first = false;
          }
@@ -716,6 +718,18 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
  
      assert(priority <= MIG_PRI_MAX);
  
+    /*
+     * This should never happen; otherwise migration will probably fail
+     * silently somewhere, because we could wrongly apply one object's
+     * properties to another one.  Bail out ASAP.
+     */
+    if (find_se(nse->idstr, nse->instance_id)) {
+        error_report("%s: Detected duplicate SaveStateEntry: "
+                     "id=%s, instance_id=0x%"PRIx32, __func__,
+                     nse->idstr, nse->instance_id);
+        exit(EXIT_FAILURE);
+    }
+
      for (i = priority - 1; i >= 0; i--) {
          se = savevm_state.handler_pri_head[i];
          if (se != NULL) {
@@ -817,7 +831,7 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
  static void vmstate_check(const VMStateDescription *vmsd)
  {
      const VMStateField *field = vmsd->fields;
-    const VMStateDescription **subsection = vmsd->subsections;
+    const VMStateDescription * const *subsection = vmsd->subsections;
  
      if (field) {
          while (field->name) {
@@ -846,6 +860,24 @@ static void vmstate_check(const VMStateDescription *vmsd)
      }
  }
  
+/*
+ * Remove any entry find_se() returns for (vmsd->name, instance_id), then
+ * return vmstate_register(obj, instance_id, vmsd, opaque).
+ * See comment in hw/intc/xics.c:icp_realize().  This function can be
+ * removed when pre_2_10_vmstate_register_dummy_icp() is removed.
+ */
+int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
+                                 const VMStateDescription *vmsd,
+                                 void *opaque)
+{
+    SaveStateEntry *se = find_se(vmsd->name, instance_id);
+
+    if (se) {
+        savevm_state_handler_remove(se);
+    }
+    return vmstate_register(obj, instance_id, vmsd, opaque);
+}
+
  int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                     const VMStateDescription *vmsd,
                                     void *opaque, int alias_id,
@@ -927,9 +959,9 @@ static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
  static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                     JSONWriter *vmdesc)
  {
-    uint64_t old_offset = qemu_file_total_transferred_fast(f);
+    uint64_t old_offset = qemu_file_transferred(f);
      se->ops->save_state(f, se->opaque);
-    uint64_t size = qemu_file_total_transferred_fast(f) - old_offset;
+    uint64_t size = qemu_file_transferred(f) - old_offset;
  
      if (vmdesc) {
          json_writer_int64(vmdesc, "size", size);
@@ -979,11 +1011,13 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
  static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
  {
      int ret;
+    Error *local_err = NULL;
+    MigrationState *s = migrate_get_current();
  
      if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
          return 0;
      }
-    if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+    if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
          trace_savevm_section_skip(se->idstr, se->section_id);
          return 0;
      }
@@ -1000,8 +1034,10 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
      if (!se->vmsd) {
          vmstate_save_old_style(f, se, vmdesc);
      } else {
-        ret = vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
+        ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc, &local_err);
          if (ret) {
+            migrate_set_error(s, local_err);
+            error_report_err(local_err);
              return ret;
          }
      }
@@ -1068,10 +1104,14 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
  int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
  {
      uint32_t tmp;
+    MigrationState *ms = migrate_get_current();
+    Error *local_err = NULL;
  
      if (len > MAX_VM_CMD_PACKAGED_SIZE) {
-        error_report("%s: Unreasonably large packaged state: %zu",
+        error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
                       __func__, len);
+        migrate_set_error(ms, local_err);
+        error_report_err(local_err);
          return -1;
      }
  
@@ -1209,13 +1249,27 @@ void qemu_savevm_non_migratable_list(strList **reasons)
  
  void qemu_savevm_state_header(QEMUFile *f)
  {
+    MigrationState *s = migrate_get_current();
+
+    s->vmdesc = json_writer_new(false);
+
      trace_savevm_state_header();
      qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
      qemu_put_be32(f, QEMU_VM_FILE_VERSION);
  
-    if (migrate_get_current()->send_configuration) {
+    if (s->send_configuration) {
          qemu_put_byte(f, QEMU_VM_CONFIGURATION);
-        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+
+        /*
+         * This starts the main json object and is paired with the
+         * json_writer_end_object in
+         * qemu_savevm_state_complete_precopy_non_iterable
+         */
+        json_writer_start_object(s->vmdesc, NULL);
+
+        json_writer_start_object(s->vmdesc, "configuration");
+        vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
+        json_writer_end_object(s->vmdesc);
      }
  }
  
@@ -1233,6 +1287,30 @@ bool qemu_savevm_state_guest_unplug_pending(void)
      return false;
  }
  
+int qemu_savevm_state_prepare(Error **errp)
+{
+    SaveStateEntry *se;
+    int ret;
+    /* Call save_prepare() on each handler that provides it and is active. */
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->save_prepare) {
+            continue;
+        }
+        if (se->ops->is_active) { /* without this hook: treated as active */
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+        /* A negative result stops the walk and is returned unchanged. */
+        ret = se->ops->save_prepare(se->opaque, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    /* No hook returned a negative value. */
+    return 0;
+}
+
  void qemu_savevm_state_setup(QEMUFile *f)
  {
      MigrationState *ms = migrate_get_current();
@@ -1240,8 +1318,6 @@ void qemu_savevm_state_setup(QEMUFile *f)
      Error *local_err = NULL;
      int ret;
  
-    ms->vmdesc = json_writer_new(false);
-    json_writer_start_object(ms->vmdesc, NULL);
      json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
      json_writer_start_array(ms->vmdesc, "devices");
  
@@ -1338,7 +1414,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
              !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
              continue;
          }
-        if (qemu_file_rate_limit(f)) {
+        if (migration_rate_exceeded(f)) {
              return 0;
          }
          trace_savevm_section_start(se->idstr, se->section_id);
@@ -1415,6 +1491,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
  static
  int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
  {
+    int64_t start_ts_each, end_ts_each;
      SaveStateEntry *se;
      int ret;
  
@@ -1431,6 +1508,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
                  continue;
              }
          }
+
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
          trace_savevm_section_start(se->idstr, se->section_id);
  
          save_section_header(f, se, QEMU_VM_SECTION_END);
@@ -1442,8 +1521,13 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
              qemu_file_set_error(f, ret);
              return -1;
          }
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
      }
  
+    trace_vmstate_downtime_checkpoint("src-iterable-saved");
+
      return 0;
  }
  
@@ -1452,6 +1536,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                      bool inactivate_disks)
  {
      MigrationState *ms = migrate_get_current();
+    int64_t start_ts_each, end_ts_each;
      JSONWriter *vmdesc = ms->vmdesc;
      int vmdesc_len;
      SaveStateEntry *se;
@@ -1463,11 +1548,17 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
              continue;
          }
  
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+
          ret = vmstate_save(f, se, vmdesc);
          if (ret) {
              qemu_file_set_error(f, ret);
              return ret;
          }
+
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
      }
  
      if (inactivate_disks) {
@@ -1475,8 +1566,11 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
           * bdrv_activate_all() on the other end won't fail. */
          ret = bdrv_inactivate_all();
          if (ret) {
-            error_report("%s: bdrv_inactivate_all() failed (%d)",
-                         __func__, ret);
+            Error *local_err = NULL;
+            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
+                       __func__, ret);
+            migrate_set_error(ms, local_err);
+            error_report_err(local_err);
              qemu_file_set_error(f, ret);
              return ret;
          }
@@ -1500,6 +1594,8 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
      json_writer_free(vmdesc);
      ms->vmdesc = NULL;
  
+    trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
+
      return 0;
  }
  
@@ -1536,8 +1632,7 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
      }
  
  flush:
-    qemu_fflush(f);
-    return 0;
+    return qemu_fflush(f);
  }
  
  /* Give an estimate of the amount left to be transferred,
@@ -1619,15 +1714,14 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
          return -EINVAL;
      }
  
-    migrate_init(ms);
-    memset(&mig_stats, 0, sizeof(mig_stats));
-    memset(&compression_counters, 0, sizeof(compression_counters));
+    ret = migrate_init(ms, errp);
+    if (ret) {
+        return ret;
+    }
      ms->to_dst_file = f;
  
-    qemu_mutex_unlock_iothread();
      qemu_savevm_state_header(f);
      qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();
  
      while (qemu_file_get_error(f) == 0) {
          if (qemu_savevm_state_iterate(f, false) > 0) {
@@ -2043,18 +2137,18 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
      Error *local_err = NULL;
      MigrationIncomingState *mis = opaque;
  
-    trace_loadvm_postcopy_handle_run_bh("enter");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
  
      /* TODO we should move all of this lot into postcopy_ram.c or a shared code
       * in migration.c
       */
      cpu_synchronize_all_post_init();
  
-    trace_loadvm_postcopy_handle_run_bh("after cpu sync");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
  
      qemu_announce_self(&mis->announce_timer, migrate_announce_params());
  
-    trace_loadvm_postcopy_handle_run_bh("after announce");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
  
      /* Make sure all file formats throw away their mutable metadata.
       * If we get an error here, just don't restart the VM yet. */
@@ -2065,7 +2159,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
          autostart = false;
      }
  
-    trace_loadvm_postcopy_handle_run_bh("after invalidate cache");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
  
      dirty_bitmap_mig_before_vm_start();
  
@@ -2079,7 +2173,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
  
      qemu_bh_delete(mis->bh);
  
-    trace_loadvm_postcopy_handle_run_bh("return");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
+    object_unref(OBJECT(migration_get_current()));
  }
  
  /* After all discards we can start running and asking for pages */
@@ -2095,6 +2190,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
  
      postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
      mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
+    object_ref(OBJECT(migration_get_current()));
      qemu_bh_schedule(mis->bh);
  
      /* We need to finish reading the stream from the package
@@ -2360,6 +2456,21 @@ static int loadvm_process_command(QEMUFile *f)
              error_report("CMD_OPEN_RETURN_PATH failed");
              return -1;
          }
+
+        /*
+         * Switchover ack is enabled but no device uses it, so send an ACK to
+         * source that it's OK to switchover. Do it here, after return path has
+         * been created.
+         */
+        if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
+            int ret = migrate_send_rp_switchover_ack(mis);
+            if (ret) {
+                error_report(
+                    "Could not send switchover ack RP MSG, err %d (%s)", ret,
+                    strerror(-ret));
+                return ret;
+            }
+        }
          break;
  
      case MIG_CMD_PING:
@@ -2445,9 +2556,12 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
  }
  
  static int
-qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
+                               uint8_t type)
  {
+    bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
      uint32_t instance_id, version_id, section_id;
+    int64_t start_ts, end_ts;
      SaveStateEntry *se;
      char idstr[256];
      int ret;
@@ -2496,12 +2610,23 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
          return -EINVAL;
      }
  
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
      ret = vmstate_load(f, se);
      if (ret < 0) {
          error_report("error while loading state for instance 0x%"PRIx32" of"
                       " device '%s'", instance_id, idstr);
          return ret;
      }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("non-iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
      if (!check_section_footer(f, se)) {
          return -EINVAL;
      }
@@ -2510,8 +2635,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
  }
  
  static int
-qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
+                             uint8_t type)
  {
+    bool trace_downtime = (type == QEMU_VM_SECTION_END);
+    int64_t start_ts, end_ts;
      uint32_t section_id;
      SaveStateEntry *se;
      int ret;
@@ -2536,12 +2664,23 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
          return -EINVAL;
      }
  
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
      ret = vmstate_load(f, se);
      if (ret < 0) {
          error_report("error while loading state section id %d(%s)",
                       section_id, se->idstr);
          return ret;
      }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
      if (!check_section_footer(f, se)) {
          return -EINVAL;
      }
@@ -2586,6 +2725,23 @@ static int qemu_loadvm_state_header(QEMUFile *f)
      return 0;
  }
  
+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
+{
+    SaveStateEntry *se;
+    /* Count handlers whose switchover_ack_needed() hook returns true. */
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->switchover_ack_needed) {
+            continue;
+        }
+        /* Each such handler adds one to mis->switchover_ack_pending_num. */
+        if (se->ops->switchover_ack_needed(se->opaque)) {
+            mis->switchover_ack_pending_num++;
+        }
+    }
+
+    trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
+}
+
  static int qemu_loadvm_state_setup(QEMUFile *f)
  {
      SaveStateEntry *se;
@@ -2666,7 +2822,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
          qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
      }
  
-    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+    /* Current state can be either ACTIVE or RECOVER */
+    migrate_set_state(&mis->state, mis->state,
                        MIGRATION_STATUS_POSTCOPY_PAUSED);
  
      /* Notify the fault thread for the invalidated file handle */
@@ -2712,14 +2869,14 @@ retry:
          switch (section_type) {
          case QEMU_VM_SECTION_START:
          case QEMU_VM_SECTION_FULL:
-            ret = qemu_loadvm_section_start_full(f, mis);
+            ret = qemu_loadvm_section_start_full(f, mis, section_type);
              if (ret < 0) {
                  goto out;
              }
              break;
          case QEMU_VM_SECTION_PART:
          case QEMU_VM_SECTION_END:
-            ret = qemu_loadvm_section_part_end(f, mis);
+            ret = qemu_loadvm_section_part_end(f, mis, section_type);
              if (ret < 0) {
                  goto out;
              }
@@ -2789,6 +2946,10 @@ int qemu_loadvm_state(QEMUFile *f)
          return -EINVAL;
      }
  
+    if (migrate_switchover_ack()) {
+        qemu_loadvm_state_switchover_ack_needed(mis);
+    }
+
      cpu_synchronize_all_pre_loadvm();
  
      ret = qemu_loadvm_state_main(f, mis);
@@ -2862,6 +3023,24 @@ int qemu_load_device_state(QEMUFile *f)
      return 0;
  }
  
+int qemu_loadvm_approve_switchover(void)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    /* Nothing is pending, so there is no approval to consume: -EINVAL. */
+    if (!mis->switchover_ack_pending_num) {
+        return -EINVAL;
+    }
+
+    mis->switchover_ack_pending_num--;
+    trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
+    /* Approvals still outstanding: return 0 without sending the ack. */
+    if (mis->switchover_ack_pending_num) {
+        return 0;
+    }
+    /* Last one consumed: send the ack and return that call's result. */
+    return migrate_send_rp_switchover_ack(mis);
+}
+
  bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
                    bool has_devices, strList *devices, Error **errp)
  {
@@ -2869,10 +3048,9 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      QEMUSnapshotInfo sn1, *sn = &sn1;
      int ret = -1, ret2;
      QEMUFile *f;
-    int saved_vm_running;
+    RunState saved_state = runstate_get();
      uint64_t vm_state_size;
      g_autoptr(GDateTime) now = g_date_time_new_now_local();
-    AioContext *aio_context;
  
      GLOBAL_STATE_CODE();
  
@@ -2915,21 +3093,12 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      if (bs == NULL) {
          return false;
      }
-    aio_context = bdrv_get_aio_context(bs);
  
-    saved_vm_running = runstate_is_running();
-
-    ret = global_state_store();
-    if (ret) {
-        error_setg(errp, "Error saving global state");
-        return false;
-    }
+    global_state_store();
      vm_stop(RUN_STATE_SAVE_VM);
  
      bdrv_drain_all_begin();
  
-    aio_context_acquire(aio_context);
-
      memset(sn, 0, sizeof(*sn));
  
      /* fill auxiliary fields */
@@ -2956,7 +3125,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
          goto the_end;
      }
      ret = qemu_savevm_state(f, errp);
-    vm_state_size = qemu_file_total_transferred(f);
+    vm_state_size = qemu_file_transferred(f);
      ret2 = qemu_fclose(f);
      if (ret < 0) {
          goto the_end;
@@ -2966,14 +3135,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
          goto the_end;
      }
  
-    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
-     * for itself.  BDRV_POLL_WHILE() does not support nested locking because
-     * it only releases the lock once.  Therefore synchronous I/O will deadlock
-     * unless we release the AioContext before bdrv_all_create_snapshot().
-     */
-    aio_context_release(aio_context);
-    aio_context = NULL;
-
      ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
                                     has_devices, devices, errp);
      if (ret < 0) {
@@ -2984,15 +3145,9 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      ret = 0;
  
   the_end:
-    if (aio_context) {
-        aio_context_release(aio_context);
-    }
-
      bdrv_drain_all_end();
  
-    if (saved_vm_running) {
-        vm_start();
-    }
+    vm_resume(saved_state);
      return ret == 0;
  }
  
@@ -3085,7 +3240,6 @@ bool load_snapshot(const char *name, const char *vmstate,
      QEMUSnapshotInfo sn;
      QEMUFile *f;
      int ret;
-    AioContext *aio_context;
      MigrationIncomingState *mis = migration_incoming_get_current();
  
      if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
@@ -3105,12 +3259,9 @@ bool load_snapshot(const char *name, const char *vmstate,
      if (!bs_vm_state) {
          return false;
      }
-    aio_context = bdrv_get_aio_context(bs_vm_state);
  
      /* Don't even try to load empty VM states */
-    aio_context_acquire(aio_context);
      ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
-    aio_context_release(aio_context);
      if (ret < 0) {
          return false;
      } else if (sn.vm_state_size == 0) {
@@ -3147,10 +3298,8 @@ bool load_snapshot(const char *name, const char *vmstate,
          ret = -EINVAL;
          goto err_drain;
      }
-    aio_context_acquire(aio_context);
      ret = qemu_loadvm_state(f);
      migration_incoming_state_destroy();
-    aio_context_release(aio_context);
  
      bdrv_drain_all_end();
  
@@ -3166,6 +3315,14 @@ err_drain:
      return false;
  }
  
+void load_snapshot_resume(RunState state)
+{
+    vm_resume(state);
+    if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
+        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, &error_abort);
+    } /* i.e. @state asked for RUNNING but runstate_get() says SUSPENDED */
+}
+
  bool delete_snapshot(const char *name, bool has_devices,
                       strList *devices, Error **errp)
  {
@@ -3230,16 +3387,15 @@ static void snapshot_load_job_bh(void *opaque)
  {
      Job *job = opaque;
      SnapshotJob *s = container_of(job, SnapshotJob, common);
-    int orig_vm_running;
+    RunState orig_state = runstate_get();
  
      job_progress_set_remaining(&s->common, 1);
  
-    orig_vm_running = runstate_is_running();
      vm_stop(RUN_STATE_RESTORE_VM);
  
      s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
-    if (s->ret && orig_vm_running) {
-        vm_start();
+    if (s->ret) {
+        load_snapshot_resume(orig_state);
      }
  
      job_progress_update(&s->common, 1);