block: Ignore close() failure in get_tmp_filename()

[mirror_qemu.git] / migration / savevm.c
diff --git a/migration/savevm.c b/migration/savevm.c

index 0c5d61ae20e6b057b3db10e3d3263f7bb12791b2..48e85c052c2c0197405803b9ef833d2a5d49c96c 100644 (file)
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -35,8 +35,8 @@
  #include "migration/misc.h"
  #include "migration/register.h"
  #include "migration/global_state.h"
+#include "migration/channel-block.h"
  #include "ram.h"
-#include "qemu-file-channel.h"
  #include "qemu-file.h"
  #include "savevm.h"
  #include "postcopy-ram.h"
@@ -65,6 +65,7 @@
  #include "qemu/bitmap.h"
  #include "net/announce.h"
  #include "qemu/yank.h"
+#include "yank_functions.h"
  
  const unsigned int postcopy_ram_discard_version;
  
@@ -129,48 +130,13 @@ static struct mig_cmd_args {
  /***********************************************************/
  /* savevm/loadvm support */
  
-static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                   int64_t pos, Error **errp)
-{
-    int ret;
-    QEMUIOVector qiov;
-
-    qemu_iovec_init_external(&qiov, iov, iovcnt);
-    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return qiov.size;
-}
-
-static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                                size_t size, Error **errp)
-{
-    return bdrv_load_vmstate(opaque, buf, pos, size);
-}
-
-static int bdrv_fclose(void *opaque, Error **errp)
-{
-    return bdrv_flush(opaque);
-}
-
-static const QEMUFileOps bdrv_read_ops = {
-    .get_buffer = block_get_buffer,
-    .close =      bdrv_fclose
-};
-
-static const QEMUFileOps bdrv_write_ops = {
-    .writev_buffer  = block_writev_buffer,
-    .close          = bdrv_fclose
-};
-
  static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
  {
      if (is_writable) {
-        return qemu_fopen_ops(bs, &bdrv_write_ops);
+        return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
+    } else {
+        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
      }
-    return qemu_fopen_ops(bs, &bdrv_read_ops);
  }
  
  
@@ -915,9 +881,9 @@ static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
  {
      int64_t old_offset, size;
  
-    old_offset = qemu_ftell_fast(f);
+    old_offset = qemu_file_total_transferred_fast(f);
      se->ops->save_state(f, se->opaque);
-    size = qemu_ftell_fast(f) - old_offset;
+    size = qemu_file_total_transferred_fast(f) - old_offset;
  
      if (vmdesc) {
          json_writer_int64(vmdesc, "size", size);
@@ -1297,8 +1263,9 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
          save_section_footer(f, se);
  
          if (ret < 0) {
-            error_report("failed to save SaveStateEntry with id(name): %d(%s)",
-                         se->section_id, se->idstr);
+            error_report("failed to save SaveStateEntry with id(name): "
+                         "%d(%s): %d",
+                         se->section_id, se->idstr, ret);
              qemu_file_set_error(f, ret);
          }
          if (ret <= 0) {
@@ -1436,7 +1403,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
  
      if (inactivate_disks) {
          /* Inactivate before sending QEMU_VM_EOF so that the
-         * bdrv_invalidate_cache_all() on the other end won't fail. */
+         * bdrv_activate_all() on the other end won't fail. */
          ret = bdrv_inactivate_all();
          if (ret) {
              error_report("%s: bdrv_inactivate_all() failed (%d)",
@@ -1566,6 +1533,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
  
      migrate_init(ms);
      memset(&ram_counters, 0, sizeof(ram_counters));
+    memset(&compression_counters, 0, sizeof(compression_counters));
      ms->to_dst_file = f;
  
      qemu_mutex_unlock_iothread();
@@ -1683,6 +1651,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
  {
      PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
      uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
+    size_t page_size = qemu_target_page_size();
      Error *local_err = NULL;
  
      trace_loadvm_postcopy_handle_advise();
@@ -1739,13 +1708,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
      }
  
      remote_tps = qemu_get_be64(mis->from_src_file);
-    if (remote_tps != qemu_target_page_size()) {
+    if (remote_tps != page_size) {
          /*
           * Again, some differences could be dealt with, but for now keep it
           * simple.
           */
          error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
-                     (int)remote_tps, qemu_target_page_size());
+                     (int)remote_tps, page_size);
          return -1;
      }
  
@@ -1859,7 +1828,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
  
      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                                     MIGRATION_STATUS_POSTCOPY_ACTIVE);
-    qemu_sem_post(&mis->listen_thread_sem);
+    qemu_sem_post(&mis->thread_sync_sem);
      trace_postcopy_ram_listen_thread_start();
  
      rcu_register_thread();
@@ -1944,9 +1913,10 @@ static void *postcopy_ram_listen_thread(void *opaque)
  static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
  {
      PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
-    trace_loadvm_postcopy_handle_listen();
      Error *local_err = NULL;
  
+    trace_loadvm_postcopy_handle_listen("enter");
+
      if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
          error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
          return -1;
@@ -1961,6 +1931,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
          }
      }
  
+    trace_loadvm_postcopy_handle_listen("after discard");
+
      /*
       * Sensitise RAM - can now generate requests for blocks that don't exist
       * However, at this point the CPU shouldn't be running, and the IO
@@ -1973,19 +1945,17 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
          }
      }
  
+    trace_loadvm_postcopy_handle_listen("after uffd");
+
      if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
          error_report_err(local_err);
          return -1;
      }
  
      mis->have_listen_thread = true;
-    /* Start up the listening thread and wait for it to signal ready */
-    qemu_sem_init(&mis->listen_thread_sem, 0);
-    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
-                       postcopy_ram_listen_thread, NULL,
-                       QEMU_THREAD_DETACHED);
-    qemu_sem_wait(&mis->listen_thread_sem);
-    qemu_sem_destroy(&mis->listen_thread_sem);
+    postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
+                           postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
+    trace_loadvm_postcopy_handle_listen("return");
  
      return 0;
  }
@@ -1995,25 +1965,29 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
      Error *local_err = NULL;
      MigrationIncomingState *mis = opaque;
  
+    trace_loadvm_postcopy_handle_run_bh("enter");
+
      /* TODO we should move all of this lot into postcopy_ram.c or a shared code
       * in migration.c
       */
      cpu_synchronize_all_post_init();
  
+    trace_loadvm_postcopy_handle_run_bh("after cpu sync");
+
      qemu_announce_self(&mis->announce_timer, migrate_announce_params());
  
-    /* Make sure all file formats flush their mutable metadata.
+    trace_loadvm_postcopy_handle_run_bh("after announce");
+
+    /* Make sure all file formats throw away their mutable metadata.
       * If we get an error here, just don't restart the VM yet. */
-    bdrv_invalidate_cache_all(&local_err);
+    bdrv_activate_all(&local_err);
      if (local_err) {
          error_report_err(local_err);
          local_err = NULL;
          autostart = false;
      }
  
-    trace_loadvm_postcopy_handle_run_cpu_sync();
-
-    trace_loadvm_postcopy_handle_run_vmstart();
+    trace_loadvm_postcopy_handle_run_bh("after invalidate cache");
  
      dirty_bitmap_mig_before_vm_start();
  
@@ -2026,6 +2000,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
      }
  
      qemu_bh_delete(mis->bh);
+
+    trace_loadvm_postcopy_handle_run_bh("return");
  }
  
  /* After all discards we can start running and asking for pages */
@@ -2141,6 +2117,13 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
       */
      qemu_sem_post(&mis->postcopy_pause_sem_fault);
  
+    if (migrate_postcopy_preempt()) {
+        /* The channel should already be setup again; make sure of it */
+        assert(mis->postcopy_qemufile_dst);
+        /* Kick the fast ram load thread too */
+        qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
+    }
+
      return 0;
  }
  
@@ -2182,7 +2165,7 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
      bioc->usage += length;
      trace_loadvm_handle_cmd_packaged_received(ret);
  
-    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
+    QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
  
      ret = qemu_loadvm_state_main(packf, mis);
      trace_loadvm_handle_cmd_packaged_main(ret);
@@ -2269,12 +2252,13 @@ static int loadvm_process_command(QEMUFile *f)
          return qemu_file_get_error(f);
      }
  
-    trace_loadvm_process_command(cmd, len);
      if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
          error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
          return -EINVAL;
      }
  
+    trace_loadvm_process_command(mig_cmd_args[cmd].name, len);
+
      if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
          error_report("%s received with bad length - expecting %zu, got %d",
                       mig_cmd_args[cmd].name,
@@ -2561,12 +2545,17 @@ void qemu_loadvm_state_cleanup(void)
  /* Return true if we should continue the migration, or false. */
  static bool postcopy_pause_incoming(MigrationIncomingState *mis)
  {
+    int i;
+
      trace_postcopy_pause_incoming();
  
      assert(migrate_postcopy_ram());
  
-    /* Clear the triggered bit to allow one recovery */
-    mis->postcopy_recover_triggered = false;
+    /*
+     * Unregister yank with either from/to src would work, since ioc behind it
+     * is the same
+     */
+    migration_ioc_unregister_yank_from_file(mis->from_src_file);
  
      assert(mis->from_src_file);
      qemu_file_shutdown(mis->from_src_file);
@@ -2580,12 +2569,37 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
      mis->to_src_file = NULL;
      qemu_mutex_unlock(&mis->rp_mutex);
  
+    /*
+     * NOTE: this must happen before reset the PostcopyTmpPages below,
+     * otherwise it's racy to reset those fields when the fast load thread
+     * can be accessing it in parallel.
+     */
+    if (mis->postcopy_qemufile_dst) {
+        qemu_file_shutdown(mis->postcopy_qemufile_dst);
+        /* Take the mutex to make sure the fast ram load thread halted */
+        qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
+        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
+        qemu_fclose(mis->postcopy_qemufile_dst);
+        mis->postcopy_qemufile_dst = NULL;
+        qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
+    }
+
      migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                        MIGRATION_STATUS_POSTCOPY_PAUSED);
  
      /* Notify the fault thread for the invalidated file handle */
      postcopy_fault_thread_notify(mis);
  
+    /*
+     * If network is interrupted, any temp page we received will be useless
+     * because we didn't mark them as "received" in receivedmap.  After a
+     * proper recovery later (which will sync src dirty bitmap with receivedmap
+     * on dest) these cached small pages will be resent again.
+     */
+    for (i = 0; i < mis->postcopy_channels; i++) {
+        postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
+    }
+
      error_report("Detected IO failure for postcopy. "
                   "Migration paused.");
  
@@ -2607,8 +2621,8 @@ retry:
      while (true) {
          section_type = qemu_get_byte(f);
  
-        if (qemu_file_get_error(f)) {
-            ret = qemu_file_get_error(f);
+        ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
+        if (ret) {
              break;
          }
  
@@ -2775,10 +2789,11 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      QEMUFile *f;
      int saved_vm_running;
      uint64_t vm_state_size;
-    qemu_timeval tv;
-    struct tm tm;
+    g_autoptr(GDateTime) now = g_date_time_new_now_local();
      AioContext *aio_context;
  
+    GLOBAL_STATE_CODE();
+
      if (migration_is_blocked(errp)) {
          return false;
      }
@@ -2836,9 +2851,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      memset(sn, 0, sizeof(*sn));
  
      /* fill auxiliary fields */
-    qemu_gettimeofday(&tv);
-    sn->date_sec = tv.tv_sec;
-    sn->date_nsec = tv.tv_usec * 1000;
+    sn->date_sec = g_date_time_to_unix(now);
+    sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
      sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
      if (replay_mode != REPLAY_MODE_NONE) {
          sn->icount = replay_get_current_icount();
@@ -2849,9 +2863,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
      if (name) {
          pstrcpy(sn->name, sizeof(sn->name), name);
      } else {
-        /* cast below needed for OpenBSD where tv_sec is still 'long' */
-        localtime_r((const time_t *)&tv.tv_sec, &tm);
-        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
+        g_autofree char *autoname = g_date_time_format(now,  "vm-%Y%m%d%H%M%S");
+        pstrcpy(sn->name, sizeof(sn->name), autoname);
      }
  
      /* save the VM state */
@@ -2861,7 +2874,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
          goto the_end;
      }
      ret = qemu_savevm_state(f, errp);
-    vm_state_size = qemu_ftell(f);
+    vm_state_size = qemu_file_total_transferred(f);
      ret2 = qemu_fclose(f);
      if (ret < 0) {
          goto the_end;
@@ -2925,7 +2938,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
          goto the_end;
      }
      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
-    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
+    f = qemu_file_new_output(QIO_CHANNEL(ioc));
      object_unref(OBJECT(ioc));
      ret = qemu_save_device_state(f);
      if (ret < 0 || qemu_fclose(f) < 0) {
@@ -2972,7 +2985,7 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
          return;
      }
      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
-    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
+    f = qemu_file_new_input(QIO_CHANNEL(ioc));
      object_unref(OBJECT(ioc));
  
      ret = qemu_loadvm_state(f);
@@ -3112,3 +3125,187 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
  
      return !(vmsd && vmsd->unmigratable);
  }
+
+typedef struct SnapshotJob {
+    Job common;
+    char *tag;
+    char *vmstate;
+    strList *devices;
+    Coroutine *co;
+    Error **errp;
+    bool ret;
+} SnapshotJob;
+
+static void qmp_snapshot_job_free(SnapshotJob *s)
+{
+    g_free(s->tag);
+    g_free(s->vmstate);
+    qapi_free_strList(s->devices);
+}
+
+
+static void snapshot_load_job_bh(void *opaque)
+{
+    Job *job = opaque;
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+    int orig_vm_running;
+
+    job_progress_set_remaining(&s->common, 1);
+
+    orig_vm_running = runstate_is_running();
+    vm_stop(RUN_STATE_RESTORE_VM);
+
+    s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
+    if (s->ret && orig_vm_running) {
+        vm_start();
+    }
+
+    job_progress_update(&s->common, 1);
+
+    qmp_snapshot_job_free(s);
+    aio_co_wake(s->co);
+}
+
+static void snapshot_save_job_bh(void *opaque)
+{
+    Job *job = opaque;
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+
+    job_progress_set_remaining(&s->common, 1);
+    s->ret = save_snapshot(s->tag, false, s->vmstate,
+                           true, s->devices, s->errp);
+    job_progress_update(&s->common, 1);
+
+    qmp_snapshot_job_free(s);
+    aio_co_wake(s->co);
+}
+
+static void snapshot_delete_job_bh(void *opaque)
+{
+    Job *job = opaque;
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+
+    job_progress_set_remaining(&s->common, 1);
+    s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
+    job_progress_update(&s->common, 1);
+
+    qmp_snapshot_job_free(s);
+    aio_co_wake(s->co);
+}
+
+static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
+{
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+    s->errp = errp;
+    s->co = qemu_coroutine_self();
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            snapshot_save_job_bh, job);
+    qemu_coroutine_yield();
+    return s->ret ? 0 : -1;
+}
+
+static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
+{
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+    s->errp = errp;
+    s->co = qemu_coroutine_self();
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            snapshot_load_job_bh, job);
+    qemu_coroutine_yield();
+    return s->ret ? 0 : -1;
+}
+
+static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
+{
+    SnapshotJob *s = container_of(job, SnapshotJob, common);
+    s->errp = errp;
+    s->co = qemu_coroutine_self();
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            snapshot_delete_job_bh, job);
+    qemu_coroutine_yield();
+    return s->ret ? 0 : -1;
+}
+
+
+static const JobDriver snapshot_load_job_driver = {
+    .instance_size = sizeof(SnapshotJob),
+    .job_type      = JOB_TYPE_SNAPSHOT_LOAD,
+    .run           = snapshot_load_job_run,
+};
+
+static const JobDriver snapshot_save_job_driver = {
+    .instance_size = sizeof(SnapshotJob),
+    .job_type      = JOB_TYPE_SNAPSHOT_SAVE,
+    .run           = snapshot_save_job_run,
+};
+
+static const JobDriver snapshot_delete_job_driver = {
+    .instance_size = sizeof(SnapshotJob),
+    .job_type      = JOB_TYPE_SNAPSHOT_DELETE,
+    .run           = snapshot_delete_job_run,
+};
+
+
+void qmp_snapshot_save(const char *job_id,
+                       const char *tag,
+                       const char *vmstate,
+                       strList *devices,
+                       Error **errp)
+{
+    SnapshotJob *s;
+
+    s = job_create(job_id, &snapshot_save_job_driver, NULL,
+                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
+                   NULL, NULL, errp);
+    if (!s) {
+        return;
+    }
+
+    s->tag = g_strdup(tag);
+    s->vmstate = g_strdup(vmstate);
+    s->devices = QAPI_CLONE(strList, devices);
+
+    job_start(&s->common);
+}
+
+void qmp_snapshot_load(const char *job_id,
+                       const char *tag,
+                       const char *vmstate,
+                       strList *devices,
+                       Error **errp)
+{
+    SnapshotJob *s;
+
+    s = job_create(job_id, &snapshot_load_job_driver, NULL,
+                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
+                   NULL, NULL, errp);
+    if (!s) {
+        return;
+    }
+
+    s->tag = g_strdup(tag);
+    s->vmstate = g_strdup(vmstate);
+    s->devices = QAPI_CLONE(strList, devices);
+
+    job_start(&s->common);
+}
+
+void qmp_snapshot_delete(const char *job_id,
+                         const char *tag,
+                         strList *devices,
+                         Error **errp)
+{
+    SnapshotJob *s;
+
+    s = job_create(job_id, &snapshot_delete_job_driver, NULL,
+                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
+                   NULL, NULL, errp);
+    if (!s) {
+        return;
+    }
+
+    s->tag = g_strdup(tag);
+    s->devices = QAPI_CLONE(strList, devices);
+
+    job_start(&s->common);
+}