iotests: Fix test 200 on s390x without virtio-pci

[mirror_qemu.git] / migration / colo.c
diff --git a/migration/colo.c b/migration/colo.c

index c436d63a19b7c9999e8c83c7af0b32f5e19a25e6..238a6d62c75839740500abf3bc5618c18baee11c 100644 (file)
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -12,6 +12,8 @@
  
  #include "qemu/osdep.h"
  #include "sysemu/sysemu.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-migration.h"
  #include "qemu-file-channel.h"
  #include "migration.h"
  #include "qemu-file.h"
@@ -22,17 +24,24 @@
  #include "trace.h"
  #include "qemu/error-report.h"
  #include "migration/failover.h"
+#ifdef CONFIG_REPLICATION
  #include "replication.h"
-#include "qmp-commands.h"
+#endif
+#include "net/colo-compare.h"
+#include "net/colo.h"
+#include "block/block.h"
+#include "qapi/qapi-events-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/cpus.h"
+#include "net/filter.h"
  
  static bool vmstate_loading;
+static Notifier packets_compare_notifier;
  
-#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
+/* The user needs to know the COLO mode after a COLO failover */
+static COLOMode last_colo_mode;
  
-bool colo_supported(void)
-{
-    return true;
-}
+#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
  
  bool migration_in_colo_state(void)
  {
@@ -55,8 +64,11 @@ static bool colo_runstate_is_stopped(void)
  
  static void secondary_vm_do_failover(void)
  {
+/* COLO needs block replication to be enabled */
+#ifdef CONFIG_REPLICATION
      int old_state;
      MigrationIncomingState *mis = migration_incoming_get_current();
+    Error *local_err = NULL;
  
      /* Can not do failover during the process of VM's loading VMstate, Or
       * it will break the secondary VM.
@@ -66,7 +78,7 @@ static void secondary_vm_do_failover(void)
                          FAILOVER_STATUS_RELAUNCH);
          if (old_state != FAILOVER_STATUS_ACTIVE) {
              error_report("Unknown error while do failover for secondary VM,"
-                         "old_state: %s", FailoverStatus_lookup[old_state]);
+                         "old_state: %s", FailoverStatus_str(old_state));
          }
          return;
      }
@@ -74,6 +86,17 @@ static void secondary_vm_do_failover(void)
      migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);
  
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    /* Notify all filters of all NICs about the failover */
+    colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
      if (!autostart) {
          error_report("\"-S\" qemu option will be ignored in secondary side");
          /* recover runstate to normal migration finish state */
@@ -96,24 +119,35 @@ static void secondary_vm_do_failover(void)
                                     FAILOVER_STATUS_COMPLETED);
      if (old_state != FAILOVER_STATUS_ACTIVE) {
          error_report("Incorrect state (%s) while doing failover for "
-                     "secondary VM", FailoverStatus_lookup[old_state]);
+                     "secondary VM", FailoverStatus_str(old_state));
          return;
      }
      /* Notify COLO incoming thread that failover work is finished */
      qemu_sem_post(&mis->colo_incoming_sem);
+
      /* For Secondary VM, jump to incoming co */
      if (mis->migration_incoming_co) {
          qemu_coroutine_enter(mis->migration_incoming_co);
      }
+#else
+    abort();
+#endif
  }
  
  static void primary_vm_do_failover(void)
  {
+#ifdef CONFIG_REPLICATION
      MigrationState *s = migrate_get_current();
      int old_state;
+    Error *local_err = NULL;
  
      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);
+    /*
+     * kick COLO thread which might wait at
+     * qemu_sem_wait(&s->colo_checkpoint_sem).
+     */
+    colo_checkpoint_notify(migrate_get_current());
  
      /*
       * Wake up COLO thread which may blocked in recv() or send(),
@@ -131,11 +165,32 @@ static void primary_vm_do_failover(void)
                                     FAILOVER_STATUS_COMPLETED);
      if (old_state != FAILOVER_STATUS_ACTIVE) {
          error_report("Incorrect state (%s) while doing failover for Primary VM",
-                     FailoverStatus_lookup[old_state]);
+                     FailoverStatus_str(old_state));
          return;
      }
+
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        local_err = NULL;
+    }
+
      /* Notify COLO thread that failover work is finished */
      qemu_sem_post(&s->colo_exit_sem);
+#else
+    abort();
+#endif
+}
+
+COLOMode get_colo_mode(void)
+{
+    if (migration_in_colo_state()) {
+        return COLO_MODE_PRIMARY;
+    } else if (migration_incoming_in_colo_state()) {
+        return COLO_MODE_SECONDARY;
+    } else {
+        return COLO_MODE_NONE;
+    }
  }
  
  void colo_do_failover(MigrationState *s)
@@ -145,18 +200,24 @@ void colo_do_failover(MigrationState *s)
          vm_stop_force_state(RUN_STATE_COLO);
      }
  
-    if (get_colo_mode() == COLO_MODE_PRIMARY) {
+    switch (get_colo_mode()) {
+    case COLO_MODE_PRIMARY:
          primary_vm_do_failover();
-    } else {
+        break;
+    case COLO_MODE_SECONDARY:
          secondary_vm_do_failover();
+        break;
+    default:
+        error_report("colo_do_failover failed because the colo mode"
+                     " could not be obtained");
      }
  }
  
+#ifdef CONFIG_REPLICATION
  void qmp_xen_set_replication(bool enable, bool primary,
                               bool has_failover, bool failover,
                               Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      ReplicationMode mode = primary ?
                             REPLICATION_MODE_PRIMARY :
                             REPLICATION_MODE_SECONDARY;
@@ -175,14 +236,10 @@ void qmp_xen_set_replication(bool enable, bool primary,
          }
          replication_stop_all(failover, failover ? NULL : errp);
      }
-#else
-    abort();
-#endif
  }
  
  ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      Error *err = NULL;
      ReplicationStatus *s = g_new0(ReplicationStatus, 1);
  
@@ -197,18 +254,37 @@ ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
  
      error_free(err);
      return s;
-#else
-    abort();
-#endif
  }
  
  void qmp_xen_colo_do_checkpoint(Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      replication_do_checkpoint_all(errp);
-#else
-    abort();
+}
  #endif
+
+COLOStatus *qmp_query_colo_status(Error **errp)
+{
+    COLOStatus *s = g_new0(COLOStatus, 1);
+
+    s->mode = get_colo_mode();
+    s->last_mode = last_colo_mode;
+
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_NONE:
+        s->reason = COLO_EXIT_REASON_NONE;
+        break;
+    case FAILOVER_STATUS_COMPLETED:
+        s->reason = COLO_EXIT_REASON_REQUEST;
+        break;
+    default:
+        if (migration_in_colo_state()) {
+            s->reason = COLO_EXIT_REASON_PROCESSING;
+        } else {
+            s->reason = COLO_EXIT_REASON_ERROR;
+        }
+    }
+
+    return s;
  }
  
  static void colo_send_message(QEMUFile *f, COLOMessage msg,
@@ -227,7 +303,7 @@ static void colo_send_message(QEMUFile *f, COLOMessage msg,
      if (ret < 0) {
          error_setg_errno(errp, -ret, "Can't send COLO message");
      }
-    trace_colo_send_message(COLOMessage_lookup[msg]);
+    trace_colo_send_message(COLOMessage_str(msg));
  }
  
  static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
@@ -247,7 +323,7 @@ static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
      ret = qemu_file_get_error(f);
      if (ret < 0) {
          error_setg_errno(errp, -ret, "Failed to send value for message:%s",
-                         COLOMessage_lookup[msg]);
+                         COLOMessage_str(msg));
      }
  }
  
@@ -266,7 +342,7 @@ static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
          error_setg(errp, "%s: Invalid message", __func__);
          return msg;
      }
-    trace_colo_receive_message(COLOMessage_lookup[msg]);
+    trace_colo_receive_message(COLOMessage_str(msg));
      return msg;
  }
  
@@ -304,7 +380,7 @@ static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
      ret = qemu_file_get_error(f);
      if (ret < 0) {
          error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
-                         COLOMessage_lookup[expect_msg]);
+                         COLOMessage_str(expect_msg));
      }
      return value;
  }
@@ -347,20 +423,46 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
          goto out;
      }
  
+    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
      /* Disable block migration */
      migrate_set_block_enabled(false, &local_err);
-    qemu_savevm_state_header(fb);
-    qemu_savevm_state_begin(fb);
      qemu_mutex_lock_iothread();
-    qemu_savevm_state_complete_precopy(fb, false);
-    qemu_mutex_unlock_iothread();
  
-    qemu_fflush(fb);
+#ifdef CONFIG_REPLICATION
+    replication_do_checkpoint_all(&local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
  
      colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
      if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+    /* Note: device state is saved into buffer */
+    ret = qemu_save_device_state(fb);
+
+    qemu_mutex_unlock_iothread();
+    if (ret < 0) {
          goto out;
      }
+    /*
+     * Only save the VM's live state, which does not include device state.
+     * TODO: We may need a timeout mechanism to prevent the COLO process
+     * from being blocked here.
+     */
+    qemu_savevm_live_state(s->to_dst_file);
+
+    qemu_fflush(fb);
+
      /*
       * We need the size of the VMstate data in Secondary side,
       * With which we can decide how much data should be read.
@@ -404,6 +506,11 @@ out:
      return ret;
  }
  
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
+{
+    colo_checkpoint_notify(data);
+}
+
  static void colo_process_checkpoint(MigrationState *s)
  {
      QIOChannelBuffer *bioc;
@@ -412,6 +519,12 @@ static void colo_process_checkpoint(MigrationState *s)
      Error *local_err = NULL;
      int ret;
  
+    last_colo_mode = get_colo_mode();
+    if (last_colo_mode != COLO_MODE_PRIMARY) {
+        error_report("COLO mode must be COLO_MODE_PRIMARY");
+        return;
+    }
+
      failover_init_state();
  
      s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
@@ -420,6 +533,9 @@ static void colo_process_checkpoint(MigrationState *s)
          goto out;
      }
  
+    packets_compare_notifier.notify = colo_compare_notify_checkpoint;
+    colo_compare_register_notifier(&packets_compare_notifier);
+
      /*
       * Wait for Secondary finish loading VM states and enter COLO
       * restore.
@@ -434,6 +550,16 @@ static void colo_process_checkpoint(MigrationState *s)
      object_unref(OBJECT(bioc));
  
      qemu_mutex_lock_iothread();
+#ifdef CONFIG_REPLICATION
+    replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
+
      vm_start();
      qemu_mutex_unlock_iothread();
      trace_colo_vm_state_change("stop", "run");
@@ -449,6 +575,9 @@ static void colo_process_checkpoint(MigrationState *s)
  
          qemu_sem_wait(&s->colo_checkpoint_sem);
  
+        if (s->state != MIGRATION_STATUS_COLO) {
+            goto out;
+        }
          ret = colo_do_checkpoint_transaction(s, bioc, fb);
          if (ret < 0) {
              goto out;
@@ -465,11 +594,35 @@ out:
          qemu_fclose(fb);
      }
  
-    timer_del(s->colo_delay_timer);
+    /*
+     * There are only two reasons we can get here, some error happened
+     * or the user triggered failover.
+     */
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_COMPLETED:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_ERROR);
+    }
  
      /* Hope this not to be too long to wait here */
      qemu_sem_wait(&s->colo_exit_sem);
      qemu_sem_destroy(&s->colo_exit_sem);
+
+    /*
+     * It is safe to unregister the notifier after failover has finished.
+     * Besides, colo_delay_timer and colo_checkpoint_sem can't be
+     * released before unregistering the notifier, or there will be a
+     * use-after-free error.
+     */
+    colo_compare_unregister_notifier(&packets_compare_notifier);
+    timer_del(s->colo_delay_timer);
+    timer_free(s->colo_delay_timer);
+    qemu_sem_destroy(&s->colo_checkpoint_sem);
+
      /*
       * Must be called after failover BH is completed,
       * Or the failover BH may shutdown the wrong fd that
@@ -537,12 +690,20 @@ void *colo_process_incoming_thread(void *opaque)
      uint64_t total_size;
      uint64_t value;
      Error *local_err = NULL;
+    int ret;
  
+    rcu_register_thread();
      qemu_sem_init(&mis->colo_incoming_sem, 0);
  
      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                        MIGRATION_STATUS_COLO);
  
+    last_colo_mode = get_colo_mode();
+    if (last_colo_mode != COLO_MODE_SECONDARY) {
+        error_report("COLO mode must be COLO_MODE_SECONDARY");
+        return NULL;
+    }
+
      failover_init_state();
  
      mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
@@ -562,6 +723,20 @@ void *colo_process_incoming_thread(void *opaque)
      fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
      object_unref(OBJECT(bioc));
  
+    qemu_mutex_lock_iothread();
+#ifdef CONFIG_REPLICATION
+    replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
+    vm_start();
+    trace_colo_vm_state_change("stop", "run");
+    qemu_mutex_unlock_iothread();
+
      colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                        &local_err);
      if (local_err) {
@@ -581,6 +756,11 @@ void *colo_process_incoming_thread(void *opaque)
              goto out;
          }
  
+        qemu_mutex_lock_iothread();
+        vm_stop_force_state(RUN_STATE_COLO);
+        trace_colo_vm_state_change("run", "stop");
+        qemu_mutex_unlock_iothread();
+
          /* FIXME: This is unnecessary for periodic checkpoint mode */
          colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                       &local_err);
@@ -594,6 +774,16 @@ void *colo_process_incoming_thread(void *opaque)
              goto out;
          }
  
+        qemu_mutex_lock_iothread();
+        cpu_synchronize_all_pre_loadvm();
+        ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+        qemu_mutex_unlock_iothread();
+
+        if (ret < 0) {
+            error_report("Load VM's live state (ram) error");
+            goto out;
+        }
+
          value = colo_receive_message_value(mis->from_src_file,
                                   COLO_MESSAGE_VMSTATE_SIZE, &local_err);
          if (local_err) {
@@ -625,15 +815,41 @@ void *colo_process_incoming_thread(void *opaque)
          }
  
          qemu_mutex_lock_iothread();
-        qemu_system_reset(SHUTDOWN_CAUSE_NONE);
          vmstate_loading = true;
-        if (qemu_loadvm_state(fb) < 0) {
-            error_report("COLO: loadvm failed");
+        ret = qemu_load_device_state(fb);
+        if (ret < 0) {
+            error_report("COLO: load device state failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+#ifdef CONFIG_REPLICATION
+        replication_get_error_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+        /* discard colo disk buffer */
+        replication_do_checkpoint_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+#else
+        abort();
+#endif
+        /* Notify all filters of all NICs to do checkpoint */
+        colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
+
+        if (local_err) {
              qemu_mutex_unlock_iothread();
              goto out;
          }
  
          vmstate_loading = false;
+        vm_start();
+        trace_colo_vm_state_change("stop", "run");
          qemu_mutex_unlock_iothread();
  
          if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
@@ -657,6 +873,20 @@ out:
          error_report_err(local_err);
      }
  
+    /*
+     * There are only two reasons we can get here, some error happened
+     * or the user triggered failover.
+     */
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_COMPLETED:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_ERROR);
+    }
+
      if (fb) {
          qemu_fclose(fb);
      }
@@ -667,8 +897,9 @@ out:
      /* Must be called after failover BH is completed */
      if (mis->to_src_file) {
          qemu_fclose(mis->to_src_file);
+        mis->to_src_file = NULL;
      }
-    migration_incoming_exit_colo();
  
+    rcu_unregister_thread();
      return NULL;
  }