diff --git a/cpus.c b/cpus.c
index 85cd451a86a9c38718f5ca85581f1f36dfcd1f29..b612116f9584c4f7c69a04091476ff1a0464d645 100644
--- a/cpus.c
+++ b/cpus.c
@@ -45,6 +45,7 @@
 #include "exec/exec-all.h"
 
 #include "qemu/thread.h"
+#include "qemu/plugin.h"
 #include "sysemu/cpus.h"
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
@@ -52,7 +53,7 @@
 #include "qemu/bitmap.h"
 #include "qemu/seqlock.h"
 #include "qemu/guest-random.h"
-#include "tcg.h"
+#include "tcg/tcg.h"
 #include "hw/nmi.h"
 #include "sysemu/replay.h"
 #include "sysemu/runstate.h"
@@ -77,6 +78,8 @@
 
 #endif /* CONFIG_LINUX */
 
+static QemuMutex qemu_global_mutex;
+
 int64_t max_delay;
 int64_t max_advance;
 
@@ -163,78 +166,6 @@ typedef struct TimersState {
 static TimersState timers_state;
 bool mttcg_enabled;
 
-/*
- * We default to false if we know other options have been enabled
- * which are currently incompatible with MTTCG. Otherwise when each
- * guest (target) has been updated to support:
- *   - atomic instructions
- *   - memory ordering primitives (barriers)
- * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
- *
- * Once a guest architecture has been converted to the new primitives
- * there are two remaining limitations to check.
- *
- * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
- * - The host must have a stronger memory order than the guest
- *
- * It may be possible in future to support strong guests on weak hosts
- * but that will require tagging all load/stores in a guest with their
- * implicit memory order requirements which would likely slow things
- * down a lot.
- */
-
-static bool check_tcg_memory_orders_compatible(void)
-{
-#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
-    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
-#else
-    return false;
-#endif
-}
-
-static bool default_mttcg_enabled(void)
-{
-    if (use_icount || TCG_OVERSIZED_GUEST) {
-        return false;
-    } else {
-#ifdef TARGET_SUPPORTS_MTTCG
-        return check_tcg_memory_orders_compatible();
-#else
-        return false;
-#endif
-    }
-}
-
-void qemu_tcg_configure(QemuOpts *opts, Error **errp)
-{
-    const char *t = qemu_opt_get(opts, "thread");
-    if (t) {
-        if (strcmp(t, "multi") == 0) {
-            if (TCG_OVERSIZED_GUEST) {
-                error_setg(errp, "No MTTCG when guest word size > hosts");
-            } else if (use_icount) {
-                error_setg(errp, "No MTTCG when icount is enabled");
-            } else {
-#ifndef TARGET_SUPPORTS_MTTCG
-                warn_report("Guest not yet converted to MTTCG - "
-                            "you may get unexpected results");
-#endif
-                if (!check_tcg_memory_orders_compatible()) {
-                    warn_report("Guest expects a stronger memory ordering "
-                                "than the host provides");
-                    error_printf("This may cause strange/hard to debug errors\n");
-                }
-                mttcg_enabled = true;
-            }
-        } else if (strcmp(t, "single") == 0) {
-            mttcg_enabled = false;
-        } else {
-            error_setg(errp, "Invalid 'thread' setting %s", t);
-        }
-    } else {
-        mttcg_enabled = default_mttcg_enabled();
-    }
-}
 
 /* The current number of executed instructions is based on what we
  * originally budgeted minus the current state of the decrementing
@@ -782,7 +713,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
     double pct;
     double throttle_ratio;
-    long sleeptime_ns;
+    int64_t sleeptime_ns, endtime_ns;
 
     if (!cpu_throttle_get_percentage()) {
         return;
@@ -790,11 +721,20 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 
     pct = (double)cpu_throttle_get_percentage()/100;
     throttle_ratio = pct / (1 - pct);
-    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
-
-    qemu_mutex_unlock_iothread();
-    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
-    qemu_mutex_lock_iothread();
+    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+    while (sleeptime_ns > 0 && !cpu->stop) {
+        if (sleeptime_ns > SCALE_MS) {
+            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
+                                sleeptime_ns / SCALE_MS);
+        } else {
+            qemu_mutex_unlock_iothread();
+            g_usleep(sleeptime_ns / SCALE_US);
+            qemu_mutex_lock_iothread();
+        }
+        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }
     atomic_set(&cpu->throttle_thread_scheduled, 0);
 }
 
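The new loop above sleeps in chunks so that a vCPU being stopped can bail out early (the !cpu->stop check), while the total budget still comes from the same ratio arithmetic. Below is a standalone sketch of that arithmetic, assuming the 10 ms CPU_THROTTLE_TIMESLICE_NS defined elsewhere in cpus.c; throttle_sleep_ns is a made-up helper name, not QEMU code.

#include <stdint.h>
#include <stdio.h>

#define CPU_THROTTLE_TIMESLICE_NS 10000000LL   /* assumed 10 ms, as in cpus.c */

/* Hypothetical helper mirroring the computation in cpu_throttle_thread().
 * The percentage must be below 100, or the ratio divides by zero. */
static int64_t throttle_sleep_ns(int percentage)
{
    double pct = (double)percentage / 100;
    double throttle_ratio = pct / (1 - pct);
    /* +1 ns guards against the double rounding just below a whole number
     * (the 0.9999... case noted in the comment above). */
    return (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
}

int main(void)
{
    /* 50% -> ~10 ms, 75% -> ~30 ms, 99% -> ~990 ms per 10 ms timeslice. */
    printf("%lld %lld %lld\n",
           (long long)throttle_sleep_ns(50),
           (long long)throttle_sleep_ns(75),
           (long long)throttle_sleep_ns(99));
    return 0;
}

At high percentages the budget approaches a full second per timeslice, which is why splitting the wait into qemu_cond_timedwait() chunks that can notice cpu->stop matters.
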
@@ -938,8 +878,8 @@ static inline int64_t qemu_tcg_next_kick(void)
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
-/* Kick the currently round-robin scheduled vCPU */
-static void qemu_cpu_kick_rr_cpu(void)
+/* Kick the currently round-robin scheduled vCPU to next */
+static void qemu_cpu_kick_rr_next_cpu(void)
 {
     CPUState *cpu;
     do {
@@ -950,6 +890,16 @@ static void qemu_cpu_kick_rr_cpu(void)
     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 }
 
+/* Kick all RR vCPUs */
+static void qemu_cpu_kick_rr_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_exit(cpu);
+    };
+}
+
 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
 {
 }
@@ -982,7 +932,7 @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 static void kick_tcg_thread(void *opaque)
 {
     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_cpu();
+    qemu_cpu_kick_rr_next_cpu();
 }
 
 static void start_tcg_kick_timer(void)
@@ -1086,7 +1036,6 @@ static int do_vm_stop(RunState state, bool send_stop)
     }
 
     bdrv_drain_all();
-    replay_disable_events();
     ret = bdrv_flush_all();
 
     return ret;
@@ -1172,8 +1121,6 @@ static void qemu_init_sigbus(void)
 }
 #endif /* !CONFIG_LINUX */
 
-static QemuMutex qemu_global_mutex;
-
 static QemuThread io_thread;
 
 /* cpu creation */
@@ -1246,9 +1193,18 @@ static void qemu_tcg_rr_wait_io_event(void)
 
 static void qemu_wait_io_event(CPUState *cpu)
 {
+    bool slept = false;
+
     while (cpu_thread_is_idle(cpu)) {
+        if (!slept) {
+            slept = true;
+            qemu_plugin_vcpu_idle_cb(cpu);
+        }
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
     }
+    if (slept) {
+        qemu_plugin_vcpu_resume_cb(cpu);
+    }
 
 #ifdef _WIN32
     /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
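The slept flag above means qemu_plugin_vcpu_idle_cb() fires once when a vCPU first goes idle and qemu_plugin_vcpu_resume_cb() fires when it wakes. A minimal sketch of a TCG plugin consuming those events, assuming the public registration calls in qemu-plugin.h from the same release; the plugin itself is made up and untested.

#include <stdio.h>
#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

/* Called via qemu_plugin_vcpu_idle_cb() when a vCPU goes idle. */
static void vcpu_idle(qemu_plugin_id_t id, unsigned int vcpu_index)
{
    fprintf(stderr, "vcpu %u went idle\n", vcpu_index);
}

/* Called via qemu_plugin_vcpu_resume_cb() when it starts running again. */
static void vcpu_resume(qemu_plugin_id_t id, unsigned int vcpu_index)
{
    fprintf(stderr, "vcpu %u resumed\n", vcpu_index);
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
    return 0;
}

Such a plugin would be built as a shared object and loaded at run time with QEMU's -plugin option.
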
@@ -1360,6 +1316,10 @@ static int64_t tcg_get_icount_limit(void)
          */
         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                               QEMU_TIMER_ATTR_ALL);
+        /* Check realtime timers, because they help with input processing */
+        deadline = qemu_soonest_timeout(deadline,
+                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
+                                           QEMU_TIMER_ATTR_ALL));
 
         /* Maintain prior (possibly buggy) behaviour where if no deadline
          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
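Folding in the REALTIME deadline only works because qemu_soonest_timeout() treats -1, QEMU's "no deadline" value, as infinitely far away. A self-contained sketch of that semantic, assuming the unsigned-compare trick used by the helper in include/qemu/timer.h; soonest_timeout below is a local stand-in, not the QEMU function.

#include <stdint.h>
#include <assert.h>

/* -1 means "no deadline" and must lose to any finite deadline.  Casting to
 * unsigned makes -1 the largest possible value, so a plain min suffices. */
static int64_t soonest_timeout(int64_t a, int64_t b)
{
    return ((uint64_t)a < (uint64_t)b) ? a : b;
}

int main(void)
{
    assert(soonest_timeout(-1, 500) == 500);   /* finite beats "no deadline" */
    assert(soonest_timeout(300, 500) == 300);  /* plain minimum otherwise */
    assert(soonest_timeout(-1, -1) == -1);     /* still no deadline */
    return 0;
}

With that behaviour, a missing QEMU_CLOCK_REALTIME deadline leaves the virtual deadline untouched, preserving the "maintain prior behaviour" path in the surrounding context.
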
@@ -1819,9 +1779,11 @@ void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
     if (tcg_enabled()) {
-        cpu_exit(cpu);
-        /* NOP unless doing single-thread RR */
-        qemu_cpu_kick_rr_cpu();
+        if (qemu_tcg_mttcg_enabled()) {
+            cpu_exit(cpu);
+        } else {
+            qemu_cpu_kick_rr_cpus();
+        }
     } else {
         if (hax_enabled()) {
             /*
@@ -2172,7 +2134,6 @@ int vm_prepare_start(void)
     /* We are sending this now, but the CPUs will be resumed shortly later */
     qapi_event_send_resume();
 
-    replay_enable_events();
     cpu_enable_ticks();
     runstate_set(RUN_STATE_RUNNING);
     vm_state_notify(1, RUN_STATE_RUNNING);