migration/multifd: clean pages after filling packet
diff --git a/cpus.c b/cpus.c
index dde3b7b9813c2882b0ade13c7d4c1ad2a31c4b6b..b612116f9584c4f7c69a04091476ff1a0464d645 100644
--- a/cpus.c
+++ b/cpus.c
@@ -25,7 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/config-file.h"
-#include "cpu.h"
+#include "migration/vmstate.h"
 #include "monitor/monitor.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
@@ -33,7 +33,6 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "qemu/qemu-print.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/tcg.h"
 #include "sysemu/block-backend.h"
 #include "exec/gdbstub.h"
@@ -46,6 +45,7 @@
 #include "exec/exec-all.h"
 
 #include "qemu/thread.h"
+#include "qemu/plugin.h"
 #include "sysemu/cpus.h"
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
 #include "qemu/bitmap.h"
 #include "qemu/seqlock.h"
 #include "qemu/guest-random.h"
-#include "tcg.h"
+#include "tcg/tcg.h"
 #include "hw/nmi.h"
 #include "sysemu/replay.h"
+#include "sysemu/runstate.h"
 #include "hw/boards.h"
+#include "hw/hw.h"
 
 #ifdef CONFIG_LINUX
 
@@ -76,6 +78,8 @@
 
 #endif /* CONFIG_LINUX */
 
+static QemuMutex qemu_global_mutex;
+
 int64_t max_delay;
 int64_t max_advance;
 
@@ -162,78 +166,6 @@ typedef struct TimersState {
 static TimersState timers_state;
 bool mttcg_enabled;
 
-/*
- * We default to false if we know other options have been enabled
- * which are currently incompatible with MTTCG. Otherwise when each
- * guest (target) has been updated to support:
- *   - atomic instructions
- *   - memory ordering primitives (barriers)
- * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
- *
- * Once a guest architecture has been converted to the new primitives
- * there are two remaining limitations to check.
- *
- * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
- * - The host must have a stronger memory order than the guest
- *
- * It may be possible in future to support strong guests on weak hosts
- * but that will require tagging all load/stores in a guest with their
- * implicit memory order requirements which would likely slow things
- * down a lot.
- */
-
-static bool check_tcg_memory_orders_compatible(void)
-{
-#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
-    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
-#else
-    return false;
-#endif
-}
-
-static bool default_mttcg_enabled(void)
-{
-    if (use_icount || TCG_OVERSIZED_GUEST) {
-        return false;
-    } else {
-#ifdef TARGET_SUPPORTS_MTTCG
-        return check_tcg_memory_orders_compatible();
-#else
-        return false;
-#endif
-    }
-}
-
-void qemu_tcg_configure(QemuOpts *opts, Error **errp)
-{
-    const char *t = qemu_opt_get(opts, "thread");
-    if (t) {
-        if (strcmp(t, "multi") == 0) {
-            if (TCG_OVERSIZED_GUEST) {
-                error_setg(errp, "No MTTCG when guest word size > hosts");
-            } else if (use_icount) {
-                error_setg(errp, "No MTTCG when icount is enabled");
-            } else {
-#ifndef TARGET_SUPPORTS_MTTCG
-                warn_report("Guest not yet converted to MTTCG - "
-                            "you may get unexpected results");
-#endif
-                if (!check_tcg_memory_orders_compatible()) {
-                    warn_report("Guest expects a stronger memory ordering "
-                                "than the host provides");
-                    error_printf("This may cause strange/hard to debug errors\n");
-                }
-                mttcg_enabled = true;
-            }
-        } else if (strcmp(t, "single") == 0) {
-            mttcg_enabled = false;
-        } else {
-            error_setg(errp, "Invalid 'thread' setting %s", t);
-        }
-    } else {
-        mttcg_enabled = default_mttcg_enabled();
-    }
-}
 
 /* The current number of executed instructions is based on what we
  * originally budgeted minus the current state of the decrementing
@@ -555,7 +487,8 @@ void qtest_clock_warp(int64_t dest)
     assert(qtest_enabled());
     aio_context = qemu_get_aio_context();
     while (clock < dest) {
-        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                      QEMU_TIMER_ATTR_ALL);
         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 
         seqlock_write_lock(&timers_state.vm_clock_seqlock,
@@ -615,7 +548,8 @@ void qemu_start_warp_timer(void)
 
     /* We want to use the earliest deadline from ALL vm_clocks */
     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
-    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                          ~QEMU_TIMER_ATTR_EXTERNAL);
     if (deadline < 0) {
         static bool notified;
         if (!icount_sleep && !notified) {
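Both hunks above switch to the qemu_clock_deadline_ns_all() variant that takes an attribute mask: QEMU_TIMER_ATTR_ALL considers every timer, while ~QEMU_TIMER_ATTR_EXTERNAL leaves out timers flagged as external. The sketch below illustrates that kind of mask-filtered deadline scan; all names are toy stand-ins, and the mask semantics assumed here (skip a timer that carries an attribute bit outside the mask) are an assumption, not QEMU's implementation.

/* Toy deadline scan filtered by an attribute bitmask.  toy_timer,
 * TOY_TIMER_ATTR_* and deadline_ns_all() are illustrative stand-ins,
 * not QEMU types or functions. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_TIMER_ATTR_EXTERNAL (1u << 0)
#define TOY_TIMER_ATTR_ALL      (~0u)

struct toy_timer {
    int64_t expire_ns;     /* absolute expiry time */
    unsigned attributes;   /* e.g. TOY_TIMER_ATTR_EXTERNAL, or 0 */
};

/* Soonest relative deadline among timers whose attribute bits all fall
 * inside attr_mask; -1 means "no matching timer armed". */
static int64_t deadline_ns_all(const struct toy_timer *t, size_t n,
                               int64_t now_ns, unsigned attr_mask)
{
    int64_t deadline = -1;

    for (size_t i = 0; i < n; i++) {
        if (t[i].attributes & ~attr_mask) {
            continue;                    /* carries an excluded attribute */
        }
        int64_t delta = t[i].expire_ns - now_ns;
        if (delta < 0) {
            delta = 0;                   /* already expired */
        }
        if (deadline < 0 || delta < deadline) {
            deadline = delta;
        }
    }
    return deadline;
}

int main(void)
{
    struct toy_timer timers[] = {
        { .expire_ns = 150, .attributes = 0 },
        { .expire_ns = 100, .attributes = TOY_TIMER_ATTR_EXTERNAL },
    };

    /* All timers considered: the external one at 100 ns wins. */
    printf("%lld\n", (long long)deadline_ns_all(timers, 2, 0,
                                                TOY_TIMER_ATTR_ALL));
    /* External timers excluded: only the 150 ns timer remains. */
    printf("%lld\n", (long long)deadline_ns_all(timers, 2, 0,
                                                ~TOY_TIMER_ATTR_EXTERNAL));
    return 0;
}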
@@ -779,7 +713,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
     double pct;
     double throttle_ratio;
-    long sleeptime_ns;
+    int64_t sleeptime_ns, endtime_ns;
 
     if (!cpu_throttle_get_percentage()) {
         return;
@@ -787,11 +721,20 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 
     pct = (double)cpu_throttle_get_percentage()/100;
     throttle_ratio = pct / (1 - pct);
-    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
-
-    qemu_mutex_unlock_iothread();
-    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
-    qemu_mutex_lock_iothread();
+    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+    while (sleeptime_ns > 0 && !cpu->stop) {
+        if (sleeptime_ns > SCALE_MS) {
+            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
+                                sleeptime_ns / SCALE_MS);
+        } else {
+            qemu_mutex_unlock_iothread();
+            g_usleep(sleeptime_ns / SCALE_US);
+            qemu_mutex_lock_iothread();
+        }
+        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }
     atomic_set(&cpu->throttle_thread_scheduled, 0);
 }
 
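The new throttle loop above replaces a single long g_usleep() with repeated waits against an absolute end time, so a vCPU that is being stopped or kicked is not held asleep for the full interval. A standalone POSIX sketch of the same pattern follows, simplified to always wait on the condition variable (the QEMU code additionally falls back to g_usleep() for sub-millisecond remainders); pthread_cond_timedwait() stands in for qemu_cond_timedwait(), and none of the names below are QEMU's.

/* Sketch: sleep until end_ns in interruptible slices, re-checking a
 * stop flag and the remaining time on every wakeup.  Not QEMU code. */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static int64_t now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Caller holds mutex, mirroring how the vCPU loop holds the global
 * iothread mutex around qemu_cond_timedwait(). */
static void throttled_sleep(pthread_cond_t *cond, pthread_mutex_t *mutex,
                            const bool *stop, int64_t end_ns)
{
    int64_t remaining = end_ns - now_ns();

    while (remaining > 0 && !*stop) {
        struct timespec abstime = {
            .tv_sec  = end_ns / 1000000000LL,
            .tv_nsec = end_ns % 1000000000LL,
        };
        /* Wakes up early if the condition is signalled, e.g. by a kick. */
        pthread_cond_timedwait(cond, mutex, &abstime);
        remaining = end_ns - now_ns();
    }
}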
@@ -935,8 +878,8 @@ static inline int64_t qemu_tcg_next_kick(void)
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
-/* Kick the currently round-robin scheduled vCPU */
-static void qemu_cpu_kick_rr_cpu(void)
+/* Kick the currently round-robin scheduled vCPU to the next one */
+static void qemu_cpu_kick_rr_next_cpu(void)
 {
     CPUState *cpu;
     do {
@@ -947,6 +890,16 @@ static void qemu_cpu_kick_rr_cpu(void)
     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 }
 
+/* Kick all RR vCPUs */
+static void qemu_cpu_kick_rr_cpus(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_exit(cpu);
+    }
+}
+
 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
 {
 }
@@ -979,7 +932,7 @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 static void kick_tcg_thread(void *opaque)
 {
     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_cpu();
+    qemu_cpu_kick_rr_next_cpu();
 }
 
 static void start_tcg_kick_timer(void)
@@ -1083,7 +1036,6 @@ static int do_vm_stop(RunState state, bool send_stop)
     }
 
     bdrv_drain_all();
-    replay_disable_events();
     ret = bdrv_flush_all();
 
     return ret;
@@ -1169,8 +1121,6 @@ static void qemu_init_sigbus(void)
 }
 #endif /* !CONFIG_LINUX */
 
-static QemuMutex qemu_global_mutex;
-
 static QemuThread io_thread;
 
 /* cpu creation */
@@ -1243,9 +1193,18 @@ static void qemu_tcg_rr_wait_io_event(void)
 
 static void qemu_wait_io_event(CPUState *cpu)
 {
+    bool slept = false;
+
     while (cpu_thread_is_idle(cpu)) {
+        if (!slept) {
+            slept = true;
+            qemu_plugin_vcpu_idle_cb(cpu);
+        }
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
     }
+    if (slept) {
+        qemu_plugin_vcpu_resume_cb(cpu);
+    }
 
 #ifdef _WIN32
     /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
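qemu_wait_io_event() now tells plugins when a vCPU first actually goes idle and again when it leaves the wait loop, firing each hook at most once per idle period. Reduced to a standalone sketch below; on_idle()/on_resume() and the toy vcpu struct are placeholders for the qemu_plugin_vcpu_* callbacks and CPUState, not the real API.

/* Sketch of the "notify idle once, notify resume on exit" pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static void on_idle(int cpu_index)   { printf("cpu %d idle\n", cpu_index); }
static void on_resume(int cpu_index) { printf("cpu %d resumed\n", cpu_index); }

struct vcpu {
    int index;
    bool idle;                  /* condition checked under the mutex */
    pthread_cond_t halt_cond;
    pthread_mutex_t *lock;      /* global mutex, held by the caller */
};

static void wait_io_event(struct vcpu *cpu)
{
    bool slept = false;

    while (cpu->idle) {
        if (!slept) {
            slept = true;           /* fire the idle hook only once */
            on_idle(cpu->index);
        }
        pthread_cond_wait(&cpu->halt_cond, cpu->lock);
    }
    if (slept) {
        on_resume(cpu->index);      /* only if we actually went idle */
    }
}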
@@ -1351,7 +1310,16 @@ static int64_t tcg_get_icount_limit(void)
     int64_t deadline;
 
     if (replay_mode != REPLAY_MODE_PLAY) {
-        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+        /*
+         * Include all the timers, because they may need attention.
+         * Overly long CPU execution may create an unnecessary delay in the UI.
+         */
+        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                              QEMU_TIMER_ATTR_ALL);
+        /* Check realtime timers, because they help with input processing */
+        deadline = qemu_soonest_timeout(deadline,
+                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
+                                           QEMU_TIMER_ATTR_ALL));
 
         /* Maintain prior (possibly buggy) behaviour where if no deadline
          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
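tcg_get_icount_limit() now folds the realtime clock into the budget by taking the sooner of the two deadlines, where a negative deadline means "no timer armed". The helper below illustrates those semantics as assumed here; it is not the actual qemu_soonest_timeout() implementation.

/* Sketch: combine two deadlines where a negative value means "none". */
#include <assert.h>
#include <stdint.h>

static int64_t soonest_deadline_ns(int64_t a, int64_t b)
{
    if (a < 0) {
        return b;               /* only b applies (it may also be "none") */
    }
    if (b < 0) {
        return a;
    }
    return a < b ? a : b;       /* both armed: the earlier one wins */
}

int main(void)
{
    assert(soonest_deadline_ns(-1, 500) == 500);  /* virtual idle, realtime pending */
    assert(soonest_deadline_ns(200, 500) == 200); /* earlier deadline wins */
    assert(soonest_deadline_ns(-1, -1) == -1);    /* nothing armed at all */
    return 0;
}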
@@ -1372,8 +1340,8 @@ static void handle_icount_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
     if (use_icount) {
-        int64_t deadline =
-            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+                                                      QEMU_TIMER_ATTR_ALL);
 
         if (deadline == 0) {
             /* Wake up other AioContexts.  */
@@ -1594,7 +1562,6 @@ static void *qemu_hax_cpu_thread_fn(void *arg)
 
     cpu->thread_id = qemu_get_thread_id();
     cpu->created = true;
-    cpu->halted = 0;
     current_cpu = cpu;
 
     hax_init_vcpu(cpu);
@@ -1812,9 +1779,11 @@ void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
     if (tcg_enabled()) {
-        cpu_exit(cpu);
-        /* NOP unless doing single-thread RR */
-        qemu_cpu_kick_rr_cpu();
+        if (qemu_tcg_mttcg_enabled()) {
+            cpu_exit(cpu);
+        } else {
+            qemu_cpu_kick_rr_cpus();
+        }
     } else {
         if (hax_enabled()) {
             /*
@@ -2079,8 +2048,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu)
 
 void qemu_init_vcpu(CPUState *cpu)
 {
-    cpu->nr_cores = smp_cores;
-    cpu->nr_threads = smp_threads;
+    MachineState *ms = MACHINE(qdev_get_machine());
+
+    cpu->nr_cores = ms->smp.cores;
+    cpu->nr_threads = ms->smp.threads;
     cpu->stopped = true;
     cpu->random_seed = qemu_guest_random_seed_thread_part1();
 
@@ -2163,7 +2134,6 @@ int vm_prepare_start(void)
     /* We are sending this now, but the CPUs will be resumed shortly later */
     qapi_event_send_resume();
 
-    replay_enable_events();
     cpu_enable_ticks();
     runstate_set(RUN_STATE_RUNNING);
     vm_state_notify(1, RUN_STATE_RUNNING);
@@ -2201,190 +2171,6 @@ void list_cpus(const char *optarg)
 #endif
 }
 
-CpuInfoList *qmp_query_cpus(Error **errp)
-{
-    MachineState *ms = MACHINE(qdev_get_machine());
-    MachineClass *mc = MACHINE_GET_CLASS(ms);
-    CpuInfoList *head = NULL, *cur_item = NULL;
-    CPUState *cpu;
-
-    CPU_FOREACH(cpu) {
-        CpuInfoList *info;
-#if defined(TARGET_I386)
-        X86CPU *x86_cpu = X86_CPU(cpu);
-        CPUX86State *env = &x86_cpu->env;
-#elif defined(TARGET_PPC)
-        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
-        CPUPPCState *env = &ppc_cpu->env;
-#elif defined(TARGET_SPARC)
-        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
-        CPUSPARCState *env = &sparc_cpu->env;
-#elif defined(TARGET_RISCV)
-        RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
-        CPURISCVState *env = &riscv_cpu->env;
-#elif defined(TARGET_MIPS)
-        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
-        CPUMIPSState *env = &mips_cpu->env;
-#elif defined(TARGET_TRICORE)
-        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
-        CPUTriCoreState *env = &tricore_cpu->env;
-#elif defined(TARGET_S390X)
-        S390CPU *s390_cpu = S390_CPU(cpu);
-        CPUS390XState *env = &s390_cpu->env;
-#endif
-
-        cpu_synchronize_state(cpu);
-
-        info = g_malloc0(sizeof(*info));
-        info->value = g_malloc0(sizeof(*info->value));
-        info->value->CPU = cpu->cpu_index;
-        info->value->current = (cpu == first_cpu);
-        info->value->halted = cpu->halted;
-        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
-        info->value->thread_id = cpu->thread_id;
-#if defined(TARGET_I386)
-        info->value->arch = CPU_INFO_ARCH_X86;
-        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
-#elif defined(TARGET_PPC)
-        info->value->arch = CPU_INFO_ARCH_PPC;
-        info->value->u.ppc.nip = env->nip;
-#elif defined(TARGET_SPARC)
-        info->value->arch = CPU_INFO_ARCH_SPARC;
-        info->value->u.q_sparc.pc = env->pc;
-        info->value->u.q_sparc.npc = env->npc;
-#elif defined(TARGET_MIPS)
-        info->value->arch = CPU_INFO_ARCH_MIPS;
-        info->value->u.q_mips.PC = env->active_tc.PC;
-#elif defined(TARGET_TRICORE)
-        info->value->arch = CPU_INFO_ARCH_TRICORE;
-        info->value->u.tricore.PC = env->PC;
-#elif defined(TARGET_S390X)
-        info->value->arch = CPU_INFO_ARCH_S390;
-        info->value->u.s390.cpu_state = env->cpu_state;
-#elif defined(TARGET_RISCV)
-        info->value->arch = CPU_INFO_ARCH_RISCV;
-        info->value->u.riscv.pc = env->pc;
-#else
-        info->value->arch = CPU_INFO_ARCH_OTHER;
-#endif
-        info->value->has_props = !!mc->cpu_index_to_instance_props;
-        if (info->value->has_props) {
-            CpuInstanceProperties *props;
-            props = g_malloc0(sizeof(*props));
-            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
-            info->value->props = props;
-        }
-
-        /* XXX: waiting for the qapi to support GSList */
-        if (!cur_item) {
-            head = cur_item = info;
-        } else {
-            cur_item->next = info;
-            cur_item = info;
-        }
-    }
-
-    return head;
-}
-
-static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
-{
-    /*
-     * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
-     * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
-     */
-    switch (target) {
-    case SYS_EMU_TARGET_I386:
-    case SYS_EMU_TARGET_X86_64:
-        return CPU_INFO_ARCH_X86;
-
-    case SYS_EMU_TARGET_PPC:
-    case SYS_EMU_TARGET_PPC64:
-        return CPU_INFO_ARCH_PPC;
-
-    case SYS_EMU_TARGET_SPARC:
-    case SYS_EMU_TARGET_SPARC64:
-        return CPU_INFO_ARCH_SPARC;
-
-    case SYS_EMU_TARGET_MIPS:
-    case SYS_EMU_TARGET_MIPSEL:
-    case SYS_EMU_TARGET_MIPS64:
-    case SYS_EMU_TARGET_MIPS64EL:
-        return CPU_INFO_ARCH_MIPS;
-
-    case SYS_EMU_TARGET_TRICORE:
-        return CPU_INFO_ARCH_TRICORE;
-
-    case SYS_EMU_TARGET_S390X:
-        return CPU_INFO_ARCH_S390;
-
-    case SYS_EMU_TARGET_RISCV32:
-    case SYS_EMU_TARGET_RISCV64:
-        return CPU_INFO_ARCH_RISCV;
-
-    default:
-        return CPU_INFO_ARCH_OTHER;
-    }
-}
-
-static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
-{
-#ifdef TARGET_S390X
-    S390CPU *s390_cpu = S390_CPU(cpu);
-    CPUS390XState *env = &s390_cpu->env;
-
-    info->cpu_state = env->cpu_state;
-#else
-    abort();
-#endif
-}
-
-/*
- * fast means: we NEVER interrupt vCPU threads to retrieve
- * information from KVM.
- */
-CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
-{
-    MachineState *ms = MACHINE(qdev_get_machine());
-    MachineClass *mc = MACHINE_GET_CLASS(ms);
-    CpuInfoFastList *head = NULL, *cur_item = NULL;
-    SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
-                                          -1, &error_abort);
-    CPUState *cpu;
-
-    CPU_FOREACH(cpu) {
-        CpuInfoFastList *info = g_malloc0(sizeof(*info));
-        info->value = g_malloc0(sizeof(*info->value));
-
-        info->value->cpu_index = cpu->cpu_index;
-        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
-        info->value->thread_id = cpu->thread_id;
-
-        info->value->has_props = !!mc->cpu_index_to_instance_props;
-        if (info->value->has_props) {
-            CpuInstanceProperties *props;
-            props = g_malloc0(sizeof(*props));
-            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
-            info->value->props = props;
-        }
-
-        info->value->arch = sysemu_target_to_cpuinfo_arch(target);
-        info->value->target = target;
-        if (target == SYS_EMU_TARGET_S390X) {
-            cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
-        }
-
-        if (!cur_item) {
-            head = cur_item = info;
-        } else {
-            cur_item->next = info;
-            cur_item = info;
-        }
-    }
-
-    return head;
-}
-
 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                  bool has_cpu, int64_t cpu_index, Error **errp)
 {