migration/postcopy: enable compress during postcopy

[mirror_qemu.git] / exec.c
diff --git a/exec.c b/exec.c

index 8a0a6613b1f0407c3c2fb88187e0c736d971aefc..67e520d18ea5b65932f899b6c5981c74cfc15992 100644 (file)
--- a/exec.c
+++ b/exec.c
@@ -25,7 +25,7 @@
  #include "cpu.h"
  #include "exec/exec-all.h"
  #include "exec/target_page.h"
-#include "tcg.h"
+#include "tcg/tcg.h"
  #include "hw/qdev-core.h"
  #include "hw/qdev-properties.h"
  #if !defined(CONFIG_USER_ONLY)
@@ -65,6 +65,8 @@
  #include "exec/ram_addr.h"
  #include "exec/log.h"
  
+#include "qemu/pmem.h"
+
  #include "migration/vmstate.h"
  
  #include "qemu/range.h"
@@ -91,11 +93,6 @@ AddressSpace address_space_memory;
  static MemoryRegion io_mem_unassigned;
  #endif
  
-#ifdef TARGET_PAGE_BITS_VARY
-int target_page_bits;
-bool target_page_bits_decided;
-#endif
-
  CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
  
  /* current CPU in the current thread. It is only valid inside
@@ -109,37 +106,8 @@ int use_icount;
  uintptr_t qemu_host_page_size;
  intptr_t qemu_host_page_mask;
  
-bool set_preferred_target_page_bits(int bits)
-{
-    /* The target page size is the lowest common denominator for all
-     * the CPUs in the system, so we can only make it smaller, never
-     * larger. And we can't make it smaller once we've committed to
-     * a particular size.
-     */
-#ifdef TARGET_PAGE_BITS_VARY
-    assert(bits >= TARGET_PAGE_BITS_MIN);
-    if (target_page_bits == 0 || target_page_bits > bits) {
-        if (target_page_bits_decided) {
-            return false;
-        }
-        target_page_bits = bits;
-    }
-#endif
-    return true;
-}
-
  #if !defined(CONFIG_USER_ONLY)
  
-static void finalize_target_page_bits(void)
-{
-#ifdef TARGET_PAGE_BITS_VARY
-    if (target_page_bits == 0) {
-        target_page_bits = TARGET_PAGE_BITS_MIN;
-    }
-    target_page_bits_decided = true;
-#endif
-}
-
  typedef struct PhysPageEntry PhysPageEntry;
  
  struct PhysPageEntry {
@@ -660,7 +628,8 @@ static void tcg_register_iommu_notifier(CPUState *cpu,
       */
      MemoryRegion *mr = MEMORY_REGION(iommu_mr);
      TCGIOMMUNotifier *notifier;
-    int i;
+    Error *err = NULL;
+    int i, ret;
  
      for (i = 0; i < cpu->iommu_notifiers->len; i++) {
          notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
@@ -689,7 +658,12 @@ static void tcg_register_iommu_notifier(CPUState *cpu,
                              0,
                              HWADDR_MAX,
                              iommu_idx);
-        memory_region_register_iommu_notifier(notifier->mr, &notifier->n);
+        ret = memory_region_register_iommu_notifier(notifier->mr, &notifier->n,
+                                                    &err);
+        if (ret) {
+            error_report_err(err);
+            exit(1);
+        }
      }
  
      if (!notifier->active) {
@@ -969,6 +943,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
      }
      tlb_init(cpu);
  
+    qemu_plugin_vcpu_init_hook(cpu);
+
  #ifndef CONFIG_USER_ONLY
      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
          vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
@@ -1031,16 +1007,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
          return;
      }
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      mr = address_space_translate(as, addr, &addr, &l, false, attrs);
      if (!(memory_region_is_ram(mr)
            || memory_region_is_romd(mr))) {
-        rcu_read_unlock();
          return;
      }
      ram_addr = memory_region_get_ram_addr(mr) + addr;
      tb_invalidate_phys_page_range(ram_addr, ram_addr + 1);
-    rcu_read_unlock();
  }
  
  static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
@@ -1251,13 +1225,13 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
      fprintf(stderr, "\n");
      cpu_dump_state(cpu, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP);
      if (qemu_log_separate()) {
-        qemu_log_lock();
+        FILE *logfile = qemu_log_lock();
          qemu_log("qemu: fatal: ");
          qemu_log_vprintf(fmt, ap2);
          qemu_log("\n");
          log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
          qemu_log_flush();
-        qemu_log_unlock();
+        qemu_log_unlock(logfile);
          qemu_log_close();
      }
      va_end(ap2);
@@ -1326,14 +1300,13 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
      end = TARGET_PAGE_ALIGN(start + length);
      start &= TARGET_PAGE_MASK;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      block = qemu_get_ram_block(start);
      assert(block == qemu_get_ram_block(end - 1));
      start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
      CPU_FOREACH(cpu) {
          tlb_reset_dirty(cpu, start1, length);
      }
-    rcu_read_unlock();
  }
  
  /* Note: start and end must be within the same ram block.  */
@@ -1354,30 +1327,29 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
      end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
      page = start >> TARGET_PAGE_BITS;
  
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+        ramblock = qemu_get_ram_block(start);
+        /* Range sanity check on the ramblock */
+        assert(start >= ramblock->offset &&
+               start + length <= ramblock->offset + ramblock->used_length);
  
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
-    ramblock = qemu_get_ram_block(start);
-    /* Range sanity check on the ramblock */
-    assert(start >= ramblock->offset &&
-           start + length <= ramblock->offset + ramblock->used_length);
+        while (page < end) {
+            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long num = MIN(end - page,
+                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
  
-    while (page < end) {
-        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
+                                                  offset, num);
+            page += num;
+        }
  
-        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
-                                              offset, num);
-        page += num;
+        mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
+        mr_size = (end - page) << TARGET_PAGE_BITS;
+        memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
      }
  
-    mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
-    mr_size = (end - page) << TARGET_PAGE_BITS;
-    memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
-
-    rcu_read_unlock();
-
      if (dirty && tcg_enabled()) {
          tlb_reset_dirty_range_all(start, length);
      }
@@ -1405,28 +1377,27 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
      end  = last  >> TARGET_PAGE_BITS;
      dest = 0;
  
-    rcu_read_lock();
-
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
  
-    while (page < end) {
-        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+        while (page < end) {
+            unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+            unsigned long num = MIN(end - page,
+                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
  
-        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
-        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
-        offset >>= BITS_PER_LEVEL;
+            assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+            assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+            offset >>= BITS_PER_LEVEL;
  
-        bitmap_copy_and_clear_atomic(snap->dirty + dest,
-                                     blocks->blocks[idx] + offset,
-                                     num);
-        page += num;
-        dest += num >> BITS_PER_LEVEL;
+            bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                         blocks->blocks[idx] + offset,
+                                         num);
+            page += num;
+            dest += num >> BITS_PER_LEVEL;
+        }
      }
  
-    rcu_read_unlock();
-
      if (tcg_enabled()) {
          tlb_reset_dirty_range_all(start, length);
      }
@@ -1637,7 +1608,7 @@ void ram_block_dump(Monitor *mon)
      RAMBlock *block;
      char *psize;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      monitor_printf(mon, "%24s %8s  %18s %18s %18s\n",
                     "Block Name", "PSize", "Offset", "Used", "Total");
      RAMBLOCK_FOREACH(block) {
@@ -1649,7 +1620,6 @@ void ram_block_dump(Monitor *mon)
                         (uint64_t)block->max_length);
          g_free(psize);
      }
-    rcu_read_unlock();
  }
  
  #ifdef __linux__
@@ -1756,11 +1726,11 @@ long qemu_maxrampagesize(void)
  #else
  long qemu_minrampagesize(void)
  {
-    return getpagesize();
+    return qemu_real_host_page_size;
  }
  long qemu_maxrampagesize(void)
  {
-    return getpagesize();
+    return qemu_real_host_page_size;
  }
  #endif
  
@@ -1873,6 +1843,7 @@ static void *file_ram_alloc(RAMBlock *block,
                              bool truncate,
                              Error **errp)
  {
+    Error *err = NULL;
      MachineState *ms = MACHINE(qdev_get_machine());
      void *area;
  
@@ -1930,8 +1901,9 @@ static void *file_ram_alloc(RAMBlock *block,
      }
  
      if (mem_prealloc) {
-        os_mem_prealloc(fd, area, memory, ms->smp.cpus, errp);
-        if (errp && *errp) {
+        os_mem_prealloc(fd, area, memory, ms->smp.cpus, &err);
+        if (err) {
+            error_propagate(errp, err);
              qemu_ram_munmap(fd, area, memory);
              return NULL;
          }
@@ -2003,11 +1975,10 @@ static unsigned long last_ram_page(void)
      RAMBlock *block;
      ram_addr_t last = 0;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      RAMBLOCK_FOREACH(block) {
          last = MAX(last, block->offset + block->max_length);
      }
-    rcu_read_unlock();
      return last >> TARGET_PAGE_BITS;
  }
  
@@ -2094,7 +2065,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
      }
      pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      RAMBLOCK_FOREACH(block) {
          if (block != new_block &&
              !strcmp(block->idstr, new_block->idstr)) {
@@ -2103,7 +2074,6 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
              abort();
          }
      }
-    rcu_read_unlock();
  }
  
  /* Called with iothread lock held.  */
@@ -2190,6 +2160,40 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
      return 0;
  }
  
+/*
+ * Write back the range [start, start + length) of @block to its backing
+ * store: pmem_persist() for pmem blocks (CONFIG_LIBPMEM builds only),
+ * else qemu_msync() when the block has an fd (failure only warns);
+ * otherwise a no-op. Intended to be a synchronous operation.
+ */
+void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length)
+{
+    void *addr = ramblock_ptr(block, start);
+
+    /* The requested range must fit within the block's used length */
+    g_assert((start + length) <= block->used_length);
+
+#ifdef CONFIG_LIBPMEM
+    /* pmem blocks are persisted directly; others fall through to msync */
+    if (ramblock_is_pmem(block)) {
+        pmem_persist(addr, length);
+        return;
+    }
+#endif
+    if (block->fd >= 0) {
+        /*
+         * Either there is no PMEM support, or the memory has not been
+         * specified as persistent (or is not persistent): use msync.
+         * Less optimal, but it still achieves the same goal.
+         */
+        if (qemu_msync(addr, length, block->fd)) {
+            warn_report("%s: failed to sync memory range: start: "
+                    RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
+                    __func__, start, length);
+        }
+    }
+}
+
  /* Called with ram_list.mutex held */
  static void dirty_memory_extend(ram_addr_t old_ram_size,
                                  ram_addr_t new_ram_size)
@@ -2419,7 +2423,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
      new_block->max_length = max_size;
      assert(max_size >= size);
      new_block->fd = -1;
-    new_block->page_size = getpagesize();
+    new_block->page_size = qemu_real_host_page_size;
      new_block->host = host;
      if (host) {
          new_block->flags |= RAM_PREALLOC;
@@ -2645,17 +2649,16 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
  
      if (xen_enabled()) {
          ram_addr_t ram_addr;
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
          ram_addr = xen_ram_addr_from_mapcache(ptr);
          block = qemu_get_ram_block(ram_addr);
          if (block) {
              *offset = ram_addr - block->offset;
          }
-        rcu_read_unlock();
          return block;
      }
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      block = atomic_rcu_read(&ram_list.mru_block);
      if (block && block->host && host - block->host < block->max_length) {
          goto found;
@@ -2671,7 +2674,6 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
          }
      }
  
-    rcu_read_unlock();
      return NULL;
  
  found:
@@ -2679,7 +2681,6 @@ found:
      if (round_offset) {
          *offset &= TARGET_PAGE_MASK;
      }
-    rcu_read_unlock();
      return block;
  }
  
@@ -2959,8 +2960,17 @@ static void tcg_log_global_after_sync(MemoryListener *listener)
       * by pushing the migration thread's memory read after the vCPU thread has
       * written the memory.
       */
-    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
-    run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
+    if (replay_mode == REPLAY_MODE_NONE) {
+        /*
+         * VGA can make calls to this function while updating the screen.
+         * In record/replay mode this causes a deadlock, because
+         * run_on_cpu waits for the rr mutex. No races are possible in
+         * that mode, so run_on_cpu is only needed when record/replay
+         * is not enabled.
+         */
+        cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+        run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
+    }
  }
  
  static void tcg_commit(MemoryListener *listener)
@@ -3266,10 +3276,9 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
      FlatView *fv;
  
      if (len > 0) {
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
          fv = address_space_to_flatview(as);
          result = flatview_read(fv, addr, attrs, buf, len);
-        rcu_read_unlock();
      }
  
      return result;
@@ -3283,10 +3292,9 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
      FlatView *fv;
  
      if (len > 0) {
-        rcu_read_lock();
+        RCU_READ_LOCK_GUARD();
          fv = address_space_to_flatview(as);
          result = flatview_write(fv, addr, attrs, buf, len);
-        rcu_read_unlock();
      }
  
      return result;
@@ -3326,7 +3334,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
      hwaddr addr1;
      MemoryRegion *mr;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      while (len > 0) {
          l = len;
          mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
@@ -3351,7 +3359,6 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
          buf += l;
          addr += l;
      }
-    rcu_read_unlock();
      return MEMTX_OK;
  }
  
@@ -3496,10 +3503,9 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr,
      FlatView *fv;
      bool result;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      fv = address_space_to_flatview(as);
      result = flatview_access_valid(fv, addr, len, is_write, attrs);
-    rcu_read_unlock();
      return result;
  }
  
@@ -3554,13 +3560,12 @@ void *address_space_map(AddressSpace *as,
      }
  
      l = len;
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      fv = address_space_to_flatview(as);
      mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
  
      if (!memory_access_is_direct(mr, is_write)) {
          if (atomic_xchg(&bounce.in_use, true)) {
-            rcu_read_unlock();
              return NULL;
          }
          /* Avoid unbounded allocations */
@@ -3576,7 +3581,6 @@ void *address_space_map(AddressSpace *as,
                                 bounce.buffer, l);
          }
  
-        rcu_read_unlock();
          *plen = l;
          return bounce.buffer;
      }
@@ -3586,7 +3590,6 @@ void *address_space_map(AddressSpace *as,
      *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
                                          l, is_write, attrs);
      ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
-    rcu_read_unlock();
  
      return ptr;
  }
@@ -3854,13 +3857,12 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr)
      hwaddr l = 1;
      bool res;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      mr = address_space_translate(&address_space_memory,
                                   phys_addr, &phys_addr, &l, false,
                                   MEMTXATTRS_UNSPECIFIED);
  
      res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
-    rcu_read_unlock();
      return res;
  }
  
@@ -3869,14 +3871,13 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
      RAMBlock *block;
      int ret = 0;
  
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
      RAMBLOCK_FOREACH(block) {
          ret = func(block, opaque);
          if (ret) {
              break;
          }
      }
-    rcu_read_unlock();
      return ret;
  }
  
@@ -3894,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
  
      uint8_t *host_startaddr = rb->host + start;
  
-    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+    if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
          error_report("ram_block_discard_range: Unaligned start address: %p",
                       host_startaddr);
          goto err;
@@ -3902,10 +3903,9 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
  
      if ((start + length) <= rb->used_length) {
          bool need_madvise, need_fallocate;
-        uint8_t *host_endaddr = host_startaddr + length;
-        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
-            error_report("ram_block_discard_range: Unaligned end address: %p",
-                         host_endaddr);
+        if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
+            error_report("ram_block_discard_range: Unaligned length: %zx",
+                         length);
              goto err;
          }