diff --git a/migration/ram.c b/migration/ram.c
index c844151ee951da0eb43435f03e815e8063a82ddd..ceea586b06baa3a08866a61ca2773da351cf19bb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -33,7 +33,6 @@
 #include "qemu/madvise.h"
 #include "qemu/main-loop.h"
 #include "xbzrle.h"
-#include "ram-compress.h"
 #include "ram.h"
 #include "migration.h"
 #include "migration-stats.h"
@@ -53,7 +52,6 @@
 #include "exec/target_page.h"
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
-#include "block.h"
 #include "sysemu/cpu-throttle.h"
 #include "savevm.h"
 #include "qemu/iov.h"
  * worked for pages that were filled with the same char.  We switched
  * it to only search for the zero value.  And to avoid confusion with
  * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
- */
-/*
- * RAM_SAVE_FLAG_FULL was obsoleted in 2009, it can be reused now
+ *
+ * RAM_SAVE_FLAG_FULL was obsoleted in 2009.
+ *
+ * RAM_SAVE_FLAG_COMPRESS_PAGE (0x100) was removed in QEMU 9.1.
  */
 #define RAM_SAVE_FLAG_FULL     0x01
 #define RAM_SAVE_FLAG_ZERO     0x02
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */
-#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 #define RAM_SAVE_FLAG_MULTIFD_FLUSH    0x200
 /* We can't use any flag that is bigger than 0x200 */
 
+/*
+ * mapped-ram migration supports O_DIRECT, so we need to make sure the
+ * userspace buffer, the IO operation size and the file offset are
+ * aligned according to the underlying device's block size. The first
+ * two are already aligned to page size, but we need to add padding to
+ * the file to align the offset.  We cannot read the block size
+ * dynamically because the migration file can be moved between
+ * different systems, so use 1M to cover most block sizes and to keep
+ * the file offset aligned at page size as well.
+ */
+#define MAPPED_RAM_FILE_OFFSET_ALIGNMENT 0x100000
+
+/*
+ * When doing mapped-ram migration, this is the amount we read from
+ * the pages region in the migration file at a time.
+ */
+#define MAPPED_RAM_LOAD_BUF_SIZE 0x100000
+
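As a sanity check on these two constants, here is a stand-alone sketch (not part of the patch; ROUND_UP_POW2 is a local stand-in for QEMU's ROUND_UP): rounding the pages region up to a 1 MiB boundary keeps it aligned for any power-of-two block size up to 1 MiB, and in particular keeps it page-aligned.

#include <assert.h>
#include <stdint.h>

#define ALIGNMENT 0x100000ULL  /* MAPPED_RAM_FILE_OFFSET_ALIGNMENT (1 MiB) */
#define ROUND_UP_POW2(n, a) (((n) + (a) - 1) & ~((a) - 1))

int main(void)
{
    uint64_t bitmap_end = 0x123456;  /* arbitrary end of header + bitmap */
    uint64_t pages_offset = ROUND_UP_POW2(bitmap_end, ALIGNMENT);

    assert(pages_offset == 0x200000);      /* next 1 MiB boundary */
    assert(pages_offset % ALIGNMENT == 0);
    assert(pages_offset % 4096 == 0);      /* hence page-aligned as well */
    return 0;
}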
 XBZRLECacheStats xbzrle_counters;
 
 /* used by the search for pages to send */
@@ -257,6 +273,10 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                       nr);
 }
 
+void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset)
+{
+    set_bit_atomic(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
+}
 #define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
 
 /*
@@ -305,17 +325,15 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
 
     qemu_put_be64(file, size);
     qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
+    g_free(le_bitmap);
     /*
      * Mark as an end, in case the middle part is screwed up due to
      * some "mysterious" reason.
      */
     qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
-    qemu_fflush(file);
-
-    g_free(le_bitmap);
-
-    if (qemu_file_get_error(file)) {
-        return qemu_file_get_error(file);
+    int ret = qemu_fflush(file);
+    if (ret) {
+        return ret;
     }
 
     return size + sizeof(size);
@@ -369,13 +387,6 @@ struct RAMState {
     bool xbzrle_started;
     /* Are we on the last stage of migration */
     bool last_stage;
-    /* compression statistics since the beginning of the period */
-    /* amount of count that no free thread to compress data */
-    uint64_t compress_thread_busy_prev;
-    /* amount bytes after compression */
-    uint64_t compressed_size_prev;
-    /* amount of compressed pages */
-    uint64_t compress_pages_prev;
 
     /* total handled target pages at the beginning of period */
     uint64_t target_page_count_prev;
@@ -435,9 +446,8 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp)
 {
     PrecopyNotifyData pnd;
     pnd.reason = reason;
-    pnd.errp = errp;
 
-    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
+    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd, errp);
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -455,7 +465,6 @@ void ram_transferred_add(uint64_t bytes)
     } else {
         stat64_add(&mig_stats.downtime_bytes, bytes);
     }
-    stat64_add(&mig_stats.transferred, bytes);
 }
 
 struct MigrationOps {
@@ -564,7 +573,7 @@ void mig_throttle_counter_reset(void)
 
     rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     rs->num_dirty_pages_period = 0;
-    rs->bytes_xfer_prev = stat64_get(&mig_stats.transferred);
+    rs->bytes_xfer_prev = migration_transferred_bytes();
 }
 
 /**
@@ -680,8 +689,7 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
     qemu_put_buffer(file, XBZRLE.encoded_buf, encoded_len);
     bytes_xbzrle += encoded_len + 1 + 2;
     /*
-     * Like compressed_size (please see update_compress_thread_counts),
-     * the xbzrle encoded bytes don't count the 8 byte header with
+     * The xbzrle encoded bytes don't count the 8 byte header with
      * RAM_SAVE_FLAG_CONTINUE.
      */
     xbzrle_counters.bytes += bytes_xbzrle - 8;
@@ -939,13 +947,12 @@ uint64_t ram_get_total_transferred_pages(void)
 {
     return stat64_get(&mig_stats.normal_pages) +
         stat64_get(&mig_stats.zero_pages) +
-        compression_counters.pages + xbzrle_counters.pages;
+        xbzrle_counters.pages;
 }
 
 static void migration_update_rates(RAMState *rs, int64_t end_time)
 {
     uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
-    double compressed_size;
 
     /* calculate period counters */
     stat64_set(&mig_stats.dirty_pages_rate,
@@ -973,26 +980,6 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
         rs->xbzrle_pages_prev = xbzrle_counters.pages;
         rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
     }
-
-    if (migrate_compress()) {
-        compression_counters.busy_rate = (double)(compression_counters.busy -
-            rs->compress_thread_busy_prev) / page_count;
-        rs->compress_thread_busy_prev = compression_counters.busy;
-
-        compressed_size = compression_counters.compressed_size -
-                          rs->compressed_size_prev;
-        if (compressed_size) {
-            double uncompressed_size = (compression_counters.pages -
-                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;
-
-            /* Compression-Ratio = Uncompressed-size / Compressed-size */
-            compression_counters.compression_rate =
-                                        uncompressed_size / compressed_size;
-
-            rs->compress_pages_prev = compression_counters.pages;
-            rs->compressed_size_prev = compression_counters.compressed_size;
-        }
-    }
 }
 
 /*
@@ -1030,17 +1017,10 @@ static void migration_trigger_throttle(RAMState *rs)
 {
     uint64_t threshold = migrate_throttle_trigger_threshold();
     uint64_t bytes_xfer_period =
-        stat64_get(&mig_stats.transferred) - rs->bytes_xfer_prev;
+        migration_transferred_bytes() - rs->bytes_xfer_prev;
     uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
     uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
 
-    /* During block migration the auto-converge logic incorrectly detects
-     * that ram migration makes no progress. Avoid this by disabling the
-     * throttling logic during the bulk phase of block migration. */
-    if (blk_mig_bulk_active()) {
-        return;
-    }
-
     /*
      * The following detection logic can be refined later. For now:
      * Check to see if the ratio between dirtied bytes and the approx.
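A worked example of the comparison this function builds up to, with made-up numbers (50 is the throttle-trigger-threshold parameter's default):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t MiB = 1024 * 1024;
    uint64_t threshold = 50;                 /* percent */
    uint64_t bytes_xfer_period = 100 * MiB;  /* sent since the last sync */
    uint64_t bytes_dirty_period = 60 * MiB;  /* dirtied since the last sync */
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    if (bytes_dirty_period > bytes_dirty_threshold) {
        /* 60 MiB dirtied vs. a 50 MiB threshold: start throttling */
        printf("would trigger throttling\n");
    }
    return 0;
}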
@@ -1075,14 +1055,14 @@ static void migration_bitmap_sync(RAMState *rs, bool last_stage)
     trace_migration_bitmap_sync_start();
     memory_global_dirty_log_sync(last_stage);
 
-    qemu_mutex_lock(&rs->bitmap_mutex);
-    WITH_RCU_READ_LOCK_GUARD() {
-        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            ramblock_sync_dirty_bitmap(rs, block);
+    WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) {
+        WITH_RCU_READ_LOCK_GUARD() {
+            RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+                ramblock_sync_dirty_bitmap(rs, block);
+            }
+            stat64_set(&mig_stats.dirty_bytes_last_sync, ram_bytes_remaining());
         }
-        stat64_set(&mig_stats.dirty_bytes_last_sync, ram_bytes_remaining());
     }
-    qemu_mutex_unlock(&rs->bitmap_mutex);
 
     memory_global_after_dirty_log_sync();
     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
@@ -1100,7 +1080,7 @@ static void migration_bitmap_sync(RAMState *rs, bool last_stage)
         /* reset period counters */
         rs->time_last_bitmap_sync = end_time;
         rs->num_dirty_pages_period = 0;
-        rs->bytes_xfer_prev = stat64_get(&mig_stats.transferred);
+        rs->bytes_xfer_prev = migration_transferred_bytes();
     }
     if (migrate_events()) {
         uint64_t generation = stat64_get(&mig_stats.dirty_sync_count);
@@ -1144,26 +1124,35 @@ void ram_release_page(const char *rbname, uint64_t offset)
  *
  * @rs: current RAM state
  * @pss: current PSS channel
- * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  */
-static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block,
+static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
                           ram_addr_t offset)
 {
-    uint8_t *p = block->host + offset;
+    uint8_t *p = pss->block->host + offset;
     QEMUFile *file = pss->pss_channel;
     int len = 0;
 
+    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
+        return 0;
+    }
+
     if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
         return 0;
     }
 
-    len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
+    stat64_add(&mig_stats.zero_pages, 1);
+
+    if (migrate_mapped_ram()) {
+        /* zero pages are not transferred with mapped-ram */
+        clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->file_bmap);
+        return 1;
+    }
+
+    len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO);
     qemu_put_byte(file, 0);
     len += 1;
-    ram_release_page(block->idstr, offset);
-
-    stat64_add(&mig_stats.zero_pages, 1);
+    ram_release_page(pss->block->idstr, offset);
     ram_transferred_add(len);
 
     /*
@@ -1172,7 +1161,7 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block,
      */
     if (rs->xbzrle_started) {
         XBZRLE_cache_lock();
-        xbzrle_cache_zero_page(block->offset + offset);
+        xbzrle_cache_zero_page(pss->block->offset + offset);
         XBZRLE_cache_unlock();
     }
 
@@ -1186,12 +1175,12 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block,
  *
  * Return true if the page has been saved, false otherwise.
  */
-static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
+static bool control_save_page(PageSearchStatus *pss,
                               ram_addr_t offset, int *pages)
 {
     int ret;
 
-    ret = rdma_control_save_page(pss->pss_channel, block->offset, offset,
+    ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
                                  TARGET_PAGE_SIZE);
     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
         return false;
@@ -1221,14 +1210,20 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block,
 {
     QEMUFile *file = pss->pss_channel;
 
-    ram_transferred_add(save_page_header(pss, pss->pss_channel, block,
-                                         offset | RAM_SAVE_FLAG_PAGE));
-    if (async) {
-        qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE,
-                              migrate_release_ram() &&
-                              migration_in_postcopy());
+    if (migrate_mapped_ram()) {
+        qemu_put_buffer_at(file, buf, TARGET_PAGE_SIZE,
+                           block->pages_offset + offset);
+        set_bit(offset >> TARGET_PAGE_BITS, block->file_bmap);
     } else {
-        qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
+        ram_transferred_add(save_page_header(pss, pss->pss_channel, block,
+                                             offset | RAM_SAVE_FLAG_PAGE));
+        if (async) {
+            qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE,
+                                  migrate_release_ram() &&
+                                  migration_in_postcopy());
+        } else {
+            qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
+        }
     }
     ram_transferred_add(TARGET_PAGE_SIZE);
     stat64_add(&mig_stats.normal_pages, 1);
@@ -1281,78 +1276,15 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
     return pages;
 }
 
-static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block,
-                                 ram_addr_t offset)
+static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
 {
-    if (multifd_queue_page(file, block, offset) < 0) {
+    if (!multifd_queue_page(block, offset)) {
         return -1;
     }
-    stat64_add(&mig_stats.normal_pages, 1);
 
     return 1;
 }
 
-static void
-update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
-{
-    ram_transferred_add(bytes_xmit);
-
-    if (param->result == RES_ZEROPAGE) {
-        stat64_add(&mig_stats.zero_pages, 1);
-        return;
-    }
-
-    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
-    compression_counters.compressed_size += bytes_xmit - 8;
-    compression_counters.pages++;
-}
-
-static bool save_page_use_compression(RAMState *rs);
-
-static int send_queued_data(CompressParam *param)
-{
-    PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_PRECOPY];
-    MigrationState *ms = migrate_get_current();
-    QEMUFile *file = ms->to_dst_file;
-    int len = 0;
-
-    RAMBlock *block = param->block;
-    ram_addr_t offset = param->offset;
-
-    if (param->result == RES_NONE) {
-        return 0;
-    }
-
-    assert(block == pss->last_sent_block);
-
-    if (param->result == RES_ZEROPAGE) {
-        assert(qemu_file_buffer_empty(param->file));
-        len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
-        qemu_put_byte(file, 0);
-        len += 1;
-        ram_release_page(block->idstr, offset);
-    } else if (param->result == RES_COMPRESS) {
-        assert(!qemu_file_buffer_empty(param->file));
-        len += save_page_header(pss, file, block,
-                                offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
-        len += qemu_put_qemu_file(file, param->file);
-    } else {
-        abort();
-    }
-
-    update_compress_thread_counts(param, len);
-
-    return len;
-}
-
-static void ram_flush_compressed_data(RAMState *rs)
-{
-    if (!save_page_use_compression(rs)) {
-        return;
-    }
-
-    flush_compressed_data(send_queued_data);
-}
 
 #define PAGE_ALL_CLEAN 0
 #define PAGE_TRY_AGAIN 1
@@ -1391,25 +1323,19 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
         pss->block = QLIST_NEXT_RCU(pss->block, next);
         if (!pss->block) {
             if (migrate_multifd() &&
-                !migrate_multifd_flush_after_each_section()) {
+                (!migrate_multifd_flush_after_each_section() ||
+                 migrate_mapped_ram())) {
                 QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
-                int ret = multifd_send_sync_main(f);
+                int ret = multifd_send_sync_main();
                 if (ret < 0) {
                     return ret;
                 }
-                qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
-                qemu_fflush(f);
+
+                if (!migrate_mapped_ram()) {
+                    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
+                    qemu_fflush(f);
+                }
             }
-            /*
-             * If memory migration starts over, we will meet a dirtied page
-             * which may still exists in compression threads's ring, so we
-             * should flush the compressed data to make sure the new page
-             * is not overwritten by the old one in the destination.
-             *
-             * Also If xbzrle is on, stop using the data compression at this
-             * point. In theory, xbzrle can do better than compression.
-             */
-            ram_flush_compressed_data(rs);
 
             /* Hit the end of the list */
             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
@@ -1955,7 +1881,8 @@ static void migration_page_queue_free(RAMState *rs)
  * @start: starting address from the start of the RAMBlock
  * @len: length (in bytes) to send
  */
-int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
+int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len,
+                         Error **errp)
 {
     RAMBlock *ramblock;
     RAMState *rs = ram_state;
@@ -1972,7 +1899,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
              * Shouldn't happen, we can't reuse the last RAMBlock if
              * it's the 1st request.
              */
-            error_report("ram_save_queue_pages no previous block");
+            error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no previous block");
             return -1;
         }
     } else {
@@ -1980,16 +1907,17 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
 
         if (!ramblock) {
             /* We shouldn't be asked for a non-existent RAMBlock */
-            error_report("ram_save_queue_pages no block '%s'", rbname);
+            error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no block '%s'", rbname);
             return -1;
         }
         rs->last_req_rb = ramblock;
     }
     trace_ram_save_queue_pages(ramblock->idstr, start, len);
     if (!offset_in_ramblock(ramblock, start + len - 1)) {
-        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
-                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
-                     __func__, start, len, ramblock->used_length);
+        error_setg(errp, "MIG_RP_MSG_REQ_PAGES request overrun, "
+                   "start=" RAM_ADDR_FMT " len="
+                   RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
+                   start, len, ramblock->used_length);
         return -1;
     }
 
@@ -2021,9 +1949,9 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
         assert(len % page_size == 0);
         while (len) {
             if (ram_save_host_page_urgent(pss)) {
-                error_report("%s: ram_save_host_page_urgent() failed: "
-                             "ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
-                             __func__, ramblock->idstr, start);
+                error_setg(errp, "ram_save_host_page_urgent() failed: "
+                           "ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
+                           ramblock->idstr, start);
                 ret = -1;
                 break;
             }
@@ -2058,59 +1986,6 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
     return 0;
 }
 
-static bool save_page_use_compression(RAMState *rs)
-{
-    if (!migrate_compress()) {
-        return false;
-    }
-
-    /*
-     * If xbzrle is enabled (e.g., after first round of migration), stop
-     * using the data compression. In theory, xbzrle can do better than
-     * compression.
-     */
-    if (rs->xbzrle_started) {
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * try to compress the page before posting it out, return true if the page
- * has been properly handled by compression, otherwise needs other
- * paths to handle it
- */
-static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
-                               RAMBlock *block, ram_addr_t offset)
-{
-    if (!save_page_use_compression(rs)) {
-        return false;
-    }
-
-    /*
-     * When starting the process of a new block, the first page of
-     * the block should be sent out before other pages in the same
-     * block, and all the pages in last block should have been sent
-     * out, keeping this order is important, because the 'cont' flag
-     * is used to avoid resending the block name.
-     *
-     * We post the fist page as normal page as compression will take
-     * much CPU resource.
-     */
-    if (block != pss->last_sent_block) {
-        ram_flush_compressed_data(rs);
-        return false;
-    }
-
-    if (compress_page_with_multi_thread(block, offset, send_queued_data) > 0) {
-        return true;
-    }
-
-    compression_counters.busy++;
-    return false;
-}
-
 /**
  * ram_save_target_page_legacy: save one target page
  *
@@ -2121,33 +1996,44 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
  */
 static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
-    RAMBlock *block = pss->block;
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
 
-    if (control_save_page(pss, block, offset, &res)) {
+    if (control_save_page(pss, offset, &res)) {
         return res;
     }
 
-    if (save_compress_page(rs, pss, block, offset)) {
+    if (save_zero_page(rs, pss, offset)) {
         return 1;
     }
 
-    if (save_zero_page(rs, pss, block, offset)) {
-        return 1;
-    }
+    return ram_save_page(rs, pss);
+}
+
+/**
+ * ram_save_target_page_multifd: send one target page to multifd workers
+ *
+ * Returns 1 if the page was queued, -1 otherwise.
+ *
+ * @rs: current RAM state
+ * @pss: data about the page we want to send
+ */
+static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
+{
+    RAMBlock *block = pss->block;
+    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
 
     /*
-     * Do not use multifd in postcopy as one whole host page should be
-     * placed.  Meanwhile postcopy requires atomic update of pages, so even
-     * if host page size == guest page size the dest guest during run may
-     * still see partially copied pages which is data corruption.
+     * While using multifd live migration, we still need to handle zero
+     * page checking on the migration main thread.
      */
-    if (migrate_multifd() && !migration_in_postcopy()) {
-        return ram_save_multifd_page(pss->pss_channel, block, offset);
+    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+        if (save_zero_page(rs, pss, offset)) {
+            return 1;
+        }
     }
 
-    return ram_save_page(rs, pss);
+    return ram_save_multifd_page(block, offset);
 }
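For reference, a sketch of where the zero-page check runs per detection mode. NONE and LEGACY appear in this patch; the MULTIFD value is an assumption based on the zero-page-detection migration parameter, not something these hunks show:

/* Where does the buffer_is_zero() check run, per detection mode? */
static const char *zero_page_check_site(ZeroPageDetection mode)
{
    switch (mode) {
    case ZERO_PAGE_DETECTION_NONE:
        return "nowhere: every page is sent as a normal page";
    case ZERO_PAGE_DETECTION_LEGACY:
        return "migration main thread, via save_zero_page()";
    case ZERO_PAGE_DETECTION_MULTIFD:  /* assumed name, see above */
        return "multifd send threads, alongside the page data";
    default:
        return "unknown";
    }
}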
 
 /* Should be called before sending a host page */
@@ -2465,14 +2351,27 @@ static void xbzrle_cleanup(void)
     XBZRLE_cache_unlock();
 }
 
+static void ram_bitmaps_destroy(void)
+{
+    RAMBlock *block;
+
+    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+        g_free(block->clear_bmap);
+        block->clear_bmap = NULL;
+        g_free(block->bmap);
+        block->bmap = NULL;
+        g_free(block->file_bmap);
+        block->file_bmap = NULL;
+    }
+}
+
 static void ram_save_cleanup(void *opaque)
 {
     RAMState **rsp = opaque;
-    RAMBlock *block;
 
     /* We don't use dirty log with background snapshots */
     if (!migrate_background_snapshot()) {
-        /* caller have hold iothread lock or is in a bh, so there is
+        /* caller must hold the BQL or be in a bh, so there is
          * no writing race against the migration bitmap
          */
         if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
@@ -2485,15 +2384,9 @@ static void ram_save_cleanup(void *opaque)
         }
     }
 
-    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-        g_free(block->clear_bmap);
-        block->clear_bmap = NULL;
-        g_free(block->bmap);
-        block->bmap = NULL;
-    }
+    ram_bitmaps_destroy();
 
     xbzrle_cleanup();
-    compress_threads_save_cleanup();
     ram_state_cleanup(rsp);
     g_free(migration_ops);
     migration_ops = NULL;
@@ -2746,44 +2639,41 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
  * For every allocation, we will try not to crash the VM if the
  * allocation failed.
  */
-static int xbzrle_init(void)
+static bool xbzrle_init(Error **errp)
 {
-    Error *local_err = NULL;
-
     if (!migrate_xbzrle()) {
-        return 0;
+        return true;
     }
 
     XBZRLE_cache_lock();
 
     XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
     if (!XBZRLE.zero_target_page) {
-        error_report("%s: Error allocating zero page", __func__);
+        error_setg(errp, "%s: Error allocating zero page", __func__);
         goto err_out;
     }
 
     XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
-                              TARGET_PAGE_SIZE, &local_err);
+                              TARGET_PAGE_SIZE, errp);
     if (!XBZRLE.cache) {
-        error_report_err(local_err);
         goto free_zero_page;
     }
 
     XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
     if (!XBZRLE.encoded_buf) {
-        error_report("%s: Error allocating encoded_buf", __func__);
+        error_setg(errp, "%s: Error allocating encoded_buf", __func__);
         goto free_cache;
     }
 
     XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
     if (!XBZRLE.current_buf) {
-        error_report("%s: Error allocating current_buf", __func__);
+        error_setg(errp, "%s: Error allocating current_buf", __func__);
         goto free_encoded_buf;
     }
 
     /* We are all good */
     XBZRLE_cache_unlock();
-    return 0;
+    return true;
 
 free_encoded_buf:
     g_free(XBZRLE.encoded_buf);
@@ -2796,16 +2686,16 @@ free_zero_page:
     XBZRLE.zero_target_page = NULL;
 err_out:
     XBZRLE_cache_unlock();
-    return -ENOMEM;
+    return false;
 }
 
-static int ram_state_init(RAMState **rsp)
+static bool ram_state_init(RAMState **rsp, Error **errp)
 {
     *rsp = g_try_new0(RAMState, 1);
 
     if (!*rsp) {
-        error_report("%s: Init ramstate fail", __func__);
-        return -1;
+        error_setg(errp, "%s: Failed to init ramstate", __func__);
+        return false;
     }
 
     qemu_mutex_init(&(*rsp)->bitmap_mutex);
@@ -2821,7 +2711,7 @@ static int ram_state_init(RAMState **rsp)
     (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;
     ram_state_reset(*rsp);
 
-    return 0;
+    return true;
 }
 
 static void ram_list_init_bitmaps(void)
@@ -2857,6 +2747,9 @@ static void ram_list_init_bitmaps(void)
              */
             block->bmap = bitmap_new(pages);
             bitmap_set(block->bmap, 0, pages);
+            if (migrate_mapped_ram()) {
+                block->file_bmap = bitmap_new(pages);
+            }
             block->clear_bmap_shift = shift;
             block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
         }
@@ -2876,39 +2769,53 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
     }
 }
 
-static void ram_init_bitmaps(RAMState *rs)
+static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
+    bool ret = true;
+
     qemu_mutex_lock_ramlist();
 
     WITH_RCU_READ_LOCK_GUARD() {
         ram_list_init_bitmaps();
         /* We don't use dirty log with background snapshots */
         if (!migrate_background_snapshot()) {
-            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
+            if (!ret) {
+                goto out_unlock;
+            }
             migration_bitmap_sync_precopy(rs, false);
         }
     }
+out_unlock:
     qemu_mutex_unlock_ramlist();
 
+    if (!ret) {
+        ram_bitmaps_destroy();
+        return false;
+    }
+
     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
      * containing all 1s to exclude any discarded pages from migration.
      */
     migration_bitmap_clear_discarded_pages(rs);
+    return true;
 }
 
-static int ram_init_all(RAMState **rsp)
+static int ram_init_all(RAMState **rsp, Error **errp)
 {
-    if (ram_state_init(rsp)) {
+    if (!ram_state_init(rsp, errp)) {
         return -1;
     }
 
-    if (xbzrle_init()) {
+    if (!xbzrle_init(errp)) {
         ram_state_cleanup(rsp);
         return -1;
     }
 
-    ram_init_bitmaps(*rsp);
+    if (!ram_init_bitmaps(*rsp, errp)) {
+        return -1;
+    }
 
     return 0;
 }
@@ -2951,10 +2858,9 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
     RAMBlock *block;
     ram_addr_t offset;
     size_t used_len, start, npages;
-    MigrationState *s = migrate_get_current();
 
     /* This function is currently expected to be used during live migration */
-    if (!migration_is_setup_or_active(s->state)) {
+    if (!migration_is_setup_or_active()) {
         return;
     }
 
@@ -2994,6 +2900,89 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
     }
 }
 
+#define MAPPED_RAM_HDR_VERSION 1
+struct MappedRamHeader {
+    uint32_t version;
+    /*
+     * The target's page size, so we know how many pages are in the
+     * bitmap.
+     */
+    uint64_t page_size;
+    /*
+     * The offset in the migration file where the pages bitmap is
+     * stored.
+     */
+    uint64_t bitmap_offset;
+    /*
+     * The offset in the migration file where the actual pages (data)
+     * are stored.
+     */
+    uint64_t pages_offset;
+} QEMU_PACKED;
+typedef struct MappedRamHeader MappedRamHeader;
+
+static void mapped_ram_setup_ramblock(QEMUFile *file, RAMBlock *block)
+{
+    g_autofree MappedRamHeader *header = NULL;
+    size_t header_size, bitmap_size;
+    long num_pages;
+
+    header = g_new0(MappedRamHeader, 1);
+    header_size = sizeof(MappedRamHeader);
+
+    num_pages = block->used_length >> TARGET_PAGE_BITS;
+    bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+    /*
+     * Save the file offsets of where the bitmap and the pages should
+     * go as they are written at the end of migration and during the
+     * iterative phase, respectively.
+     */
+    block->bitmap_offset = qemu_get_offset(file) + header_size;
+    block->pages_offset = ROUND_UP(block->bitmap_offset +
+                                   bitmap_size,
+                                   MAPPED_RAM_FILE_OFFSET_ALIGNMENT);
+
+    header->version = cpu_to_be32(MAPPED_RAM_HDR_VERSION);
+    header->page_size = cpu_to_be64(TARGET_PAGE_SIZE);
+    header->bitmap_offset = cpu_to_be64(block->bitmap_offset);
+    header->pages_offset = cpu_to_be64(block->pages_offset);
+
+    qemu_put_buffer(file, (uint8_t *) header, header_size);
+
+    /* prepare offset for next ramblock */
+    qemu_set_offset(file, block->pages_offset + block->used_length, SEEK_SET);
+}
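To make the offsets concrete, a worked example with made-up numbers: a 4 GiB ramblock with 4 KiB target pages, whose header happens to land at file offset 0x2000 (sizeof(MappedRamHeader) is 28 bytes: a packed u32 plus three u64):

    num_pages     = 4 GiB / 4 KiB                     = 1048576
    bitmap_size   = 1048576 bits / 8                  = 0x20000 bytes
    bitmap_offset = 0x2000 + 28                       = 0x201c
    pages_offset  = ROUND_UP(0x201c + 0x20000, 1 MiB) = 0x100000
    next header   = pages_offset + 4 GiB              = 0x100100000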
+
+static bool mapped_ram_read_header(QEMUFile *file, MappedRamHeader *header,
+                                   Error **errp)
+{
+    size_t ret, header_size = sizeof(MappedRamHeader);
+
+    ret = qemu_get_buffer(file, (uint8_t *)header, header_size);
+    if (ret != header_size) {
+        error_setg(errp, "Could not read whole mapped-ram migration header "
+                   "(expected %zd, got %zd bytes)", header_size, ret);
+        return false;
+    }
+
+    /* migration stream is big-endian */
+    header->version = be32_to_cpu(header->version);
+
+    if (header->version > MAPPED_RAM_HDR_VERSION) {
+        error_setg(errp, "Migration mapped-ram capability version not "
+                   "supported (expected <= %d, got %d)", MAPPED_RAM_HDR_VERSION,
+                   header->version);
+        return false;
+    }
+
+    header->page_size = be64_to_cpu(header->page_size);
+    header->bitmap_offset = be64_to_cpu(header->bitmap_offset);
+    header->pages_offset = be64_to_cpu(header->pages_offset);
+
+    return true;
+}
+
 /*
  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
@@ -3008,26 +2997,28 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
  *
  * @f: QEMUFile where to send the data
  * @opaque: RAMState pointer
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
     RAMState **rsp = opaque;
     RAMBlock *block;
-    int ret;
-
-    if (compress_threads_save_setup()) {
-        return -1;
-    }
+    int ret, max_hg_page_size;
 
     /* migration has already setup the bitmap, reuse it. */
     if (!migration_in_colo_state()) {
-        if (ram_init_all(rsp) != 0) {
-            compress_threads_save_cleanup();
+        if (ram_init_all(rsp, errp) != 0) {
             return -1;
         }
     }
     (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
 
+    /*
+     * ??? Mirrors the previous value of qemu_host_page_size,
+     * but is this really what was intended for the migration?
+     */
+    max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE);
+
     WITH_RCU_READ_LOCK_GUARD() {
         qemu_put_be64(f, ram_bytes_total_with_ignored()
                          | RAM_SAVE_FLAG_MEM_SIZE);
@@ -3036,44 +3027,92 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
             qemu_put_byte(f, strlen(block->idstr));
             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
             qemu_put_be64(f, block->used_length);
-            if (migrate_postcopy_ram() && block->page_size !=
-                                          qemu_host_page_size) {
+            if (migrate_postcopy_ram() &&
+                block->page_size != max_hg_page_size) {
                 qemu_put_be64(f, block->page_size);
             }
             if (migrate_ignore_shared()) {
                 qemu_put_be64(f, block->mr->addr);
             }
+
+            if (migrate_mapped_ram()) {
+                mapped_ram_setup_ramblock(f, block);
+            }
         }
     }
 
     ret = rdma_registration_start(f, RAM_CONTROL_SETUP);
     if (ret < 0) {
+        error_setg(errp, "%s: failed to start RDMA registration", __func__);
         qemu_file_set_error(f, ret);
+        return ret;
     }
 
     ret = rdma_registration_stop(f, RAM_CONTROL_SETUP);
     if (ret < 0) {
+        error_setg(errp, "%s: failed to stop RDMA registration", __func__);
         qemu_file_set_error(f, ret);
+        return ret;
     }
 
     migration_ops = g_malloc0(sizeof(MigrationOps));
-    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
 
-    qemu_mutex_unlock_iothread();
-    ret = multifd_send_sync_main(f);
-    qemu_mutex_lock_iothread();
+    if (migrate_multifd()) {
+        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
+    } else {
+        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
+    }
+
+    bql_unlock();
+    ret = multifd_send_sync_main();
+    bql_lock();
     if (ret < 0) {
+        error_setg(errp, "%s: multifd synchronization failed", __func__);
         return ret;
     }
 
-    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
+    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()
+        && !migrate_mapped_ram()) {
         qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    qemu_fflush(f);
+    ret = qemu_fflush(f);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "%s failed", __func__);
+    }
+    return ret;
+}
 
-    return 0;
+static void ram_save_file_bmap(QEMUFile *f)
+{
+    RAMBlock *block;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        long num_pages = block->used_length >> TARGET_PAGE_BITS;
+        long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+        qemu_put_buffer_at(f, (uint8_t *)block->file_bmap, bitmap_size,
+                           block->bitmap_offset);
+        ram_transferred_add(bitmap_size);
+
+        /*
+         * Free the bitmap here to catch any synchronization issues
+         * with multifd channels. No channels should be sending pages
+         * after we've written the bitmap to file.
+         */
+        g_free(block->file_bmap);
+        block->file_bmap = NULL;
+    }
+}
+
+void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
+{
+    if (set) {
+        set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+    } else {
+        clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
+    }
 }
 
 /**
@@ -3093,13 +3132,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     int64_t t0;
     int done = 0;
 
-    if (blk_mig_bulk_active()) {
-        /* Avoid transferring ram during bulk phase of block migration as
-         * the bulk phase will usually take a long time and transferring
-         * ram updates during that time is pointless. */
-        goto out;
-    }
-
     /*
      * We'll take this lock a little bit long, but it's okay for two reasons.
      * Firstly, the only possible other thread to take it is who calls
      * qemu_guest_free_page_hint(), which should be rare; secondly, we
      * should also be able to break the loop anytime because of the
      * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
      * guarantees that we'll at least release it on a regular basis.
      */
-    qemu_mutex_lock(&rs->bitmap_mutex);
-    WITH_RCU_READ_LOCK_GUARD() {
-        if (ram_list.version != rs->last_version) {
-            ram_state_reset(rs);
-        }
-
-        /* Read version before ram_list.blocks */
-        smp_rmb();
-
-        ret = rdma_registration_start(f, RAM_CONTROL_ROUND);
-        if (ret < 0) {
-            qemu_file_set_error(f, ret);
-        }
-
-        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-        i = 0;
-        while ((ret = migration_rate_exceeded(f)) == 0 ||
-               postcopy_has_request(rs)) {
-            int pages;
-
-            if (qemu_file_get_error(f)) {
-                break;
+    WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) {
+        WITH_RCU_READ_LOCK_GUARD() {
+            if (ram_list.version != rs->last_version) {
+                ram_state_reset(rs);
             }
 
-            pages = ram_find_and_save_block(rs);
-            /* no more pages to sent */
-            if (pages == 0) {
-                done = 1;
-                break;
-            }
+            /* Read version before ram_list.blocks */
+            smp_rmb();
 
-            if (pages < 0) {
-                qemu_file_set_error(f, pages);
-                break;
+            ret = rdma_registration_start(f, RAM_CONTROL_ROUND);
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+                goto out;
             }
 
-            rs->target_page_count += pages;
+            t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+            i = 0;
+            while ((ret = migration_rate_exceeded(f)) == 0 ||
+                   postcopy_has_request(rs)) {
+                int pages;
 
-            /*
-             * During postcopy, it is necessary to make sure one whole host
-             * page is sent in one chunk.
-             */
-            if (migrate_postcopy_ram()) {
-                ram_flush_compressed_data(rs);
-            }
+                if (qemu_file_get_error(f)) {
+                    break;
+                }
 
-            /*
-             * we want to check in the 1st loop, just in case it was the 1st
-             * time and we had to sync the dirty bitmap.
-             * qemu_clock_get_ns() is a bit expensive, so we only check each
-             * some iterations
-             */
-            if ((i & 63) == 0) {
-                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
-                              1000000;
-                if (t1 > MAX_WAIT) {
-                    trace_ram_save_iterate_big_wait(t1, i);
+                pages = ram_find_and_save_block(rs);
+                /* no more pages to send */
+                if (pages == 0) {
+                    done = 1;
+                    break;
+                }
+
+                if (pages < 0) {
+                    qemu_file_set_error(f, pages);
                     break;
                 }
+
+                rs->target_page_count += pages;
+
+                /*
+                 * We want to check in the 1st loop, just in case it was the
+                 * 1st time and we had to sync the dirty bitmap.
+                 * qemu_clock_get_ns() is a bit expensive, so we only check
+                 * once every few iterations.
+                 */
+                if ((i & 63) == 0) {
+                    uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
+                        1000000;
+                    if (t1 > MAX_WAIT) {
+                        trace_ram_save_iterate_big_wait(t1, i);
+                        break;
+                    }
+                }
+                i++;
             }
-            i++;
         }
     }
-    qemu_mutex_unlock(&rs->bitmap_mutex);
 
     /*
      * Must occur before EOS (or any QEMUFile operation)
@@ -3183,19 +3208,18 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 
 out:
     if (ret >= 0
-        && migration_is_setup_or_active(migrate_get_current()->state)) {
-        if (migrate_multifd() && migrate_multifd_flush_after_each_section()) {
-            ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
+        && migration_is_setup_or_active()) {
+        if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
+            !migrate_mapped_ram()) {
+            ret = multifd_send_sync_main();
             if (ret < 0) {
                 return ret;
             }
         }
 
         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-        qemu_fflush(f);
         ram_transferred_add(8);
-
-        ret = qemu_file_get_error(f);
+        ret = qemu_fflush(f);
     }
     if (ret < 0) {
         return ret;
@@ -3209,7 +3233,7 @@ out:
  *
  * Returns zero to indicate success or negative on error
  *
- * Called with iothread lock
+ * Called with the BQL
  *
  * @f: QEMUFile where to send the data
  * @opaque: RAMState pointer
@@ -3230,6 +3254,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         ret = rdma_registration_start(f, RAM_CONTROL_FINISH);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
+            return ret;
         }
 
         /* try transferring iterative blocks of memory */
@@ -3245,36 +3270,42 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
                 break;
             }
             if (pages < 0) {
-                ret = pages;
-                break;
+                qemu_mutex_unlock(&rs->bitmap_mutex);
+                return pages;
             }
         }
         qemu_mutex_unlock(&rs->bitmap_mutex);
 
-        ram_flush_compressed_data(rs);
-
-        int ret = rdma_registration_stop(f, RAM_CONTROL_FINISH);
+        ret = rdma_registration_stop(f, RAM_CONTROL_FINISH);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
+            return ret;
         }
     }
 
+    ret = multifd_send_sync_main();
     if (ret < 0) {
         return ret;
     }
 
-    ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
-    if (ret < 0) {
-        return ret;
+    if (migrate_mapped_ram()) {
+        ram_save_file_bmap(f);
+
+        if (qemu_file_get_error(f)) {
+            Error *local_err = NULL;
+            int err = qemu_file_get_error_obj(f, &local_err);
+
+            error_reportf_err(local_err, "Failed to write bitmap to file: ");
+            return -err;
+        }
     }
 
-    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) {
+    if (migrate_multifd() && !migrate_multifd_flush_after_each_section() &&
+        !migrate_mapped_ram()) {
         qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
     }
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    qemu_fflush(f);
-
-    return 0;
+    return qemu_fflush(f);
 }
 
 static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
@@ -3296,21 +3327,20 @@ static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
 static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
                                     uint64_t *can_postcopy)
 {
-    MigrationState *s = migrate_get_current();
     RAMState **temp = opaque;
     RAMState *rs = *temp;
+    uint64_t remaining_size;
 
-    uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
-
-    if (!migration_in_postcopy() && remaining_size < s->threshold_size) {
-        qemu_mutex_lock_iothread();
+    if (!migration_in_postcopy()) {
+        bql_lock();
         WITH_RCU_READ_LOCK_GUARD() {
             migration_bitmap_sync_precopy(rs, false);
         }
-        qemu_mutex_unlock_iothread();
-        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
+        bql_unlock();
     }
 
+    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
+
     if (migrate_postcopy_ram()) {
         /* We can do postcopy, and all the data is postcopiable */
         *can_postcopy += remaining_size;
@@ -3461,7 +3491,7 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
 }
 
 /**
- * ram_handle_compressed: handle the zero page case
+ * ram_handle_zero: handle the zero page case
  *
  * If a page (or a whole RDMA chunk) has been
  * determined to be zero, then zap it.
@@ -3470,16 +3500,20 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
  * @ch: what the page is filled from.  We only support zero
  * @size: size of the zero page
  */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+void ram_handle_zero(void *host, uint64_t size)
 {
-    if (ch != 0 || !buffer_is_zero(host, size)) {
-        memset(host, ch, size);
+    if (!buffer_is_zero(host, size)) {
+        memset(host, 0, size);
     }
 }
 
 static void colo_init_ram_state(void)
 {
-    ram_state_init(&ram_state);
+    Error *local_err = NULL;
+
+    if (!ram_state_init(&ram_state, &local_err)) {
+        error_report_err(local_err);
+    }
 }
 
 /*
@@ -3534,8 +3568,10 @@ int colo_init_ram_cache(void)
 void colo_incoming_start_dirty_log(void)
 {
     RAMBlock *block = NULL;
+    Error *local_err = NULL;
+
     /* For memory_global_dirty_log_start below. */
-    qemu_mutex_lock_iothread();
+    bql_lock();
     qemu_mutex_lock_ramlist();
 
     memory_global_dirty_log_sync(false);
@@ -3545,11 +3581,14 @@ void colo_incoming_start_dirty_log(void)
             /* Discard this dirty bitmap record */
             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
         }
-        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+        if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
+                                           &local_err)) {
+            error_report_err(local_err);
+        }
     }
     ram_state->migration_dirty_pages = 0;
     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
+    bql_unlock();
 }
 
 /* It is need to hold the global lock to call this helper */
@@ -3581,8 +3620,9 @@ void colo_release_ram_cache(void)
  *
  * @f: QEMUFile where to receive the data
  * @opaque: RAMState pointer
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-static int ram_load_setup(QEMUFile *f, void *opaque)
+static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
     xbzrle_load_setup();
     ramblock_recv_map_init();
@@ -3649,7 +3689,6 @@ int ram_load_postcopy(QEMUFile *f, int channel)
         void *place_source = NULL;
         RAMBlock *block = NULL;
         uint8_t ch;
-        int len;
 
         addr = qemu_get_be64(f);
 
@@ -3666,8 +3705,7 @@ int ram_load_postcopy(QEMUFile *f, int channel)
         addr &= TARGET_PAGE_MASK;
 
         trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
-        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
-                     RAM_SAVE_FLAG_COMPRESS_PAGE)) {
+        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
             block = ram_block_from_stream(mis, f, flags, channel);
             if (!block) {
                 ret = -EINVAL;
@@ -3730,16 +3768,18 @@ int ram_load_postcopy(QEMUFile *f, int channel)
         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
         case RAM_SAVE_FLAG_ZERO:
             ch = qemu_get_byte(f);
+            if (ch != 0) {
+                error_report("Found a zero page with value %d", ch);
+                ret = -EINVAL;
+                break;
+            }
             /*
              * Can skip to set page_buffer when
              * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
              */
-            if (ch || !matches_target_page_size) {
+            if (!matches_target_page_size) {
                 memset(page_buffer, ch, TARGET_PAGE_SIZE);
             }
-            if (ch) {
-                tmp_page->all_zero = false;
-            }
             break;
 
         case RAM_SAVE_FLAG_PAGE:
@@ -3760,16 +3800,6 @@ int ram_load_postcopy(QEMUFile *f, int channel)
                                          TARGET_PAGE_SIZE);
             }
             break;
-        case RAM_SAVE_FLAG_COMPRESS_PAGE:
-            tmp_page->all_zero = false;
-            len = qemu_get_be32(f);
-            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
-                error_report("Invalid compressed data length: %d", len);
-                ret = -EINVAL;
-                break;
-            }
-            decompress_data_with_multi_threads(f, page_buffer, len);
-            break;
         case RAM_SAVE_FLAG_MULTIFD_FLUSH:
             multifd_recv_sync_main();
             break;
@@ -3787,11 +3817,6 @@ int ram_load_postcopy(QEMUFile *f, int channel)
             break;
         }
 
-        /* Got the whole host page, wait for decompress before placing. */
-        if (place_needed) {
-            ret |= wait_for_decompress_done();
-        }
-
         /* Detect for any possible file errors */
         if (!ret && qemu_file_get_error(f)) {
             ret = qemu_file_get_error(f);
@@ -3869,36 +3894,171 @@ void colo_flush_ram_cache(void)
     trace_colo_flush_ram_cache_end();
 }
 
+static size_t ram_load_multifd_pages(void *host_addr, size_t size,
+                                     uint64_t offset)
+{
+    MultiFDRecvData *data = multifd_get_recv_data();
+
+    data->opaque = host_addr;
+    data->file_offset = offset;
+    data->size = size;
+
+    if (!multifd_recv()) {
+        return 0;
+    }
+
+    return size;
+}
+
+static bool read_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block,
+                                     long num_pages, unsigned long *bitmap,
+                                     Error **errp)
+{
+    ERRP_GUARD();
+    unsigned long set_bit_idx, clear_bit_idx;
+    ram_addr_t offset;
+    void *host;
+    size_t read, unread, size;
+
+    for (set_bit_idx = find_first_bit(bitmap, num_pages);
+         set_bit_idx < num_pages;
+         set_bit_idx = find_next_bit(bitmap, num_pages, clear_bit_idx + 1)) {
+
+        clear_bit_idx = find_next_zero_bit(bitmap, num_pages, set_bit_idx + 1);
+
+        unread = TARGET_PAGE_SIZE * (clear_bit_idx - set_bit_idx);
+        offset = set_bit_idx << TARGET_PAGE_BITS;
+
+        while (unread > 0) {
+            host = host_from_ram_block_offset(block, offset);
+            if (!host) {
+                error_setg(errp, "page outside of ramblock %s range",
+                           block->idstr);
+                return false;
+            }
+
+            size = MIN(unread, MAPPED_RAM_LOAD_BUF_SIZE);
+
+            if (migrate_multifd()) {
+                read = ram_load_multifd_pages(host, size,
+                                              block->pages_offset + offset);
+            } else {
+                read = qemu_get_buffer_at(f, host, size,
+                                          block->pages_offset + offset);
+            }
+
+            if (!read) {
+                goto err;
+            }
+            offset += read;
+            unread -= read;
+        }
+    }
+
+    return true;
+
+err:
+    qemu_file_get_error_obj(f, errp);
+    error_prepend(errp, "(%s) failed to read page " RAM_ADDR_FMT
+                  "from file offset %" PRIx64 ": ", block->idstr, offset,
+                  block->pages_offset + offset);
+    return false;
+}
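The bitmap walk above reads each run of consecutive set bits in one go. A stand-alone illustration of the find_first_bit()/find_next_zero_bit() idiom, with plain-C stand-ins for QEMU's bitops (not part of the patch):

#include <stdio.h>

int main(void)
{
    const int num_pages = 16;
    const unsigned bmap = 0x021c;  /* bits 2-4 and 9 set */
    int start = 0;

    while (start < num_pages) {
        int set, clear;
        /* find_next_bit(): skip ahead to the next set bit */
        for (set = start; set < num_pages && !(bmap >> set & 1); set++) {
        }
        if (set == num_pages) {
            break;
        }
        /* find_next_zero_bit(): extend the run of set bits */
        for (clear = set + 1; clear < num_pages && (bmap >> clear & 1); clear++) {
        }
        printf("read pages [%d, %d) in one chunk\n", set, clear);
        start = clear + 1;
    }
    return 0;
}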
+
+static void parse_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block,
+                                      ram_addr_t length, Error **errp)
+{
+    g_autofree unsigned long *bitmap = NULL;
+    MappedRamHeader header;
+    size_t bitmap_size;
+    long num_pages;
+
+    if (!mapped_ram_read_header(f, &header, errp)) {
+        return;
+    }
+
+    block->pages_offset = header.pages_offset;
+
+    /*
+     * Check the alignment of the file region that contains pages. We
+     * don't enforce MAPPED_RAM_FILE_OFFSET_ALIGNMENT to allow that
+     * value to change in the future. Do only a sanity check with page
+     * size alignment.
+     */
+    if (!QEMU_IS_ALIGNED(block->pages_offset, TARGET_PAGE_SIZE)) {
+        error_setg(errp,
+                   "Error reading ramblock %s pages, region has bad alignment",
+                   block->idstr);
+        return;
+    }
+
+    num_pages = length / header.page_size;
+    bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+    bitmap = g_malloc0(bitmap_size);
+    if (qemu_get_buffer_at(f, (uint8_t *)bitmap, bitmap_size,
+                           header.bitmap_offset) != bitmap_size) {
+        error_setg(errp, "Error reading dirty bitmap");
+        return;
+    }
+
+    if (!read_ramblock_mapped_ram(f, block, num_pages, bitmap, errp)) {
+        return;
+    }
+
+    /* Skip pages array */
+    qemu_set_offset(f, block->pages_offset + length, SEEK_SET);
+
+    return;
+}
+
 static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
 {
     int ret = 0;
     /* ADVISE is earlier, it shows the source has the postcopy capability on */
     bool postcopy_advised = migration_incoming_postcopy_advised();
+    int max_hg_page_size;
+    Error *local_err = NULL;
 
     assert(block);
 
+    if (migrate_mapped_ram()) {
+        parse_ramblock_mapped_ram(f, block, length, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -EINVAL;
+        }
+        return 0;
+    }
+
     if (!qemu_ram_is_migratable(block)) {
         error_report("block %s should not be migrated !", block->idstr);
         return -EINVAL;
     }
 
     if (length != block->used_length) {
-        Error *local_err = NULL;
-
         ret = qemu_ram_resize(block, length, &local_err);
         if (local_err) {
             error_report_err(local_err);
+            return ret;
         }
     }
+
+    /*
+     * ??? Mirrors the previous value of qemu_host_page_size,
+     * but is this really what was intended for the migration?
+     */
+    max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE);
+
     /* For postcopy we need to check hugepage sizes match */
     if (postcopy_advised && migrate_postcopy_ram() &&
-        block->page_size != qemu_host_page_size) {
+        block->page_size != max_hg_page_size) {
         uint64_t remote_page_size = qemu_get_be64(f);
         if (remote_page_size != block->page_size) {
             error_report("Mismatched RAM page size %s "
                          "(local) %zd != %" PRId64, block->idstr,
                          block->page_size, remote_page_size);
-            ret = -EINVAL;
+            return -EINVAL;
         }
     }
     if (migrate_ignore_shared()) {
@@ -3908,7 +4068,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
             error_report("Mismatched GPAs for block %s "
                          "%" PRId64 "!= %" PRId64, block->idstr,
                          (uint64_t)addr, (uint64_t)block->mr->addr);
-            ret = -EINVAL;
+            return -EINVAL;
         }
     }
     ret = rdma_block_notification_handle(f, block->idstr);
@@ -3961,10 +4121,12 @@ static int parse_ramblocks(QEMUFile *f, ram_addr_t total_ram_bytes)
 static int ram_load_precopy(QEMUFile *f)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
-    int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
+    int flags = 0, ret = 0, invalid_flags = 0, i = 0;
 
-    if (!migrate_compress()) {
-        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
+    if (migrate_mapped_ram()) {
+        invalid_flags |= (RAM_SAVE_FLAG_HOOK | RAM_SAVE_FLAG_MULTIFD_FLUSH |
+                          RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_XBZRLE |
+                          RAM_SAVE_FLAG_ZERO);
     }
 
     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
@@ -3984,20 +4146,24 @@ static int ram_load_precopy(QEMUFile *f)
         i++;
 
         addr = qemu_get_be64(f);
+        ret = qemu_file_get_error(f);
+        if (ret) {
+            error_report("Getting RAM address failed");
+            break;
+        }
+
         flags = addr & ~TARGET_PAGE_MASK;
         addr &= TARGET_PAGE_MASK;
 
         if (flags & invalid_flags) {
-            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
-                error_report("Received an unexpected compressed page");
-            }
+            error_report("Unexpected RAM flags: %d", flags & invalid_flags);
 
             ret = -EINVAL;
             break;
         }
 
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
-                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
+                     RAM_SAVE_FLAG_XBZRLE)) {
             RAMBlock *block = ram_block_from_stream(mis, f, flags,
                                                     RAM_CHANNEL_PRECOPY);
 
@@ -4040,27 +4206,32 @@ static int ram_load_precopy(QEMUFile *f)
         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
         case RAM_SAVE_FLAG_MEM_SIZE:
             ret = parse_ramblocks(f, addr);
+            /*
+             * For mapped-ram migration (to a file) using multifd, we sync
+             * once and for all here to make sure all tasks we queued to
+             * multifd threads are completed, so that all the ramblocks
+             * (including all the guest memory pages within) are fully
+             * loaded after this sync returns.
+             */
+            if (migrate_mapped_ram()) {
+                multifd_recv_sync_main();
+            }
             break;
 
         case RAM_SAVE_FLAG_ZERO:
             ch = qemu_get_byte(f);
-            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            if (ch != 0) {
+                error_report("Found a zero page with value %d", ch);
+                ret = -EINVAL;
+                break;
+            }
+            ram_handle_zero(host, TARGET_PAGE_SIZE);
             break;
 
         case RAM_SAVE_FLAG_PAGE:
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
             break;
 
-        case RAM_SAVE_FLAG_COMPRESS_PAGE:
-            len = qemu_get_be32(f);
-            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
-                error_report("Invalid compressed data length: %d", len);
-                ret = -EINVAL;
-                break;
-            }
-            decompress_data_with_multi_threads(f, host, len);
-            break;
-
         case RAM_SAVE_FLAG_XBZRLE:
             if (load_xbzrle(f, addr, host) < 0) {
                 error_report("Failed to decompress XBZRLE page at "
@@ -4075,7 +4246,12 @@ static int ram_load_precopy(QEMUFile *f)
         case RAM_SAVE_FLAG_EOS:
             /* normal exit */
             if (migrate_multifd() &&
-                migrate_multifd_flush_after_each_section()) {
+                migrate_multifd_flush_after_each_section() &&
+                /*
+                 * Mapped-ram migration flushes once and for all after
+                 * parsing ramblocks. Always ignore EOS for it.
+                 */
+                !migrate_mapped_ram()) {
                 multifd_recv_sync_main();
             }
             break;
@@ -4097,7 +4273,6 @@ static int ram_load_precopy(QEMUFile *f)
         }
     }
 
-    ret |= wait_for_decompress_done();
     return ret;
 }
 
@@ -4173,7 +4348,9 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
 
     /* Wait until all the ramblocks' dirty bitmap synced */
     while (qatomic_read(&rs->postcopy_bmap_sync_requested)) {
-        migration_rp_wait(s);
+        if (migration_rp_wait(s)) {
+            return -1;
+        }
     }
 
     trace_ram_dirty_bitmap_sync_complete();
@@ -4185,10 +4362,11 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
  * Read the received bitmap, revert it as the initial dirty bitmap.
  * This is only used when the postcopy migration is paused but wants
  * to resume from a middle point.
+ *
+ * Returns true on success, false on error.
  */
-int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
+bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp)
 {
-    int ret = -EINVAL;
     /* from_dst_file is always valid because we're within rp_thread */
     QEMUFile *file = s->rp_state.from_dst_file;
     g_autofree unsigned long *le_bitmap = NULL;
@@ -4200,9 +4378,9 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
     trace_ram_dirty_bitmap_reload_begin(block->idstr);
 
     if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
-        error_report("%s: incorrect state %s", __func__,
-                     MigrationStatus_str(s->state));
-        return -EINVAL;
+        error_setg(errp, "Reload bitmap in incorrect state %s",
+                   MigrationStatus_str(s->state));
+        return false;
     }
 
     /*
@@ -4218,27 +4396,25 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
 
     /* The size of the bitmap should match with our ramblock */
     if (size != local_size) {
-        error_report("%s: ramblock '%s' bitmap size mismatch "
-                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
-                     block->idstr, size, local_size);
-        return -EINVAL;
+        error_setg(errp, "ramblock '%s' bitmap size mismatch (0x%"PRIx64
+                   " != 0x%"PRIx64")", block->idstr, size, local_size);
+        return false;
     }
 
     size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
     end_mark = qemu_get_be64(file);
 
-    ret = qemu_file_get_error(file);
-    if (ret || size != local_size) {
-        error_report("%s: read bitmap failed for ramblock '%s': %d"
-                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
-                     __func__, block->idstr, ret, local_size, size);
-        return -EIO;
+    if (qemu_file_get_error(file) || size != local_size) {
+        error_setg(errp, "read bitmap failed for ramblock '%s': "
+                   "(size 0x%"PRIx64", got: 0x%"PRIx64")",
+                   block->idstr, local_size, size);
+        return false;
     }
 
     if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
-        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
-                     __func__, block->idstr, end_mark);
-        return -EINVAL;
+        error_setg(errp, "ramblock '%s' end mark incorrect: 0x%"PRIx64,
+                   block->idstr, end_mark);
+        return false;
     }
 
     /*
@@ -4270,7 +4446,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
      */
     migration_rp_kick(s);
 
-    return 0;
+    return true;
 }
 
 static int ram_resume_prepare(MigrationState *s, void *opaque)