migration: Cleanup xbzrle zero page cache update logic
diff --git a/migration/ram.c b/migration/ram.c
index a9d0d100bda457451cb587edad9ac0a3417042a7..7124ff531c841ed0c5645bcbfcc9b041de3f4727 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -32,6 +32,7 @@
 #include "qemu/bitmap.h"
 #include "qemu/madvise.h"
 #include "qemu/main-loop.h"
+#include "io/channel-null.h"
 #include "xbzrle.h"
 #include "ram.h"
 #include "migration.h"
@@ -161,6 +162,11 @@ out:
     return ret;
 }
 
+static bool postcopy_preempt_active(void)
+{
+    return migrate_postcopy_preempt() && migration_in_postcopy();
+}
+
 bool ramblock_is_ignored(RAMBlock *block)
 {
     return !qemu_ram_is_migratable(block) ||
@@ -295,6 +301,20 @@ struct RAMSrcPageRequest {
     QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
 };
 
+typedef struct {
+    /*
+     * Cached ramblock/offset values if preempted.  They're only meaningful if
+     * preempted==true below.
+     */
+    RAMBlock *ram_block;
+    unsigned long ram_page;
+    /*
+     * Whether a postcopy preemption just happened.  It will be reset after
+     * the preempted precopy is recovered back to background migration.
+     */
+    bool preempted;
+} PostcopyPreemptState;
+
 /* State of RAM for migration */
 struct RAMState {
     /* QEMUFile used for this migration */
@@ -349,6 +369,14 @@ struct RAMState {
     /* Queue of outstanding page requests from the destination */
     QemuMutex src_page_req_mutex;
     QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
+
+    /* Postcopy preemption information */
+    PostcopyPreemptState postcopy_preempt_state;
+    /*
+     * The current channel we're using on the src VM.  Only valid if
+     * postcopy-preempt is enabled.
+     */
+    unsigned int postcopy_channel;
 };
 typedef struct RAMState RAMState;
 
@@ -356,6 +384,11 @@ static RAMState *ram_state;
 
 static NotifierWithReturnList precopy_notifier_list;
 
+static void postcopy_preempt_reset(RAMState *rs)
+{
+    memset(&rs->postcopy_preempt_state, 0, sizeof(PostcopyPreemptState));
+}
+
 /* Whether postcopy has queued requests? */
 static bool postcopy_has_request(RAMState *rs)
 {
@@ -394,7 +427,7 @@ uint64_t ram_bytes_remaining(void)
 
 MigrationStats ram_counters;
 
-static void ram_transferred_add(uint64_t bytes)
+void ram_transferred_add(uint64_t bytes)
 {
     if (runstate_is_running()) {
         ram_counters.precopy_bytes += bytes;
@@ -406,6 +439,11 @@ static void ram_transferred_add(uint64_t bytes)
     ram_counters.transferred += bytes;
 }
 
+void dirty_sync_missed_zero_copy(void)
+{
+    ram_counters.dirty_sync_missed_zero_copy++;
+}
+
 /* used by the search for pages to send */
 struct PageSearchStatus {
     /* Current block being searched */
@@ -414,6 +452,28 @@ struct PageSearchStatus {
     unsigned long page;
     /* Set once we wrap around */
     bool         complete_round;
+    /*
+     * [POSTCOPY-ONLY] Whether the current page is explicitly requested by
+     * postcopy.  When set, the request is "urgent" because the dest QEMU
+     * threads are waiting for us.
+     */
+    bool         postcopy_requested;
+    /*
+     * [POSTCOPY-ONLY] The target channel to use to send the current page.
+     *
+     * Note: This may _not_ match the value of postcopy_requested above.
+     * Imagine the case where the postcopy request is exactly the page
+     * that we're in the middle of sending during precopy.  In this case
+     * we'll have postcopy_requested set to true but the target channel
+     * will be the precopy channel (so that we don't "split brain" that
+     * specific page, since the precopy channel already contains part of
+     * that page's data).
+     *
+     * Besides that specific use case, postcopy_target_channel should
+     * always be consistent with postcopy_requested, because by default
+     * we send postcopy pages via the postcopy preempt channel.
+     */
+    bool         postcopy_target_channel;
 };
 typedef struct PageSearchStatus PageSearchStatus;
 
@@ -455,8 +515,6 @@ static QemuThread *compress_threads;
  */
 static QemuMutex comp_done_lock;
 static QemuCond comp_done_cond;
-/* The empty QEMUFileOps will be used by file in CompressParam */
-static const QEMUFileOps empty_ops = { };
 
 static QEMUFile *decomp_file;
 static DecompressParam *decomp_param;
@@ -467,6 +525,9 @@ static QemuCond decomp_done_cond;
 static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                  ram_addr_t offset, uint8_t *source_buf);
 
+static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
+                                     bool postcopy_requested);
+
 static void *do_data_compress(void *opaque)
 {
     CompressParam *param = opaque;
@@ -567,7 +628,8 @@ static int compress_threads_save_setup(void)
         /* comp_param[i].file is just used as a dummy buffer to save data,
          * set its ops to empty.
          */
-        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
+        comp_param[i].file = qemu_file_new_output(
+            QIO_CHANNEL(qio_channel_null_new()));
         comp_param[i].done = true;
         comp_param[i].quit = false;
         qemu_mutex_init(&comp_param[i].mutex);
@@ -679,10 +741,6 @@ void mig_throttle_counter_reset(void)
  */
 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
 {
-    if (!rs->xbzrle_enabled) {
-        return;
-    }
-
     /* We don't care if this fails to allocate a new cache page
      * as long as it updated an old one */
     cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
@@ -1177,7 +1235,7 @@ static void migration_bitmap_sync_precopy(RAMState *rs)
     }
 }
 
-static void ram_release_page(const char *rbname, uint64_t offset)
+void ram_release_page(const char *rbname, uint64_t offset)
 {
     if (!migrate_release_ram() || !migration_in_postcopy()) {
         return;
@@ -1289,7 +1347,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                                          offset | RAM_SAVE_FLAG_PAGE));
     if (async) {
         qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
-                              migrate_release_ram() &
+                              migrate_release_ram() &&
                               migration_in_postcopy());
     } else {
         qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
@@ -1487,6 +1545,13 @@ retry:
  */
 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
 {
+    /*
+     * This is not a postcopy requested page; mark it "not urgent", and use
+     * the precopy channel to send it.
+     */
+    pss->postcopy_requested = false;
+    pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;
+
     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
     if (pss->complete_round && pss->block == rs->last_seen_block &&
         pss->page >= rs->last_page) {
@@ -1548,7 +1613,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 {
     struct RAMSrcPageRequest *entry;
     RAMBlock *block = NULL;
-    size_t page_size;
 
     if (!postcopy_has_request(rs)) {
         return NULL;
@@ -1565,13 +1629,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
     entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
     block = entry->rb;
     *offset = entry->offset;
-    page_size = qemu_ram_pagesize(block);
-    /* Each page request should only be multiple page size of the ramblock */
-    assert((entry->len % page_size) == 0);
 
-    if (entry->len > page_size) {
-        entry->len -= page_size;
-        entry->offset += page_size;
+    if (entry->len > TARGET_PAGE_SIZE) {
+        entry->len -= TARGET_PAGE_SIZE;
+        entry->offset += TARGET_PAGE_SIZE;
     } else {
         memory_region_unref(block->mr);
         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1579,9 +1640,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
         migration_consume_urgent_request();
     }
 
-    trace_unqueue_page(block->idstr, *offset,
-                       test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
-
     return block;
 }
 
@@ -1942,6 +2000,55 @@ void ram_write_tracking_stop(void)
 }
 #endif /* defined(__linux__) */
 
+/*
+ * Check whether two addr/offsets of the ramblock fall onto the same host huge
+ * page.  Returns true if so, false otherwise.
+ */
+static bool offset_on_same_huge_page(RAMBlock *rb, uint64_t addr1,
+                                     uint64_t addr2)
+{
+    size_t page_size = qemu_ram_pagesize(rb);
+
+    addr1 = ROUND_DOWN(addr1, page_size);
+    addr2 = ROUND_DOWN(addr2, page_size);
+
+    return addr1 == addr2;
+}
+
+/*
+ * Does a previously preempted precopy huge page contain the currently
+ * requested page?  Returns true if so, false otherwise.
+ *
+ * This should happen very rarely, because it means that while sending a huge
+ * page during background migration for postcopy, we were sending exactly the
+ * page that some vcpu faulted on, on the dest node.  When it happens, we
+ * probably don't need to do much but drop the request, because we know that
+ * right after we restore the precopy stream it'll be serviced.  It'll
+ * slightly affect the order in which postcopy requests are serviced (it's the
+ * same as moving the current request to the end of the queue) but it
+ * shouldn't be a big deal.  The most important thing is we can _never_ try to
+ * send a partially sent huge page on the POSTCOPY channel again, otherwise
+ * that huge page will get "split brain" on two channels (PRECOPY, POSTCOPY).
+ */
+static bool postcopy_preempted_contains(RAMState *rs, RAMBlock *block,
+                                        ram_addr_t offset)
+{
+    PostcopyPreemptState *state = &rs->postcopy_preempt_state;
+
+    /* No preemption at all? */
+    if (!state->preempted) {
+        return false;
+    }
+
+    /* Not even the same ramblock? */
+    if (state->ram_block != block) {
+        return false;
+    }
+
+    return offset_on_same_huge_page(block, offset,
+                                    state->ram_page << TARGET_PAGE_BITS);
+}
+
 /**
  * get_queued_page: unqueue a page from the postcopy requests
  *
@@ -1956,10 +2063,45 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock  *block;
     ram_addr_t offset;
+    bool dirty;
+
+    do {
+        block = unqueue_page(rs, &offset);
+        /*
+         * We're sending this page, and since it's postcopy nothing else
+         * will dirty it, and we must make sure it doesn't get sent again
+         * even if this queue request was received after the background
+         * search already sent it.
+         */
+        if (block) {
+            unsigned long page;
+
+            page = offset >> TARGET_PAGE_BITS;
+            dirty = test_bit(page, block->bmap);
+            if (!dirty) {
+                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
+                                                page);
+            } else {
+                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
+            }
+        }
 
-    block = unqueue_page(rs, &offset);
+    } while (block && !dirty);
 
-    if (!block) {
+    if (block) {
+        /* See comment above postcopy_preempted_contains() */
+        if (postcopy_preempted_contains(rs, block, offset)) {
+            trace_postcopy_preempt_hit(block->idstr, offset);
+            /*
+             * If what we preempted previously was exactly what we're
+             * requesting right now, restore the preempted precopy
+             * immediately, boosting its priority as it's requested by
+             * postcopy.
+             */
+            postcopy_preempt_restore(rs, pss, true);
+            return true;
+        }
+    } else {
         /*
          * Poll write faults too if background snapshot is enabled; that's
          * when we have vcpus got blocked by the write protected pages.
@@ -1981,6 +2123,9 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
          * really rare.
          */
         pss->complete_round = false;
+        /* Mark it an urgent request, meanwhile using POSTCOPY channel */
+        pss->postcopy_requested = true;
+        pss->postcopy_target_channel = RAM_CHANNEL_POSTCOPY;
     }
 
     return !!block;
@@ -2059,7 +2204,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
     }
 
     struct RAMSrcPageRequest *new_entry =
-        g_malloc0(sizeof(struct RAMSrcPageRequest));
+        g_new0(struct RAMSrcPageRequest, 1);
     new_entry->rb = ramblock;
     new_entry->offset = start;
     new_entry->len = len;
@@ -2152,7 +2297,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
         /* Must let xbzrle know, otherwise a previous (now 0'd) cached
          * page would be stale
          */
-        if (!save_page_use_compression(rs)) {
+        if (rs->xbzrle_enabled) {
             XBZRLE_cache_lock();
             xbzrle_cache_zero_page(rs, block->offset + offset);
             XBZRLE_cache_unlock();
@@ -2161,19 +2306,141 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
     }
 
     /*
-     * Do not use multifd for:
-     * 1. Compression as the first page in the new block should be posted out
-     *    before sending the compressed page
-     * 2. In postcopy as one whole host page should be placed
+     * Do not use multifd in postcopy, as one whole host page should be
+     * placed.  Meanwhile postcopy requires atomic updates of pages, so even
+     * if host page size == guest page size the running dest guest may
+     * still see partially copied pages, which is data corruption.
      */
-    if (!save_page_use_compression(rs) && migrate_use_multifd()
-        && !migration_in_postcopy()) {
+    if (migrate_use_multifd() && !migration_in_postcopy()) {
         return ram_save_multifd_page(rs, block, offset);
     }
 
     return ram_save_page(rs, pss);
 }
 
+static bool postcopy_needs_preempt(RAMState *rs, PageSearchStatus *pss)
+{
+    MigrationState *ms = migrate_get_current();
+
+    /* Eager preempt not enabled?  Then never do that. */
+    if (!migrate_postcopy_preempt()) {
+        return false;
+    }
+
+    /* If the user explicitly disabled breaking of huge pages, skip */
+    if (!ms->postcopy_preempt_break_huge) {
+        return false;
+    }
+
+    /* If the ramblock we're sending only has small pages, never bother. */
+    if (qemu_ram_pagesize(pss->block) == TARGET_PAGE_SIZE) {
+        return false;
+    }
+
+    /* Not in postcopy at all? */
+    if (!migration_in_postcopy()) {
+        return false;
+    }
+
+    /*
+     * If we're already handling a postcopy request, don't preempt, as this
+     * page already has the same high priority.
+     */
+    if (pss->postcopy_requested) {
+        return false;
+    }
+
+    /* If there are postcopy requests pending, it's time to preempt! */
+    return postcopy_has_request(rs);
+}
+
+/* Preempt precopy: cache the current PSS so that we can resume it later */
+static void postcopy_do_preempt(RAMState *rs, PageSearchStatus *pss)
+{
+    PostcopyPreemptState *p_state = &rs->postcopy_preempt_state;
+
+    trace_postcopy_preempt_triggered(pss->block->idstr, pss->page);
+
+    /*
+     * Time to preempt precopy.  Cache the current PSS into the preempt state
+     * so that after handling the postcopy pages we can recover to it.  We
+     * need to do so because the dest VM will keep part of the precopy huge
+     * page in its tmp huge page caches; better move on with it when we can.
+     */
+    p_state->ram_block = pss->block;
+    p_state->ram_page = pss->page;
+    p_state->preempted = true;
+}
+
+/* Whether we're preempted by a postcopy request during sending a huge page */
+static bool postcopy_preempt_triggered(RAMState *rs)
+{
+    return rs->postcopy_preempt_state.preempted;
+}
+
+static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss,
+                                     bool postcopy_requested)
+{
+    PostcopyPreemptState *state = &rs->postcopy_preempt_state;
+
+    assert(state->preempted);
+
+    pss->block = state->ram_block;
+    pss->page = state->ram_page;
+
+    /* Whether this is a postcopy request? */
+    pss->postcopy_requested = postcopy_requested;
+    /*
+     * When restoring a preempted page, the old data resides in the PRECOPY
+     * slow channel, even if postcopy_requested is set.  So always use the
+     * PRECOPY channel here.
+     */
+    pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY;
+
+    trace_postcopy_preempt_restored(pss->block->idstr, pss->page);
+
+    /* Reset preempt state, most importantly, set preempted==false */
+    postcopy_preempt_reset(rs);
+}
+
+static void postcopy_preempt_choose_channel(RAMState *rs, PageSearchStatus *pss)
+{
+    MigrationState *s = migrate_get_current();
+    unsigned int channel = pss->postcopy_target_channel;
+    QEMUFile *next;
+
+    if (channel != rs->postcopy_channel) {
+        if (channel == RAM_CHANNEL_PRECOPY) {
+            next = s->to_dst_file;
+        } else {
+            next = s->postcopy_qemufile_src;
+        }
+        /* Update and cache the current channel */
+        rs->f = next;
+        rs->postcopy_channel = channel;
+
+        /*
+         * If the channel switched, reset last_sent_block since the old sent
+         * block may not be on the same channel.
+         */
+        rs->last_sent_block = NULL;
+
+        trace_postcopy_preempt_switch_channel(channel);
+    }
+
+    trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
+}
+
+/* We need to make sure rs->f always points to the default channel elsewhere */
+static void postcopy_preempt_reset_channel(RAMState *rs)
+{
+    if (postcopy_preempt_active()) {
+        rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
+        rs->f = migrate_get_current()->to_dst_file;
+        trace_postcopy_preempt_reset_channel();
+    }
+}
+
 /**
  * ram_save_host_page: save a whole host page
  *
@@ -2205,7 +2472,16 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
         return 0;
     }
 
+    if (postcopy_preempt_active()) {
+        postcopy_preempt_choose_channel(rs, pss);
+    }
+
     do {
+        if (postcopy_needs_preempt(rs, pss)) {
+            postcopy_do_preempt(rs, pss);
+            break;
+        }
+
         /* Check the pages is dirty and if it is send it */
         if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
             tmppages = ram_save_target_page(rs, pss);
@@ -2229,6 +2505,19 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
     /* The offset we leave with is the min boundary of host page and block */
     pss->page = MIN(pss->page, hostpage_boundary);
 
+    /*
+     * When in postcopy preempt mode, flush the data as soon as possible for
+     * postcopy requests, because we've already sent a whole huge page, so the
+     * dst node should already have enough resources to atomically fill in
+     * the current missing page.
+     *
+     * More importantly, when using the separate postcopy channel, we must do
+     * an explicit flush or it won't flush until the buffer is full.
+     */
+    if (migrate_postcopy_preempt() && pss->postcopy_requested) {
+        qemu_fflush(rs->f);
+    }
+
     res = ram_save_release_protection(rs, pss, start_page);
     return (res < 0 ? res : pages);
 }
@@ -2257,21 +2546,38 @@ static int ram_find_and_save_block(RAMState *rs)
         return pages;
     }
 
+    /*
+     * Always keep last_seen_block/last_page valid during this procedure,
+     * because find_dirty_block() relies on these values (e.g., we compare
+     * last_seen_block with pss.block to see whether we searched all the
+     * ramblocks) to detect the completion of migration.  A NULL value of
+     * last_seen_block can conditionally cause the loop below to run forever.
+     */
+    if (!rs->last_seen_block) {
+        rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
+        rs->last_page = 0;
+    }
+
     pss.block = rs->last_seen_block;
     pss.page = rs->last_page;
     pss.complete_round = false;
 
-    if (!pss.block) {
-        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
-    }
-
     do {
         again = true;
         found = get_queued_page(rs, &pss);
 
         if (!found) {
-            /* priority queue empty, so just search for something dirty */
-            found = find_dirty_block(rs, &pss, &again);
+            /*
+             * Recover previous precopy ramblock/offset if postcopy has
+             * preempted precopy.  Otherwise find the next dirty bit.
+             */
+            if (postcopy_preempt_triggered(rs)) {
+                postcopy_preempt_restore(rs, &pss, false);
+                found = true;
+            } else {
+                /* priority queue empty, so just search for something dirty */
+                found = find_dirty_block(rs, &pss, &again);
+            }
         }
 
         if (found) {
@@ -2294,7 +2600,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
     } else {
         ram_counters.normal += pages;
         ram_transferred_add(size);
-        qemu_update_position(f, size);
+        qemu_file_credit_transfer(f, size);
     }
 }
 
@@ -2399,6 +2705,8 @@ static void ram_state_reset(RAMState *rs)
     rs->last_page = 0;
     rs->last_version = ram_list.version;
     rs->xbzrle_enabled = false;
+    postcopy_preempt_reset(rs);
+    rs->postcopy_channel = RAM_CHANNEL_PRECOPY;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2903,6 +3211,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 {
     RAMState **rsp = opaque;
     RAMBlock *block;
+    int ret;
 
     if (compress_threads_save_setup()) {
         return -1;
@@ -2937,7 +3246,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 
-    multifd_send_sync_main(f);
+    ret =  multifd_send_sync_main(f);
+    if (ret < 0) {
+        return ret;
+    }
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     qemu_fflush(f);
 
@@ -3037,6 +3350,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     }
     qemu_mutex_unlock(&rs->bitmap_mutex);
 
+    postcopy_preempt_reset_channel(rs);
+
     /*
      * Must occur before EOS (or any QEMUFile operation)
      * because of RDMA protocol.
@@ -3046,7 +3361,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 out:
     if (ret >= 0
         && migration_is_setup_or_active(migrate_get_current()->state)) {
-        multifd_send_sync_main(rs->f);
+        ret = multifd_send_sync_main(rs->f);
+        if (ret < 0) {
+            return ret;
+        }
+
         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
         qemu_fflush(f);
         ram_transferred_add(8);
@@ -3088,6 +3407,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         /* try transferring iterative blocks of memory */
 
         /* flush all remaining blocks regardless of rate limiting */
+        qemu_mutex_lock(&rs->bitmap_mutex);
         while (true) {
             int pages;
 
@@ -3101,18 +3421,27 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
                 break;
             }
         }
+        qemu_mutex_unlock(&rs->bitmap_mutex);
 
         flush_compressed_data(rs);
         ram_control_after_iterate(f, RAM_CONTROL_FINISH);
     }
 
-    if (ret >= 0) {
-        multifd_send_sync_main(rs->f);
-        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-        qemu_fflush(f);
+    if (ret < 0) {
+        return ret;
     }
 
-    return ret;
+    postcopy_preempt_reset_channel(rs);
+
+    ret = multifd_send_sync_main(rs->f);
+    if (ret < 0) {
+        return ret;
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    qemu_fflush(f);
+
+    return 0;
 }
 
 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
@@ -3185,12 +3514,16 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
  *
  * Returns a pointer from within the RCU-protected ram_list.
  *
+ * @mis: the migration incoming state pointer
  * @f: QEMUFile where to read the data from
  * @flags: Page flags (mostly to see if it's a continuation of previous block)
+ * @channel: the channel we're using
  */
-static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
+static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
+                                              QEMUFile *f, int flags,
+                                              int channel)
 {
-    static RAMBlock *block;
+    RAMBlock *block = mis->last_recv_block[channel];
     char id[256];
     uint8_t len;
 
@@ -3217,6 +3550,8 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
         return NULL;
     }
 
+    mis->last_recv_block[channel] = block;
+
     return block;
 }
 
@@ -3634,15 +3969,15 @@ int ram_postcopy_incoming_init(MigrationIncomingState *mis)
  * rcu_read_lock is taken prior to this being called.
  *
  * @f: QEMUFile where to send the data
+ * @channel: the channel to use for loading
  */
-static int ram_load_postcopy(QEMUFile *f)
+int ram_load_postcopy(QEMUFile *f, int channel)
 {
     int flags = 0, ret = 0;
     bool place_needed = false;
     bool matches_target_page_size = false;
     MigrationIncomingState *mis = migration_incoming_get_current();
-    /* Currently we only use channel 0.  TODO: use all the channels */
-    PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[0];
+    PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[channel];
 
     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
         ram_addr_t addr;
@@ -3666,10 +4001,10 @@ static int ram_load_postcopy(QEMUFile *f)
         flags = addr & ~TARGET_PAGE_MASK;
         addr &= TARGET_PAGE_MASK;
 
-        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
+        trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                      RAM_SAVE_FLAG_COMPRESS_PAGE)) {
-            block = ram_block_from_stream(f, flags);
+            block = ram_block_from_stream(mis, f, flags, channel);
             if (!block) {
                 ret = -EINVAL;
                 break;
@@ -3707,10 +4042,10 @@ static int ram_load_postcopy(QEMUFile *f)
             } else if (tmp_page->host_addr !=
                        host_page_from_ram_block_offset(block, addr)) {
                 /* not the 1st TP within the HP */
-                error_report("Non-same host page detected "
+                error_report("Non-same host page detected on channel %d: "
                              "Target host page %p, received host page %p "
                              "(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
-                             tmp_page->host_addr,
+                             channel, tmp_page->host_addr,
                              host_page_from_ram_block_offset(block, addr),
                              block->idstr, addr, tmp_page->target_pages);
                 ret = -EINVAL;
@@ -3881,6 +4216,7 @@ void colo_flush_ram_cache(void)
  */
 static int ram_load_precopy(QEMUFile *f)
 {
+    MigrationIncomingState *mis = migration_incoming_get_current();
     int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
     /* ADVISE is earlier, it shows the source has the postcopy capability on */
     bool postcopy_advised = postcopy_is_advised();
@@ -3919,7 +4255,8 @@ static int ram_load_precopy(QEMUFile *f)
 
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
-            RAMBlock *block = ram_block_from_stream(f, flags);
+            RAMBlock *block = ram_block_from_stream(mis, f, flags,
+                                                    RAM_CHANNEL_PRECOPY);
 
             host = host_from_ram_block_offset(block, addr);
             /*
@@ -4096,7 +4433,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
      */
     WITH_RCU_READ_LOCK_GUARD() {
         if (postcopy_running) {
-            ret = ram_load_postcopy(f);
+            /*
+             * Note!  Here RAM_CHANNEL_PRECOPY is the precopy channel of
+             * postcopy migration; we have another RAM_CHANNEL_POSTCOPY to
+             * service fast page faults.
+             */
+            ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY);
         } else {
             ret = ram_load_precopy(f);
         }
@@ -4258,6 +4600,12 @@ static int ram_resume_prepare(MigrationState *s, void *opaque)
     return 0;
 }
 
+void postcopy_preempt_shutdown_file(MigrationState *s)
+{
+    qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
+    qemu_fflush(s->postcopy_qemufile_src);
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
     .save_setup = ram_save_setup,
     .save_live_iterate = ram_save_iterate,