add 'release-ram' migrate capability

author Pavel Butsykin <pbutsykin@virtuozzo.com>

Fri, 3 Feb 2017 15:23:20 +0000 (18:23 +0300)

committer Dr. David Alan Gilbert <dgilbert@redhat.com>

Mon, 13 Feb 2017 17:27:13 +0000 (17:27 +0000)
author Pavel Butsykin <pbutsykin@virtuozzo.com>
Fri, 3 Feb 2017 15:23:20 +0000 (18:23 +0300)
committer Dr. David Alan Gilbert <dgilbert@redhat.com>
Mon, 13 Feb 2017 17:27:13 +0000 (17:27 +0000)
diff --git a/include/migration/migration.h b/include/migration/migration.h

index 7528cc2fbc82983810780e9c4fbe78ceae5d4be2..b9b706a7e31522c10c8a4a343de617cc4fea83de 100644 (file)
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -304,6 +304,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
   */
  void migrate_del_blocker(Error *reason);
  
+bool migrate_release_ram(void);
  bool migrate_postcopy_ram(void);
  bool migrate_zero_blocks(void);
  
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h

index abedd466c945d4bd8a96aec519787338054a5231..0cd648a733e0a3d2b0c0d59fd2fc5215af4a191f 100644 (file)
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
   * put_buffer without copying the buffer.
   * The buffer should be available till it is sent asynchronously.
   */
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+                           bool may_free);
  bool qemu_file_mode_is_not_valid(const char *mode);
  bool qemu_file_is_writable(QEMUFile *f);
  
diff --git a/migration/migration.c b/migration/migration.c

index 2b179c69fac94d23beab9b9aed9b19d349dca49c..68afc070167049c4eee27380416e9eaca287cd81 100644 (file)
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1297,6 +1297,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
      qmp_migrate_set_parameters(&p, errp);
  }
  
+/* Report whether the 'release-ram' migration capability is enabled. */
+bool migrate_release_ram(void)
+{
+    MigrationState *state = migrate_get_current();
+
+    /* Capability flags are indexed by their MIGRATION_CAPABILITY_* value. */
+    return state->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+}
+
  bool migrate_postcopy_ram(void)
  {
      MigrationState *s;
diff --git a/migration/qemu-file.c b/migration/qemu-file.c

index e9fae3115882aca356971032602304cfed580deb..195fa94fcf3e8792f4e6d8b820423924ade5eb98 100644 (file)
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -49,6 +49,7 @@ struct QEMUFile {
      int buf_size; /* 0 when writing */
      uint8_t buf[IO_BUF_SIZE];
  
+    DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
      struct iovec iov[MAX_IOV_SIZE];
      unsigned int iovcnt;
  
@@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
      return f->ops->writev_buffer;
  }
  
+/*
+ * madvise(DONTNEED) every iov range flagged in f->may_free, merging ranges
+ * that are contiguous in memory, then clear the bitmap.  Errors are reported.
+ */
+static void qemu_iovec_release_ram(QEMUFile *f)
+{
+    struct iovec iov;
+    unsigned long idx;
+
+    idx = find_next_bit(f->may_free, f->iovcnt, 0);
+    if (idx >= f->iovcnt) {
+        return; /* nothing was marked as may_free */
+    }
+    iov = f->iov[idx];
+
+    /* 'iov' accumulates the current run; flush it whenever the run breaks. */
+    while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
+        if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
+            iov.iov_len += f->iov[idx].iov_len;
+            continue;
+        }
+        if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+            error_report("migrate: madvise DONTNEED failed %p %zu: %s",
+                         iov.iov_base, iov.iov_len, strerror(errno));
+        }
+        iov = f->iov[idx];
+    }
+    /* The loop never flushes the final run, so do it here. */
+    if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+        error_report("migrate: madvise DONTNEED failed %p %zu: %s",
+                     iov.iov_base, iov.iov_len, strerror(errno));
+    }
+    memset(f->may_free, 0, sizeof(f->may_free));
+}
+
  /**
   * Flushes QEMUFile buffer
   *
@@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
      if (f->iovcnt > 0) {
          expect = iov_size(f->iov, f->iovcnt);
          ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+
+        qemu_iovec_release_ram(f);
      }
  
      if (ret >= 0) {
@@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
      return ret;
  }
  
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
+                         bool may_free)
  {
      /* check for adjacent buffer and coalesce them */
      if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
-        f->iov[f->iovcnt - 1].iov_len) {
+        f->iov[f->iovcnt - 1].iov_len &&
+        may_free == test_bit(f->iovcnt - 1, f->may_free))
+    {
          f->iov[f->iovcnt - 1].iov_len += size;
      } else {
+        if (may_free) {
+            set_bit(f->iovcnt, f->may_free);
+        }
          f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
          f->iov[f->iovcnt++].iov_len = size;
      }
@@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
      }
  }
  
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+                           bool may_free)
  {
      if (f->last_error) {
          return;
      }
  
      f->bytes_xfer += size;
-    add_to_iovec(f, buf, size);
+    add_to_iovec(f, buf, size, may_free);
  }
  
  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
          }
          memcpy(f->buf + f->buf_index, buf, l);
          f->bytes_xfer += l;
-        add_to_iovec(f, f->buf + f->buf_index, l);
+        add_to_iovec(f, f->buf + f->buf_index, l, false);
          f->buf_index += l;
          if (f->buf_index == IO_BUF_SIZE) {
              qemu_fflush(f);
@@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
  
      f->buf[f->buf_index] = v;
      f->bytes_xfer++;
-    add_to_iovec(f, f->buf + f->buf_index, 1);
+    add_to_iovec(f, f->buf + f->buf_index, 1, false);
      f->buf_index++;
      if (f->buf_index == IO_BUF_SIZE) {
          qemu_fflush(f);
@@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
      }
      qemu_put_be32(f, blen);
      if (f->ops->writev_buffer) {
-        add_to_iovec(f, f->buf + f->buf_index, blen);
+        add_to_iovec(f, f->buf + f->buf_index, blen, false);
      }
      f->buf_index += blen;
      if (f->buf_index == IO_BUF_SIZE) {
diff --git a/migration/ram.c b/migration/ram.c

index 91443b39617a5244d4fbdd25237b1d213a415b8e..c22209db30545b98a559180085426b359ad5f21c 100644 (file)
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
      return pages;
  }
  
+/* Discard @pages target pages at @offset, only for release-ram postcopy. */
+static void ram_release_pages(MigrationState *ms, const char *block_name,
+                              uint64_t offset, int pages)
+{
+    if (migrate_release_ram() && migration_in_postcopy(ms)) {
+        uint64_t len = (uint64_t)pages << TARGET_PAGE_BITS; /* no int wrap */
+        ram_discard_range(NULL, block_name, offset, len);
+    }
+}
+
  /**
   * ram_save_page: Send the given page to the stream
   *
@@ -765,6 +775,7 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
               * page would be stale
               */
              xbzrle_cache_zero_page(current_addr);
+            ram_release_pages(ms, block->idstr, pss->offset, pages);
          } else if (!ram_bulk_stage &&
                     !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
              pages = save_xbzrle_page(f, &p, current_addr, block,
@@ -783,7 +794,9 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
          *bytes_transferred += save_page_header(f, block,
                                                 offset | RAM_SAVE_FLAG_PAGE);
          if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+                                  migrate_release_ram() &
+                                  migration_in_postcopy(ms));
          } else {
              qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
          }
@@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
          error_report("compressed data failed!");
      } else {
          bytes_sent += blen;
+        ram_release_pages(migrate_get_current(), block->idstr,
+                          offset & TARGET_PAGE_MASK, 1);
      }
  
      return bytes_sent;
@@ -952,12 +967,17 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
                      error_report("compressed data failed!");
                  }
              }
+            if (pages > 0) {
+                ram_release_pages(ms, block->idstr, pss->offset, pages);
+            }
          } else {
              offset |= RAM_SAVE_FLAG_CONTINUE;
              pages = save_zero_page(f, block, offset, p, bytes_transferred);
              if (pages == -1) {
                  pages = compress_page_with_multi_thread(f, block, offset,
                                                          bytes_transferred);
+            } else {
+                ram_release_pages(ms, block->idstr, pss->offset, pages);
              }
          }
      }
diff --git a/qapi-schema.json b/qapi-schema.json

index 61151f34d0cfff3d653782d775ae1e153fb07b7f..93305412dd47f0dd5000721b3610b67c193dcd95 100644 (file)
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -865,11 +865,14 @@
  #        side, this process is called COarse-Grain LOck Stepping (COLO) for
  #        Non-stop Service. (since 2.8)
  #
+# @release-ram: if enabled, qemu will free the migrated ram pages on the source
+#        during postcopy-ram migration. (since 2.9)
+#
  # Since: 1.2
  ##
  { 'enum': 'MigrationCapability',
    'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-           'compress', 'events', 'postcopy-ram', 'x-colo'] }
+           'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
  
  ##
  # @MigrationCapabilityStatus:
author	Pavel Butsykin <pbutsykin@virtuozzo.com>
	Fri, 3 Feb 2017 15:23:20 +0000 (18:23 +0300)
committer	Dr. David Alan Gilbert <dgilbert@redhat.com>
	Mon, 13 Feb 2017 17:27:13 +0000 (17:27 +0000)
include/migration/migration.h		patch \| blob \| blame \| history
include/migration/qemu-file.h		patch \| blob \| blame \| history
migration/migration.c		patch \| blob \| blame \| history
migration/qemu-file.c		patch \| blob \| blame \| history
migration/ram.c		patch \| blob \| blame \| history
qapi-schema.json		patch \| blob \| blame \| history