int buf_size; /* 0 when writing */
uint8_t buf[IO_BUF_SIZE];
+ DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
unsigned int iovcnt;
return f->ops->writev_buffer;
}
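The new may_free bitmap shadows iov[] one bit per slot: a set bit marks an entry whose backing storage is guest RAM that can be released once the data has been flushed. For orientation, a sketch of what the declaration expands to, assuming the macros in include/qemu/bitmap.h:

    /* Sketch only: DECLARE_BITMAP(name, bits) expands to roughly
     *     unsigned long name[BITS_TO_LONGS(bits)];
     * one long per 8 * sizeof(long) slots, so bit i of may_free records
     * whether iov[i] may be released after the next flush. */
    unsigned long may_free[(MAX_IOV_SIZE + 8 * sizeof(unsigned long) - 1) /
                           (8 * sizeof(unsigned long))];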
+static void qemu_iovec_release_ram(QEMUFile *f)
+{
+ struct iovec iov;
+ unsigned long idx;
+
+ /* Find and release all the contiguous memory ranges marked as may_free. */
+ idx = find_next_bit(f->may_free, f->iovcnt, 0);
+ if (idx >= f->iovcnt) {
+ return;
+ }
+ iov = f->iov[idx];
+
+    /* Walk the remaining marked iovs: while each next marked iov is
+     * physically adjacent, grow the current range; on a gap, release the
+     * accumulated range with madvise() and start a new one. The final
+     * range is released after the loop.
+     */
+ while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
+        /* coalesce the next iov into the current range if it is adjacent */
+ if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
+ iov.iov_len += f->iov[idx].iov_len;
+ continue;
+ }
+ if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+ error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+ iov.iov_base, iov.iov_len, strerror(errno));
+ }
+ iov = f->iov[idx];
+ }
+ if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+ error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+ iov.iov_base, iov.iov_len, strerror(errno));
+ }
+ memset(f->may_free, 0, sizeof(f->may_free));
+}
+
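For intuition, qemu_iovec_release_ram() issues one madvise() per physically contiguous run of marked iovs. A minimal standalone model of the same coalescing pattern (toy code: find_next_bit() replaced by a linear scan, qemu_madvise() by printf(), all names hypothetical):

    #include <stdio.h>
    #include <stddef.h>
    #include <sys/uio.h>

    static size_t next_marked(const int *marked, size_t cnt, size_t from)
    {
        while (from < cnt && !marked[from]) {
            from++;
        }
        return from;
    }

    static void release_runs(const struct iovec *iov, const int *marked,
                             size_t cnt)
    {
        size_t idx = next_marked(marked, cnt, 0);
        struct iovec cur;

        if (idx >= cnt) {
            return;
        }
        cur = iov[idx];

        while ((idx = next_marked(marked, cnt, idx + 1)) < cnt) {
            if ((char *)cur.iov_base + cur.iov_len == iov[idx].iov_base) {
                cur.iov_len += iov[idx].iov_len;  /* adjacent: grow the run */
                continue;
            }
            printf("release %p len %zu\n", cur.iov_base, cur.iov_len);
            cur = iov[idx];                       /* gap: start a new run */
        }
        printf("release %p len %zu\n", cur.iov_base, cur.iov_len);
    }

    int main(void)
    {
        static char ram[4 * 4096];
        struct iovec iov[3] = {
            { ram,            4096 },  /* run 1 */
            { ram + 4096,     4096 },  /* adjacent: coalesced into run 1 */
            { ram + 3 * 4096, 4096 },  /* gap: run 2 */
        };
        int marked[3] = { 1, 1, 1 };

        release_runs(iov, marked, 3); /* two releases: 8192 bytes, then 4096 */
        return 0;
    }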
/**
* Flushes QEMUFile buffer
*
if (f->iovcnt > 0) {
expect = iov_size(f->iov, f->iovcnt);
ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+
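+        /* The iovecs were handed to the transport above, so guest pages
+         * queued with may_free set can be dropped now. */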
+ qemu_iovec_release_ram(f);
}
if (ret >= 0) {
return ret;
}
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
/* check for adjacent buffer and coalesce them */
if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
- f->iov[f->iovcnt - 1].iov_len) {
+ f->iov[f->iovcnt - 1].iov_len &&
+ may_free == test_bit(f->iovcnt - 1, f->may_free))
+ {
f->iov[f->iovcnt - 1].iov_len += size;
} else {
+ if (may_free) {
+ set_bit(f->iovcnt, f->may_free);
+ }
f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
f->iov[f->iovcnt++].iov_len = size;
}
}
}
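The extra may_free == test_bit(...) condition matters when two buffers are adjacent in memory but only one of them is releasable (e.g. a guest page queued with may_free set sitting next to data staged in f->buf). A toy model of the merge decision, names hypothetical:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Merge only if the new buffer starts exactly where the previous one
     * ends AND both carry the same may_free marking; otherwise a later
     * madvise() would cover (or miss) bytes it should not. */
    static bool can_merge(const char *last_base, size_t last_len,
                          bool last_free, const char *buf, bool may_free)
    {
        return buf == last_base + last_len && may_free == last_free;
    }

    int main(void)
    {
        static char pages[2 * 4096];

        /* adjacent, same flag: coalesce -> prints 1 */
        printf("%d\n", can_merge(pages, 4096, true, pages + 4096, true));
        /* adjacent, flags differ: keep separate entries -> prints 0 */
        printf("%d\n", can_merge(pages, 4096, true, pages + 4096, false));
        return 0;
    }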
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
if (f->last_error) {
return;
}
f->bytes_xfer += size;
- add_to_iovec(f, buf, size);
+ add_to_iovec(f, buf, size, may_free);
}
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
memcpy(f->buf + f->buf_index, buf, l);
f->bytes_xfer += l;
- add_to_iovec(f, f->buf + f->buf_index, l);
+ add_to_iovec(f, f->buf + f->buf_index, l, false);
f->buf_index += l;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
f->buf[f->buf_index] = v;
f->bytes_xfer++;
- add_to_iovec(f, f->buf + f->buf_index, 1);
+ add_to_iovec(f, f->buf + f->buf_index, 1, false);
f->buf_index++;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
}
qemu_put_be32(f, blen);
if (f->ops->writev_buffer) {
- add_to_iovec(f, f->buf + f->buf_index, blen);
+ add_to_iovec(f, f->buf + f->buf_index, blen, false);
}
f->buf_index += blen;
if (f->buf_index == IO_BUF_SIZE) {
return pages;
}
+static void ram_release_pages(MigrationState *ms, const char *block_name,
+ uint64_t offset, int pages)
+{
+ if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
+ return;
+ }
+
+    ram_discard_range(NULL, block_name, offset,
+                      (ram_addr_t)pages << TARGET_PAGE_BITS);
+}
+
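ram_release_pages() deliberately does nothing unless both conditions hold: the release-ram capability must be enabled and the migration must already be in its postcopy phase, since only then are transferred pages safe to drop on the source. The length passed to ram_discard_range() is the page count scaled to bytes; a toy model of the guard plus the scaling (names hypothetical, assuming TARGET_PAGE_BITS == 12, i.e. 4 KiB pages):

    #include <stdint.h>
    #include <stdbool.h>

    static uint64_t bytes_to_discard(bool cap_enabled, bool in_postcopy,
                                     int pages, unsigned page_bits)
    {
        if (!cap_enabled || !in_postcopy) {
            return 0;                         /* capability off or still precopy */
        }
        return (uint64_t)pages << page_bits;  /* 3 pages, 12 bits -> 12288 */
    }

    int main(void)
    {
        return bytes_to_discard(true, true, 3, 12) == 12288 ? 0 : 1;
    }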
/**
* ram_save_page: Send the given page to the stream
*
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
} else if (!ram_bulk_stage &&
!migration_in_postcopy(ms) && migrate_use_xbzrle()) {
pages = save_xbzrle_page(f, &p, current_addr, block,
*bytes_transferred += save_page_header(f, block,
offset | RAM_SAVE_FLAG_PAGE);
if (send_async) {
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+                              migrate_release_ram() &&
+ migration_in_postcopy(ms));
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
error_report("compressed data failed!");
} else {
bytes_sent += blen;
+ ram_release_pages(migrate_get_current(), block->idstr,
+ offset & TARGET_PAGE_MASK, 1);
}
return bytes_sent;
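One detail at this call site: unlike the other callers, which pass the clean pss->offset, the offset here still has RAM_SAVE_FLAG_* bits ORed into its low bits, so it is masked back to a page-aligned value before the discard. For example (hypothetical numbers, assuming 4 KiB pages and RAM_SAVE_FLAG_PAGE == 0x08):

    uint64_t offset  = 0x123000 | 0x08;            /* page offset + flag bit */
    uint64_t aligned = offset & ~(uint64_t)0xfff;  /* TARGET_PAGE_MASK: 0x123000 */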
error_report("compressed data failed!");
}
}
+ if (pages > 0) {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
+ }
} else {
offset |= RAM_SAVE_FLAG_CONTINUE;
pages = save_zero_page(f, block, offset, p, bytes_transferred);
if (pages == -1) {
pages = compress_page_with_multi_thread(f, block, offset,
bytes_transferred);
+ } else {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
}
}
}
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
+# @release-ram: if enabled, QEMU will free the migrated RAM pages on the
+#               source during postcopy-ram migration. (since 2.9)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram', 'x-colo'] }
+ 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
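For completeness, a capability like this is toggled over QMP before migration begins; since release-ram only acts during postcopy, it is normally enabled together with postcopy-ram. A sketch of the exchange (the argument layout follows migrate-set-capabilities, which takes a list of MigrationCapabilityStatus entries):

    -> { "execute": "migrate-set-capabilities",
         "arguments": { "capabilities": [
             { "capability": "release-ram", "state": true },
             { "capability": "postcopy-ram", "state": true } ] } }
    <- { "return": {} }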
##
# @MigrationCapabilityStatus: