#include "ram.h"
#include "qapi/error.h"
#include "qemu/notify.h"
+#include "qemu/rcu.h"
#include "sysemu/sysemu.h"
#include "sysemu/balloon.h"
#include "qemu/error-report.h"
#include "trace.h"
+#include "hw/boards.h"
/* Arbitrary limit on size of each discard command,
* keeps them around ~200 bytes
static struct PostcopyBlocktimeContext *blocktime_context_new(void)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
uint32List *list = NULL, *entry = NULL;
int i;
- for (i = smp_cpus - 1; i >= 0; i--) {
+ for (i = ms->smp.cpus - 1; i >= 0; i--) {
entry = g_new0(uint32List, 1);
entry->value = ctx->vcpu_blocktime[i];
entry->next = list;
return false;
}
- if (getpagesize() != ram_pagesize_summary()) {
+ if (qemu_real_host_page_size != ram_pagesize_summary()) {
bool have_hp = false;
/* We've got a huge page */
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
+ const char *block_name = qemu_ram_get_idstr(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
size_t pagesize = qemu_ram_pagesize(rb);
if (length % pagesize) {
*/
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
{
- long pagesize = getpagesize();
+ long pagesize = qemu_real_host_page_size;
int ufd = -1;
bool ret = false; /* Error unless we change it */
void *testarea = NULL;
}
/* We don't support postcopy with shared RAM yet */
- if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
+ if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
goto out;
}
* must be done right at the start prior to pre-copy.
* opaque should be the MIS.
*/
-static int init_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int init_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_init_range(block_name, host_addr, offset, length);
/*
* At the end of migration, undo the effects of init_range
* opaque should be the MIS.
*/
-static int cleanup_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int cleanup_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
MigrationIncomingState *mis = opaque;
struct uffdio_range range_struct;
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
*/
int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
+ if (foreach_not_ignored_block(init_range, NULL)) {
return -1;
}
return 0;
}
+/*
+ * Manage a single vote to the QEMU balloon inhibitor for all postcopy usage,
+ * last caller wins.
+ */
+static void postcopy_balloon_inhibit(bool state)
+{
+ static bool cur_state = false;
+
+ if (state != cur_state) {
+ qemu_balloon_inhibit(state);
+ cur_state = state;
+ }
+}
+
/*
* At the end of a migration where postcopy_ram_incoming_init was called.
*/
if (mis->have_fault_thread) {
Error *local_err = NULL;
+ /* Let the fault thread quit */
+ atomic_set(&mis->fault_thread_quit, 1);
+ postcopy_fault_thread_notify(mis);
+ trace_postcopy_ram_incoming_cleanup_join();
+ qemu_thread_join(&mis->fault_thread);
+
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
error_report_err(local_err);
return -1;
}
- if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
+ if (foreach_not_ignored_block(cleanup_range, mis)) {
return -1;
}
- /* Let the fault thread quit */
- atomic_set(&mis->fault_thread_quit, 1);
- postcopy_fault_thread_notify(mis);
- trace_postcopy_ram_incoming_cleanup_join();
- qemu_thread_join(&mis->fault_thread);
trace_postcopy_ram_incoming_cleanup_closeuf();
close(mis->userfault_fd);
mis->have_fault_thread = false;
}
- qemu_balloon_inhibit(false);
+ postcopy_balloon_inhibit(false);
if (enable_mlock) {
if (os_mlock() < 0) {
}
}
- postcopy_state_set(POSTCOPY_INCOMING_END);
-
if (mis->postcopy_tmp_page) {
munmap(mis->postcopy_tmp_page, mis->largest_page_size);
mis->postcopy_tmp_page = NULL;
/*
* Disable huge pages on an area
*/
-static int nhp_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int nhp_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
/*
*/
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
+ if (foreach_not_ignored_block(nhp_range, mis)) {
return -1;
}
/*
* Mark the given area of RAM as requiring notification to unwritten areas
- * Used as a callback on qemu_ram_foreach_migratable_block.
+ * Used as a callback on foreach_not_ignored_block.
* host_addr: Base of area to mark
* offset: Offset in the whole ram arena
* length: Length of the section
* opaque: MigrationIncomingState pointer
* Returns 0 on success
*/
-static int ram_block_enable_notify(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length,
- void *opaque)
+static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
{
MigrationIncomingState *mis = opaque;
struct uffdio_register reg_struct;
- reg_struct.range.start = (uintptr_t)host_addr;
- reg_struct.range.len = length;
+ reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
+ reg_struct.range.len = qemu_ram_get_used_length(rb);
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
/* Now tell our userfault_fd that it's responsible for this area */
return -1;
}
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
qemu_ram_set_uf_zeroable(rb);
}
atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
atomic_xchg(&dc->vcpu_addr[cpu], addr);
- /* check it here, not at the begining of the function,
- * due to, check could accur early than bitmap_set in
- * qemu_ufd_copy_ioctl */
+ /*
+ * check it here, not at the beginning of the function,
+ * due to, check could occur early than bitmap_set in
+ * qemu_ufd_copy_ioctl
+ */
already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
if (already_received) {
atomic_xchg(&dc->vcpu_addr[cpu], 0);
{
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
int i, affected_cpu = 0;
bool vcpu_total_blocktime = false;
uint32_t read_vcpu_time, low_time_offset;
return NULL;
}
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
/* Open the fd for the kernel to give us userfaults */
mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
mis->have_fault_thread = true;
/* Mark so that we get notified of accesses to unwritten areas */
- if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
+ if (foreach_not_ignored_block(ram_block_enable_notify, mis)) {
+ error_report("ram_block_enable_notify failed");
+ return -1;
+ }
+
+ mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (mis->postcopy_tmp_page == MAP_FAILED) {
+ mis->postcopy_tmp_page = NULL;
+ error_report("%s: Failed to map postcopy_tmp_page %s",
+ __func__, strerror(errno));
return -1;
}
+ /*
+ * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages
+ */
+ mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+ int e = errno;
+ mis->postcopy_tmp_zero_page = NULL;
+ error_report("%s: Failed to map large zero page %s",
+ __func__, strerror(e));
+ return -e;
+ }
+ memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+
/*
* Ballooning can mark pages as absent while we're postcopying
* that would cause false userfaults.
*/
- qemu_balloon_inhibit(true);
+ postcopy_balloon_inhibit(true);
trace_postcopy_ram_enable_notify();
qemu_ram_block_host_offset(rb,
host));
} else {
- /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
- if (!mis->postcopy_tmp_zero_page) {
- mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1, 0);
- if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
- int e = errno;
- mis->postcopy_tmp_zero_page = NULL;
- error_report("%s: %s mapping large zero page",
- __func__, strerror(e));
- return -e;
- }
- memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
- }
- return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
- rb);
- }
-}
-
-/*
- * Returns a target page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * The same address is used repeatedly, postcopy_place_page just takes the
- * backing page away.
- * Returns: Pointer to allocated page
- *
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
- if (!mis->postcopy_tmp_page) {
- mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
- PROT_READ | PROT_WRITE, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (mis->postcopy_tmp_page == MAP_FAILED) {
- mis->postcopy_tmp_page = NULL;
- error_report("%s: %s", __func__, strerror(errno));
- return NULL;
- }
+ return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb);
}
-
- return mis->postcopy_tmp_page;
}
#else
return -1;
}
-int postcopy_ram_enable_notify(MigrationIncomingState *mis)
+int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
assert(0);
return -1;
return -1;
}
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
- assert(0);
- return NULL;
-}
-
int postcopy_wake_shared(struct PostCopyFD *pcfd,
uint64_t client_addr,
RAMBlock *rb)
* asking to discard individual ranges.
*
* @ms: The current migration state.
- * @offset: the bitmap offset of the named RAMBlock in the migration
- * bitmap.
+ * @offset: the bitmap offset of the named RAMBlock in the migration bitmap.
* @name: RAMBlock that discards will operate on.
- *
- * returns: a new PDS.
*/
-PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
- const char *name)
+static PostcopyDiscardState pds = {0};
+void postcopy_discard_send_init(MigrationState *ms, const char *name)
{
- PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
-
- if (res) {
- res->ramblock_name = name;
- }
-
- return res;
+ pds.ramblock_name = name;
+ pds.cur_entry = 0;
+ pds.nsentwords = 0;
+ pds.nsentcmds = 0;
}
/**
* be sent later.
*
* @ms: Current migration state.
- * @pds: Structure initialised by postcopy_discard_send_init().
* @start,@length: a range of pages in the migration bitmap in the
* RAM block passed to postcopy_discard_send_init() (length=1 is one page)
*/
-void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
- unsigned long start, unsigned long length)
+void postcopy_discard_send_range(MigrationState *ms, unsigned long start,
+ unsigned long length)
{
size_t tp_size = qemu_target_page_size();
/* Convert to byte offsets within the RAM block */
- pds->start_list[pds->cur_entry] = start * tp_size;
- pds->length_list[pds->cur_entry] = length * tp_size;
- trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
- pds->cur_entry++;
- pds->nsentwords++;
+ pds.start_list[pds.cur_entry] = start * tp_size;
+ pds.length_list[pds.cur_entry] = length * tp_size;
+ trace_postcopy_discard_send_range(pds.ramblock_name, start, length);
+ pds.cur_entry++;
+ pds.nsentwords++;
- if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+ if (pds.cur_entry == MAX_DISCARDS_PER_COMMAND) {
/* Full set, ship it! */
qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
- pds->ramblock_name,
- pds->cur_entry,
- pds->start_list,
- pds->length_list);
- pds->nsentcmds++;
- pds->cur_entry = 0;
+ pds.ramblock_name,
+ pds.cur_entry,
+ pds.start_list,
+ pds.length_list);
+ pds.nsentcmds++;
+ pds.cur_entry = 0;
}
}
* bitmap code. Sends any outstanding discard messages, frees the PDS
*
* @ms: Current migration state.
- * @pds: Structure initialised by postcopy_discard_send_init().
*/
-void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
+void postcopy_discard_send_finish(MigrationState *ms)
{
/* Anything unsent? */
- if (pds->cur_entry) {
+ if (pds.cur_entry) {
qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
- pds->ramblock_name,
- pds->cur_entry,
- pds->start_list,
- pds->length_list);
- pds->nsentcmds++;
+ pds.ramblock_name,
+ pds.cur_entry,
+ pds.start_list,
+ pds.length_list);
+ pds.nsentcmds++;
}
- trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
- pds->nsentcmds);
-
- g_free(pds);
+ trace_postcopy_discard_send_finish(pds.ramblock_name, pds.nsentwords,
+ pds.nsentcmds);
}
/*