X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=migration%2Fpostcopy-ram.c;h=a793bad477c9ee39de84056eb793ad14af01bef7;hb=2d49bacda00876736c746ce1fcea006a128bef6b;hp=658b750a8e72a148d624c7e2c4855344881aecc6;hpb=3a7804c3065ac79ff6e609e75ff2765d79f64f85;p=mirror_qemu.git

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 658b750a8e..a793bad477 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -25,10 +25,12 @@
 #include "ram.h"
 #include "qapi/error.h"
 #include "qemu/notify.h"
+#include "qemu/rcu.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/balloon.h"
 #include "qemu/error-report.h"
 #include "trace.h"
+#include "hw/boards.h"
 
 /* Arbitrary limit on size of each discard command,
  * keeps them around ~200 bytes
@@ -128,6 +130,8 @@ static void migration_exit_cb(Notifier *n, void *data)
 
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
+    MachineState *ms = MACHINE(qdev_get_machine());
+    unsigned int smp_cpus = ms->smp.cpus;
     PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
     ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
     ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
@@ -141,10 +145,11 @@ static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 
 static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
+    MachineState *ms = MACHINE(qdev_get_machine());
     uint32List *list = NULL, *entry = NULL;
     int i;
 
-    for (i = smp_cpus - 1; i >= 0; i--) {
+    for (i = ms->smp.cpus - 1; i >= 0; i--) {
         entry = g_new0(uint32List, 1);
         entry->value = ctx->vcpu_blocktime[i];
         entry->next = list;
@@ -319,10 +324,10 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
 
 /* Callback from postcopy_ram_supported_by_host block iterator.
  */
-static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
-                                      ram_addr_t offset, ram_addr_t length, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
 {
-    RAMBlock *rb = qemu_ram_block_by_name(block_name);
+    const char *block_name = qemu_ram_get_idstr(rb);
+    ram_addr_t length = qemu_ram_get_used_length(rb);
     size_t pagesize = qemu_ram_pagesize(rb);
 
     if (length % pagesize) {
@@ -374,7 +379,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
     }
 
     /* We don't support postcopy with shared RAM yet */
-    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
+    if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
         goto out;
     }
 
@@ -443,9 +448,12 @@ out:
  * must be done right at the start prior to pre-copy.
  * opaque should be the MIS.
  */
-static int init_range(const char *block_name, void *host_addr,
-                      ram_addr_t offset, ram_addr_t length, void *opaque)
+static int init_range(RAMBlock *rb, void *opaque)
 {
+    const char *block_name = qemu_ram_get_idstr(rb);
+    void *host_addr = qemu_ram_get_host_addr(rb);
+    ram_addr_t offset = qemu_ram_get_offset(rb);
+    ram_addr_t length = qemu_ram_get_used_length(rb);
     trace_postcopy_init_range(block_name, host_addr, offset, length);
 
     /*
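Every hunk so far follows one conversion: block callbacks that used to be handed (block_name, host_addr, offset, length) now receive the RAMBlock itself and read those fields through accessors, and the walk moves from qemu_ram_foreach_block() to foreach_not_ignored_block(), which skips RAMBlocks excluded from migration. A minimal standalone sketch of that callback shape — Block, its fields, and foreach_not_ignored() are local stand-ins; only the QEMU names mentioned above are real:

#include <inttypes.h>
#include <stdio.h>

/* Stand-in for QEMU's RAMBlock; fields are read through the handle, as the
 * patch does with qemu_ram_get_idstr()/qemu_ram_get_used_length()/etc. */
typedef struct Block {
    const char *idstr;
    uint64_t used_length;
    uint64_t pagesize;
    int ignored;                 /* block excluded from migration */
} Block;

typedef int (*BlockIterFunc)(Block *rb, void *opaque);

/* Visit every block not flagged as ignored; a non-zero return from the
 * callback aborts the walk, mirroring foreach_not_ignored_block(). */
static int foreach_not_ignored(Block *blocks, int n,
                               BlockIterFunc func, void *opaque)
{
    for (int i = 0; i < n; i++) {
        if (!blocks[i].ignored) {
            int ret = func(&blocks[i], opaque);
            if (ret) {
                return ret;
            }
        }
    }
    return 0;
}

/* Callback in the new style: one opaque block handle instead of four
 * pre-extracted parameters. */
static int test_block_postcopiable(Block *rb, void *opaque)
{
    (void)opaque;
    if (rb->used_length % rb->pagesize) {
        fprintf(stderr, "block %s length 0x%" PRIx64
                " is not a multiple of its page size\n",
                rb->idstr, rb->used_length);
        return 1;
    }
    return 0;
}

int main(void)
{
    Block blocks[] = {
        { "pc.ram",   64 * 4096, 4096, 0 },
        { "ignored",  123,       4096, 1 },  /* skipped by the walk */
        { "vga.vram", 4097,      4096, 0 },  /* fails the check, aborts */
    };
    return foreach_not_ignored(blocks, 3, test_block_postcopiable, NULL);
}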
@@ -465,9 +473,12 @@ static int init_range(const char *block_name, void *host_addr,
  * At the end of migration, undo the effects of init_range
  * opaque should be the MIS.
  */
-static int cleanup_range(const char *block_name, void *host_addr,
-                         ram_addr_t offset, ram_addr_t length, void *opaque)
+static int cleanup_range(RAMBlock *rb, void *opaque)
 {
+    const char *block_name = qemu_ram_get_idstr(rb);
+    void *host_addr = qemu_ram_get_host_addr(rb);
+    ram_addr_t offset = qemu_ram_get_offset(rb);
+    ram_addr_t length = qemu_ram_get_used_length(rb);
     MigrationIncomingState *mis = opaque;
     struct uffdio_range range_struct;
     trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
@@ -500,15 +511,29 @@ static int cleanup_range(const char *block_name, void *host_addr,
  * postcopy later; must be called prior to any precopy.
  * called from arch_init's similarly named ram_postcopy_incoming_init
  */
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
 {
-    if (qemu_ram_foreach_block(init_range, NULL)) {
+    if (foreach_not_ignored_block(init_range, NULL)) {
         return -1;
     }
 
     return 0;
 }
 
+/*
+ * Manage a single vote to the QEMU balloon inhibitor for all postcopy usage,
+ * last caller wins.
+ */
+static void postcopy_balloon_inhibit(bool state)
+{
+    static bool cur_state = false;
+
+    if (state != cur_state) {
+        qemu_balloon_inhibit(state);
+        cur_state = state;
+    }
+}
+
 /*
  * At the end of a migration where postcopy_ram_incoming_init was called.
  */
@@ -519,19 +544,20 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     if (mis->have_fault_thread) {
         Error *local_err = NULL;
 
+        /* Let the fault thread quit */
+        atomic_set(&mis->fault_thread_quit, 1);
+        postcopy_fault_thread_notify(mis);
+        trace_postcopy_ram_incoming_cleanup_join();
+        qemu_thread_join(&mis->fault_thread);
+
         if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
             error_report_err(local_err);
             return -1;
         }
 
-        if (qemu_ram_foreach_block(cleanup_range, mis)) {
+        if (foreach_not_ignored_block(cleanup_range, mis)) {
             return -1;
         }
-        /* Let the fault thread quit */
-        atomic_set(&mis->fault_thread_quit, 1);
-        postcopy_fault_thread_notify(mis);
-        trace_postcopy_ram_incoming_cleanup_join();
-        qemu_thread_join(&mis->fault_thread);
 
         trace_postcopy_ram_incoming_cleanup_closeuf();
         close(mis->userfault_fd);
@@ -539,7 +565,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         mis->have_fault_thread = false;
     }
 
-    qemu_balloon_inhibit(false);
+    postcopy_balloon_inhibit(false);
 
     if (enable_mlock) {
         if (os_mlock() < 0) {
@@ -551,8 +577,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         }
     }
 
-    postcopy_state_set(POSTCOPY_INCOMING_END);
-
     if (mis->postcopy_tmp_page) {
         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
         mis->postcopy_tmp_page = NULL;
@@ -571,9 +595,12 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 /*
  * Disable huge pages on an area
  */
-static int nhp_range(const char *block_name, void *host_addr,
-                     ram_addr_t offset, ram_addr_t length, void *opaque)
+static int nhp_range(RAMBlock *rb, void *opaque)
 {
+    const char *block_name = qemu_ram_get_idstr(rb);
+    void *host_addr = qemu_ram_get_host_addr(rb);
+    ram_addr_t offset = qemu_ram_get_offset(rb);
+    ram_addr_t length = qemu_ram_get_used_length(rb);
     trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 
     /*
@@ -593,7 +620,7 @@ static int nhp_range(const char *block_name, void *host_addr,
  */
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 {
-    if (qemu_ram_foreach_block(nhp_range, mis)) {
+    if (foreach_not_ignored_block(nhp_range, mis)) {
         return -1;
     }
 
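The new postcopy_balloon_inhibit() wrapper exists because qemu_balloon_inhibit() keeps a running count of inhibit votes, so a code path that might vote true (or false) more than once would unbalance the counter. The wrapper latches the last requested state and forwards only actual changes. The same latch, reduced to a compilable sketch — balloon_inhibit() here is a stand-in counter for the real qemu_balloon_inhibit():

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for qemu_balloon_inhibit(): each true adds a vote against
 * ballooning, each false removes one; ballooning is blocked while > 0. */
static int inhibit_votes;

static void balloon_inhibit(bool state)
{
    inhibit_votes += state ? 1 : -1;
}

/* The patch's latch: callers may ask repeatedly, but at most one vote is
 * ever held, and the last caller wins. */
static void postcopy_balloon_inhibit(bool state)
{
    static bool cur_state;

    if (state != cur_state) {
        balloon_inhibit(state);
        cur_state = state;
    }
}

int main(void)
{
    postcopy_balloon_inhibit(true);
    postcopy_balloon_inhibit(true);     /* collapsed: no second vote */
    postcopy_balloon_inhibit(false);    /* the single vote is returned */
    printf("votes held: %d\n", inhibit_votes);   /* prints 0 */
    return 0;
}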
@@ -604,22 +631,20 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 
 /*
  * Mark the given area of RAM as requiring notification to unwritten areas
- * Used as a callback on qemu_ram_foreach_block.
+ * Used as a callback on foreach_not_ignored_block.
  * host_addr: Base of area to mark
  * offset: Offset in the whole ram arena
  * length: Length of the section
  * opaque: MigrationIncomingState pointer
  * Returns 0 on success
  */
-static int ram_block_enable_notify(const char *block_name, void *host_addr,
-                                   ram_addr_t offset, ram_addr_t length,
-                                   void *opaque)
+static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
 {
     MigrationIncomingState *mis = opaque;
     struct uffdio_register reg_struct;
 
-    reg_struct.range.start = (uintptr_t)host_addr;
-    reg_struct.range.len = length;
+    reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
+    reg_struct.range.len = qemu_ram_get_used_length(rb);
     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 
     /* Now tell our userfault_fd that it's responsible for this area */
@@ -632,7 +657,6 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
         return -1;
     }
     if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
-        RAMBlock *rb = qemu_ram_block_by_name(block_name);
        qemu_ram_set_uf_zeroable(rb);
     }
 
@@ -742,9 +766,11 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
     atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset);
     atomic_xchg(&dc->vcpu_addr[cpu], addr);
 
-    /* check it here, not at the begining of the function,
-     * due to, check could accur early than bitmap_set in
-     * qemu_ufd_copy_ioctl */
+    /*
+     * Check it here rather than at the beginning of the function,
+     * because the check could occur earlier than the bitmap_set in
+     * qemu_ufd_copy_ioctl.
+     */
     already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
     if (already_received) {
         atomic_xchg(&dc->vcpu_addr[cpu], 0);
@@ -786,6 +812,8 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+    MachineState *ms = MACHINE(qdev_get_machine());
+    unsigned int smp_cpus = ms->smp.cpus;
     int i, affected_cpu = 0;
     bool vcpu_total_blocktime = false;
     uint32_t read_vcpu_time, low_time_offset;
@@ -853,6 +881,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
     RAMBlock *rb = NULL;
 
     trace_postcopy_ram_fault_thread_entry();
+    rcu_register_thread();
     mis->last_rb = NULL; /* last RAMBlock we sent part of */
     qemu_sem_post(&mis->fault_thread_sem);
 
@@ -1059,6 +1088,7 @@ retry:
             }
         }
     }
+    rcu_unregister_thread();
     trace_postcopy_ram_fault_thread_exit();
     g_free(pfd);
     return NULL;
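The fault-thread hunks encode two lifecycle rules: the thread now brackets its lifetime with rcu_register_thread()/rcu_unregister_thread() because it performs lookups on RCU-protected RAMBlock structures, and the reordered cleanup earlier joins the thread before deregistering ranges or closing the descriptors it may still be polling. A pthread sketch of that shutdown ordering, with stand-ins for the RCU calls and for the poll() loop on the userfault fd:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int fault_thread_quit;

/* Stand-ins for QEMU's rcu_register_thread()/rcu_unregister_thread(). */
static void register_thread(void)   { puts("thread registered"); }
static void unregister_thread(void) { puts("thread unregistered"); }

static void *fault_thread(void *opaque)
{
    (void)opaque;
    register_thread();               /* first thing on entry */
    while (!atomic_load(&fault_thread_quit)) {
        usleep(1000);                /* stands in for poll() on the fds */
    }
    unregister_thread();             /* last thing before exit */
    return NULL;
}

int main(void)
{
    pthread_t th;
    pthread_create(&th, NULL, fault_thread, NULL);

    /* Cleanup order from the patch: flag the thread, kick it awake (the
     * patch writes to a wake-up fd), join it, and only then release the
     * resources the thread was using. */
    atomic_store(&fault_thread_quit, 1);
    pthread_join(th, NULL);
    puts("joined; safe to deregister ranges and close fds");
    return 0;
}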
@@ -1099,15 +1129,42 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
     mis->have_fault_thread = true;
 
     /* Mark so that we get notified of accesses to unwritten areas */
-    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
+    if (foreach_not_ignored_block(ram_block_enable_notify, mis)) {
+        error_report("ram_block_enable_notify failed");
+        return -1;
+    }
+
+    mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
+                                  PROT_READ | PROT_WRITE, MAP_PRIVATE |
+                                  MAP_ANONYMOUS, -1, 0);
+    if (mis->postcopy_tmp_page == MAP_FAILED) {
+        mis->postcopy_tmp_page = NULL;
+        error_report("%s: Failed to map postcopy_tmp_page %s",
+                     __func__, strerror(errno));
         return -1;
     }
 
+    /*
+     * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages
+     */
+    mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+                                       PROT_READ | PROT_WRITE,
+                                       MAP_PRIVATE | MAP_ANONYMOUS,
+                                       -1, 0);
+    if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+        int e = errno;
+        mis->postcopy_tmp_zero_page = NULL;
+        error_report("%s: Failed to map large zero page %s",
+                     __func__, strerror(e));
+        return -e;
+    }
+    memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+
     /*
      * Ballooning can mark pages as absent while we're postcopying
      * that would cause false userfaults.
      */
-    qemu_balloon_inhibit(true);
+    postcopy_balloon_inhibit(true);
 
     trace_postcopy_ram_enable_notify();
 
@@ -1209,48 +1266,8 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
                                            qemu_ram_block_host_offset(rb,
                                                                       host));
     } else {
-        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
-        if (!mis->postcopy_tmp_zero_page) {
-            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
-                                               PROT_READ | PROT_WRITE,
-                                               MAP_PRIVATE | MAP_ANONYMOUS,
-                                               -1, 0);
-            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
-                int e = errno;
-                mis->postcopy_tmp_zero_page = NULL;
-                error_report("%s: %s mapping large zero page",
-                             __func__, strerror(e));
-                return -e;
-            }
-            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
-        }
-        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
-                                   rb);
-    }
-}
-
-/*
- * Returns a target page of memory that can be mapped at a later point in time
- * using postcopy_place_page
- * The same address is used repeatedly, postcopy_place_page just takes the
- * backing page away.
- * Returns: Pointer to allocated page
- *
- */
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
-    if (!mis->postcopy_tmp_page) {
-        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
-                                      PROT_READ | PROT_WRITE, MAP_PRIVATE |
-                                      MAP_ANONYMOUS, -1, 0);
-        if (mis->postcopy_tmp_page == MAP_FAILED) {
-            mis->postcopy_tmp_page = NULL;
-            error_report("%s: %s", __func__, strerror(errno));
-            return NULL;
-        }
+        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, rb);
     }
-
-    return mis->postcopy_tmp_page;
 }
 
 #else
@@ -1265,7 +1282,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
     return false;
 }
 
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
 {
     error_report("postcopy_ram_incoming_init: No OS support");
     return -1;
@@ -1310,12 +1327,6 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
     return -1;
 }
 
-void *postcopy_get_tmp_page(MigrationIncomingState *mis)
-{
-    assert(0);
-    return NULL;
-}
-
 int postcopy_wake_shared(struct PostCopyFD *pcfd,
                          uint64_t client_addr,
                          RAMBlock *rb)
@@ -1347,22 +1358,16 @@ void postcopy_fault_thread_notify(MigrationIncomingState *mis)
  * asking to discard individual ranges.
  *
  * @ms: The current migration state.
- * @offset: the bitmap offset of the named RAMBlock in the migration
- *     bitmap.
+ * @offset: the bitmap offset of the named RAMBlock in the migration bitmap.
  * @name: RAMBlock that discards will operate on.
- *
- * returns: a new PDS.
  */
-PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
-                                                 const char *name)
+static PostcopyDiscardState pds = {0};
+void postcopy_discard_send_init(MigrationState *ms, const char *name)
 {
-    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
-
-    if (res) {
-        res->ramblock_name = name;
-    }
-
-    return res;
+    pds.ramblock_name = name;
+    pds.cur_entry = 0;
+    pds.nsentwords = 0;
+    pds.nsentcmds = 0;
 }
 
 /**
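Only one discard sequence is ever in flight, so the hunk above replaces the heap-allocated PostcopyDiscardState with a single file-scope instance, and the following hunks drop the pds parameter from send_range/send_finish. The batching logic itself is unchanged; here it is as a standalone sketch — the sender prints instead of calling qemu_savevm_send_postcopy_ram_discard(), and the batch size of 12 is illustrative, standing in for MAX_DISCARDS_PER_COMMAND:

#include <stdio.h>

#define MAX_DISCARDS_PER_COMMAND 12   /* illustrative batch size */

/* One discard sequence runs at a time, so file-scope state replaces the
 * heap-allocated PostcopyDiscardState of the old API. */
static struct {
    const char *ramblock_name;
    int cur_entry;
    unsigned long start_list[MAX_DISCARDS_PER_COMMAND];
    unsigned long length_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords;
    unsigned int nsentcmds;
} pds;

/* Stand-in for qemu_savevm_send_postcopy_ram_discard(). */
static void send_discard_command(void)
{
    printf("discard cmd %u for %s: %d range(s)\n",
           pds.nsentcmds, pds.ramblock_name, pds.cur_entry);
    pds.nsentcmds++;
    pds.cur_entry = 0;
}

static void discard_send_init(const char *name)
{
    pds.ramblock_name = name;
    pds.cur_entry = 0;
    pds.nsentwords = 0;
    pds.nsentcmds = 0;
}

static void discard_send_range(unsigned long start, unsigned long length)
{
    pds.start_list[pds.cur_entry] = start;
    pds.length_list[pds.cur_entry] = length;
    pds.cur_entry++;
    pds.nsentwords++;
    if (pds.cur_entry == MAX_DISCARDS_PER_COMMAND) {
        send_discard_command();          /* full set, ship it */
    }
}

static void discard_send_finish(void)
{
    if (pds.cur_entry) {
        send_discard_command();          /* flush the final partial batch */
    }
}

int main(void)
{
    discard_send_init("pc.ram");
    for (unsigned long page = 0; page < 30; page++) {
        discard_send_range(page * 2, 1); /* 30 ranges -> 3 commands */
    }
    discard_send_finish();
    return 0;
}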
@@ -1371,30 +1376,29 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
  * be sent later.
  *
  * @ms: Current migration state.
- * @pds: Structure initialised by postcopy_discard_send_init().
  * @start,@length: a range of pages in the migration bitmap in the
  *     RAM block passed to postcopy_discard_send_init() (length=1 is one page)
  */
-void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
-                                 unsigned long start, unsigned long length)
+void postcopy_discard_send_range(MigrationState *ms, unsigned long start,
+                                 unsigned long length)
 {
     size_t tp_size = qemu_target_page_size();
 
     /* Convert to byte offsets within the RAM block */
-    pds->start_list[pds->cur_entry] = start * tp_size;
-    pds->length_list[pds->cur_entry] = length * tp_size;
-    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
-    pds->cur_entry++;
-    pds->nsentwords++;
+    pds.start_list[pds.cur_entry] = start * tp_size;
+    pds.length_list[pds.cur_entry] = length * tp_size;
+    trace_postcopy_discard_send_range(pds.ramblock_name, start, length);
+    pds.cur_entry++;
+    pds.nsentwords++;
 
-    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+    if (pds.cur_entry == MAX_DISCARDS_PER_COMMAND) {
         /* Full set, ship it! */
         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
-                                              pds->ramblock_name,
-                                              pds->cur_entry,
-                                              pds->start_list,
-                                              pds->length_list);
-        pds->nsentcmds++;
-        pds->cur_entry = 0;
+                                              pds.ramblock_name,
+                                              pds.cur_entry,
+                                              pds.start_list,
+                                              pds.length_list);
+        pds.nsentcmds++;
+        pds.cur_entry = 0;
     }
 }
 
@@ -1403,24 +1407,21 @@ void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
  * bitmap code. Sends any outstanding discard messages, frees the PDS
  *
  * @ms: Current migration state.
- * @pds: Structure initialised by postcopy_discard_send_init().
  */
-void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
+void postcopy_discard_send_finish(MigrationState *ms)
 {
     /* Anything unsent? */
-    if (pds->cur_entry) {
+    if (pds.cur_entry) {
         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
-                                              pds->ramblock_name,
-                                              pds->cur_entry,
-                                              pds->start_list,
-                                              pds->length_list);
-        pds->nsentcmds++;
+                                              pds.ramblock_name,
+                                              pds.cur_entry,
+                                              pds.start_list,
+                                              pds.length_list);
+        pds.nsentcmds++;
     }
 
-    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
-                                       pds->nsentcmds);
-
-    g_free(pds);
+    trace_postcopy_discard_send_finish(pds.ramblock_name, pds.nsentwords,
+                                       pds.nsentcmds);
 }
 
 /*
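With postcopy_get_tmp_page() gone, both temporary pages are mapped up front in postcopy_ram_enable_notify() (see the hunk above), so an allocation failure surfaces when notifications are enabled rather than midway through servicing the first page fault. The shape of that front-loaded allocation as a standalone sketch — the unwinding munmap() on partial failure is an addition for self-containment; in the patch, postcopy_ram_incoming_cleanup() unmaps whatever got mapped:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static void *tmp_page;       /* receives copied page contents           */
static void *tmp_zero_page;  /* pre-zeroed; UFFDIO_ZEROPAGE can't be    */
                             /* used on hugepages, so this gets copied  */

/* Allocate both temporary pages up front; page_size stands in for
 * mis->largest_page_size. Returns 0 or a negative errno. */
static int alloc_postcopy_tmp_pages(size_t page_size)
{
    tmp_page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (tmp_page == MAP_FAILED) {
        int e = errno;              /* save before any call clobbers it */
        tmp_page = NULL;
        fprintf(stderr, "tmp page: %s\n", strerror(e));
        return -e;
    }

    tmp_zero_page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (tmp_zero_page == MAP_FAILED) {
        int e = errno;
        tmp_zero_page = NULL;
        munmap(tmp_page, page_size);     /* unwind the first mapping */
        tmp_page = NULL;
        fprintf(stderr, "zero page: %s\n", strerror(e));
        return -e;
    }
    memset(tmp_zero_page, 0, page_size); /* zero it once, reuse forever */
    return 0;
}

int main(void)
{
    if (alloc_postcopy_tmp_pages(4096)) {
        return 1;
    }
    puts("temporary pages mapped up front");
    return 0;
}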