X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=migration%2Fpostcopy-ram.c;h=dc80dbb67f4329744842c840c180db2252b0e531;hb=0e5d6327f3abb8d582cbc2e444a23ef0dc6a64c7;hp=3946aa98aa773311b5fb9560823e1da7dbf1362d;hpb=a5df35070a4c7fa8e2d9c6bd7175ee8e3e0f7641;p=mirror_qemu.git

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 3946aa98aa..dc80dbb67f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -16,9 +16,7 @@
  * source to the destination before all the data has been copied.
  */
 
-#include <glib.h>
-#include <stdio.h>
-#include <unistd.h>
+#include "qemu/osdep.h"
 
 #include "qemu-common.h"
 #include "migration/migration.h"
@@ -53,15 +51,13 @@ struct PostcopyDiscardState {
 #if defined(__linux__)
 
 #include <poll.h>
-#include <sys/eventfd.h>
-#include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
-#include <sys/types.h>
 #include <asm/types.h> /* for __u64 */
 #endif
 
-#if defined(__linux__) && defined(__NR_userfaultfd)
+#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
+#include <sys/eventfd.h>
 #include <linux/userfaultfd.h>
 
 static bool ufd_version_check(int ufd)
@@ -85,9 +81,33 @@ static bool ufd_version_check(int ufd)
         return false;
     }
 
+    if (getpagesize() != ram_pagesize_summary()) {
+        bool have_hp = false;
+        /* We've got a huge page */
+#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
+        have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+#endif
+        if (!have_hp) {
+            error_report("Userfault on this host does not support huge pages");
+            return false;
+        }
+    }
     return true;
 }
 
+/* Callback from postcopy_ram_supported_by_host block iterator.
+ */
+static int test_range_shared(const char *block_name, void *host_addr,
+                             ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+    if (qemu_ram_is_shared(qemu_ram_block_by_name(block_name))) {
+        error_report("Postcopy on shared RAM (%s) is not yet supported",
+                     block_name);
+        return 1;
+    }
+    return 0;
+}
+
 /*
  * Note: This has the side effect of munlock'ing all of RAM, that's
  * normally fine since if the postcopy succeeds it gets turned back on at the
@@ -120,6 +140,11 @@
         goto out;
     }
 
+    /* We don't support postcopy with shared RAM yet */
+    if (qemu_ram_foreach_block(test_range_shared, NULL)) {
+        goto out;
+    }
+
     /*
      * userfault and mlock don't go together; we'll put it back later if
      * it was enabled.
@@ -180,27 +205,6 @@ out:
     return ret;
 }
 
-/**
- * postcopy_ram_discard_range: Discard a range of memory.
- * We can assume that if we've been called postcopy_ram_hosttest returned true.
- *
- * @mis: Current incoming migration state.
- * @start, @length: range of memory to discard.
- *
- * returns: 0 on success.
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
-                               size_t length)
-{
-    trace_postcopy_ram_discard_range(start, length);
-    if (madvise(start, length, MADV_DONTNEED)) {
-        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
-        return -1;
-    }
-
-    return 0;
-}
-
 /*
  * Setup an area of RAM so that it *can* be used for postcopy later; this
  * must be done right at the start prior to pre-copy.
@@ -219,7 +223,7 @@ static int init_range(const char *block_name, void *host_addr,
      * - we're going to get the copy from the source anyway.
      * (Precopy will just overwrite this data, so doesn't need the discard)
      */
-    if (postcopy_ram_discard_range(mis, host_addr, length)) {
+    if (ram_discard_range(mis, block_name, 0, length)) {
         return -1;
     }
 
@@ -322,9 +326,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 
     if (mis->postcopy_tmp_page) {
-        munmap(mis->postcopy_tmp_page, getpagesize());
+        munmap(mis->postcopy_tmp_page, mis->largest_page_size);
         mis->postcopy_tmp_page = NULL;
     }
+    if (mis->postcopy_tmp_zero_page) {
+        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+        mis->postcopy_tmp_zero_page = NULL;
+    }
     trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
@@ -388,6 +396,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
         error_report("%s userfault register: %s", __func__, strerror(errno));
         return -1;
     }
+    if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
+        error_report("%s userfault: Region doesn't support COPY", __func__);
+        return -1;
+    }
 
     return 0;
 }
@@ -400,7 +412,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
     MigrationIncomingState *mis = opaque;
     struct uffd_msg msg;
     int ret;
-    size_t hostpagesize = getpagesize();
    RAMBlock *rb = NULL;
    RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 
@@ -409,7 +420,6 @@
 
     while (true) {
         ram_addr_t rb_offset;
-        ram_addr_t in_raspace;
         struct pollfd pfd[2];
 
         /*
@@ -461,14 +471,14 @@
 
         rb = qemu_ram_block_from_host(
                  (void *)(uintptr_t)msg.arg.pagefault.address,
-                 true, &in_raspace, &rb_offset);
+                 true, &rb_offset);
 
         if (!rb) {
             error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                          PRIx64, (uint64_t)msg.arg.pagefault.address);
             break;
         }
 
-        rb_offset &= ~(hostpagesize - 1);
+        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset);
@@ -480,11 +490,11 @@
         if (rb != last_rb) {
             last_rb = rb;
             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
-                                      rb_offset, hostpagesize);
+                                      rb_offset, qemu_ram_pagesize(rb));
         } else {
             /* Save some space */
             migrate_send_rp_req_pages(mis, NULL,
-                                      rb_offset, hostpagesize);
+                                      rb_offset, qemu_ram_pagesize(rb));
         }
     }
     trace_postcopy_ram_fault_thread_exit();
@@ -545,13 +555,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * Place a host page (from) at (host) atomically
  * returns 0 on success
  */
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        size_t pagesize)
 {
     struct uffdio_copy copy_struct;
 
     copy_struct.dst = (uint64_t)(uintptr_t)host;
     copy_struct.src = (uint64_t)(uintptr_t)from;
-    copy_struct.len = getpagesize();
+    copy_struct.len = pagesize;
     copy_struct.mode = 0;
 
     /* copy also acks to the kernel waking the stalled thread up
@@ -561,8 +572,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
      */
     if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
         int e = errno;
-        error_report("%s: %s copy host: %p from: %p",
-                     __func__, strerror(e), host, from);
+        error_report("%s: %s copy host: %p from: %p (size: %zd)",
+                     __func__, strerror(e), host, from, pagesize);
         return -e;
     }
 
@@ -575,23 +586,44 @@
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+                             size_t pagesize)
 {
-    struct uffdio_zeropage zero_struct;
+    trace_postcopy_place_page_zero(host);
 
-    zero_struct.range.start = (uint64_t)(uintptr_t)host;
-    zero_struct.range.len = getpagesize();
-    zero_struct.mode = 0;
+    if (pagesize == getpagesize()) {
+        struct uffdio_zeropage zero_struct;
+        zero_struct.range.start = (uint64_t)(uintptr_t)host;
+        zero_struct.range.len = getpagesize();
+        zero_struct.mode = 0;
 
-    if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
-        int e = errno;
-        error_report("%s: %s zero host: %p",
-                     __func__, strerror(e), host);
+        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+            int e = errno;
+            error_report("%s: %s zero host: %p",
+                         __func__, strerror(e), host);
 
-        return -e;
+            return -e;
+        }
+    } else {
+        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+        if (!mis->postcopy_tmp_zero_page) {
+            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+                                               PROT_READ | PROT_WRITE,
+                                               MAP_PRIVATE | MAP_ANONYMOUS,
+                                               -1, 0);
+            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+                int e = errno;
+                mis->postcopy_tmp_zero_page = NULL;
+                error_report("%s: %s mapping large zero page",
+                             __func__, strerror(e));
+                return -e;
+            }
+            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+        }
+        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+                                   pagesize);
     }
-    trace_postcopy_place_page_zero(host);
 
     return 0;
 }
@@ -606,10 +638,11 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 {
     if (!mis->postcopy_tmp_page) {
-        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
                                       PROT_READ | PROT_WRITE,
                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        if (!mis->postcopy_tmp_page) {
+        if (mis->postcopy_tmp_page == MAP_FAILED) {
+            mis->postcopy_tmp_page = NULL;
             error_report("%s: %s", __func__, strerror(errno));
             return NULL;
         }
@@ -638,13 +671,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     return -1;
 }
 
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
-                               size_t length)
-{
-    assert(0);
-    return -1;
-}
-
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 {
     assert(0);
@@ -657,13 +683,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
     return -1;
 }
 
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        size_t pagesize)
 {
     assert(0);
     return -1;
 }
 
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+                             size_t pagesize)
 {
     assert(0);
     return -1;
@@ -727,7 +755,8 @@ void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 
     if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
         /* Full set, ship it! */
-        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
+                                              pds->ramblock_name,
                                               pds->cur_entry,
                                               pds->start_list,
                                               pds->length_list);
@@ -747,7 +776,8 @@ void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 {
     /* Anything unsent? */
     if (pds->cur_entry) {
-        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+        qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
+                                              pds->ramblock_name,
                                               pds->cur_entry,
                                               pds->start_list,
                                               pds->length_list);
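
For reference, the userfaultfd call sequence this patch builds on, as a
minimal standalone sketch. It assumes a Linux host that defines
__NR_userfaultfd and ships <linux/userfaultfd.h>; the helper names
(open_userfaultfd, register_range, place_page) are illustrative only, not
QEMU API, and error handling is reduced to stderr.

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>

/* Open a userfault fd and negotiate the API, as ufd_version_check() does. */
static int open_userfaultfd(void)
{
    struct uffdio_api api_struct;
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

    if (ufd == -1) {
        fprintf(stderr, "userfaultfd syscall: %s\n", strerror(errno));
        return -1;
    }
    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        fprintf(stderr, "UFFDIO_API: %s\n", strerror(errno));
        close(ufd);
        return -1;
    }
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
    /* Hugepage-backed regions only work if the kernel advertises this. */
    if (!(api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS)) {
        fprintf(stderr, "note: no userfault support for hugetlbfs pages\n");
    }
#endif
    return ufd;
}

/* Register [addr, addr+len) for missing-page events and verify that the
 * region accepts UFFDIO_COPY, mirroring ram_block_enable_notify() above. */
static int register_range(int ufd, void *addr, size_t len)
{
    struct uffdio_register reg_struct;

    reg_struct.range.start = (uint64_t)(uintptr_t)addr;
    reg_struct.range.len = len;
    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

    if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
        fprintf(stderr, "UFFDIO_REGISTER: %s\n", strerror(errno));
        return -1;
    }
    if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
        fprintf(stderr, "region does not support UFFDIO_COPY\n");
        return -1;
    }
    return 0;
}

/* Atomically place one page of data at 'host'; with mode = 0 the copy also
 * wakes any thread stalled on that page, which is the behaviour
 * postcopy_place_page() relies on. 'pagesize' must be the page size of the
 * registered region, which may be a huge page. */
static int place_page(int ufd, void *host, void *from, size_t pagesize)
{
    struct uffdio_copy copy_struct;

    copy_struct.dst = (uint64_t)(uintptr_t)host;
    copy_struct.src = (uint64_t)(uintptr_t)from;
    copy_struct.len = pagesize;
    copy_struct.mode = 0;

    if (ioctl(ufd, UFFDIO_COPY, &copy_struct)) {
        int e = errno;
        fprintf(stderr, "UFFDIO_COPY: %s\n", strerror(e));
        return -e;
    }
    return 0;
}

Note how postcopy_place_page_zero() in the patch reuses the same
UFFDIO_COPY path for hugepages: UFFDIO_ZEROPAGE only works for host-sized
pages, so for anything larger the patch keeps a pre-zeroed buffer of
mis->largest_page_size bytes and places zero pages by copying from it.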