X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=exec.c;h=e57a8a217894c181010a35759e892d5b102462b1;hb=fce5d5386d8613e9659f1edc4be9c3f8b7cff073;hp=3adf2b1861abc2deeb15c0972ec251b936575f85;hpb=6d3f4c6d1d8eb1187dc13713c49e3986eab39e7a;p=mirror_qemu.git

diff --git a/exec.c b/exec.c
index 3adf2b1861..e57a8a2178 100644
--- a/exec.c
+++ b/exec.c
@@ -42,9 +42,17 @@
 #include "exec/memory.h"
 #include "exec/ioport.h"
 #include "sysemu/dma.h"
+#include "sysemu/numa.h"
+#include "sysemu/hw_accel.h"
 #include "exec/address-spaces.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
 #endif
 #include "exec/cpu-all.h"
 #include "qemu/rcu_queue.h"
@@ -1250,6 +1258,87 @@ void qemu_mutex_unlock_ramlist(void)
     qemu_mutex_unlock(&ram_list.mutex);
 }
 
+#ifdef __linux__
+/*
+ * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
+ * may or may not name the same files / on the same filesystem now as
+ * when we actually open and map them.  Iterate over the file
+ * descriptors instead, and use qemu_fd_getpagesize().
+ */
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+    char *mem_path;
+    long *hpsize_min = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        mem_path = object_property_get_str(obj, "mem-path", NULL);
+        if (mem_path) {
+            long hpsize = qemu_mempath_getpagesize(mem_path);
+            if (hpsize < *hpsize_min) {
+                *hpsize_min = hpsize;
+            }
+        } else {
+            *hpsize_min = getpagesize();
+        }
+    }
+
+    return 0;
+}
+
+long qemu_getrampagesize(void)
+{
+    long hpsize = LONG_MAX;
+    long mainrampagesize;
+    Object *memdev_root;
+
+    if (mem_path) {
+        mainrampagesize = qemu_mempath_getpagesize(mem_path);
+    } else {
+        mainrampagesize = getpagesize();
+    }
+
+    /* It's possible we have memory-backend objects with
+     * hugepage-backed RAM.  These may get mapped into system
+     * address space via -numa parameters or memory hotplug
+     * hooks.  We want to take these into account, but we
+     * also want to make sure these supported hugepage
+     * sizes are applicable across the entire range of memory
+     * we may boot from, so we take the min across all
+     * backends, and assume normal pages in cases where a
+     * backend isn't backed by hugepages.
+     */
+    memdev_root = object_resolve_path("/objects", NULL);
+    if (memdev_root) {
+        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+    }
+    if (hpsize == LONG_MAX) {
+        /* No additional memory regions found ==> report main RAM page size */
+        return mainrampagesize;
+    }
+
+    /* If NUMA is disabled or the NUMA nodes are not backed with a
+     * memory-backend, then there is at least one node using "normal" RAM,
+     * so if its page size is smaller we have got to report that size instead.
+     */
+    if (hpsize > mainrampagesize &&
+        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
+        static bool warned;
+        if (!warned) {
+            error_report("Huge page support disabled (n/a for main memory).");
+            warned = true;
+        }
+        return mainrampagesize;
+    }
+
+    return hpsize;
+}
+#else
+long qemu_getrampagesize(void)
+{
+    return getpagesize();
+}
+#endif
+
 #ifdef __linux__
 static int64_t get_file_size(int fd)
 {
@@ -1379,7 +1468,7 @@ static void *file_ram_alloc(RAMBlock *block,
     }
 
     if (mem_prealloc) {
-        os_mem_prealloc(fd, area, memory, errp);
+        os_mem_prealloc(fd, area, memory, smp_cpus, errp);
         if (errp && *errp) {
             goto error;
         }
@@ -1472,6 +1561,11 @@ const char *qemu_ram_get_idstr(RAMBlock *rb)
     return rb->idstr;
 }
 
+bool qemu_ram_is_shared(RAMBlock *rb)
+{
+    return rb->flags & RAM_SHARED;
+}
+
 /* Called with iothread lock held.  */
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
 {
@@ -1518,6 +1612,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
     return rb->page_size;
 }
 
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+    RAMBlock *block;
+    size_t largest = 0;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        largest = MAX(largest, qemu_ram_pagesize(block));
+    }
+
+    return largest;
+}
+
 static int memory_try_enable_merging(void *addr, size_t len)
 {
     if (!machine_mem_merge(current_machine)) {
@@ -3208,6 +3315,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
     hwaddr phys_addr;
     target_ulong page;
 
+    cpu_synchronize_state(cpu);
     while (len > 0) {
         int asidx;
         MemTxAttrs attrs;
@@ -3294,4 +3402,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     rcu_read_unlock();
     return ret;
 }
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, non-zero on failure.
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+    int ret = -1;
+
+    uint8_t *host_startaddr = rb->host + start;
+
+    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+        error_report("ram_block_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+
+    if ((start + length) <= rb->used_length) {
+        uint8_t *host_endaddr = host_startaddr + length;
+        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+            error_report("ram_block_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+
+        errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+        if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+            /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+             * freeing the page.
+             */
+            ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+        } else {
+            /* Huge page case - unfortunately it can't do DONTNEED, but
+             * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+             * huge page file.
+             */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+            ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                            start, length);
+#endif
+        }
+        if (ret) {
+            ret = -errno;
+            error_report("ram_block_discard_range: Failed to discard range "
+                         "%s:%" PRIx64 " +%zx (%d)",
+                         rb->idstr, start, length, ret);
+        }
+    } else {
+        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%zx/" RAM_ADDR_FMT")",
+                     rb->idstr, start, length, rb->used_length);
+    }
+
+err:
+    return ret;
+}
+
 #endif
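
---

Note (not part of the patch): the new ram_block_discard_range() chooses
between two host mechanisms -- madvise(MADV_DONTNEED) when the block uses
the host's normal page size, and fallocate(FALLOC_FL_PUNCH_HOLE) on the
backing fd for file-backed huge pages, which (as the patch comment notes)
madvise cannot discard. The standalone Linux sketch below demonstrates the
same pattern outside QEMU; discard_range() is a hypothetical helper, not a
QEMU API, and error handling is abbreviated.

#define _GNU_SOURCE
#include <fcntl.h>          /* fallocate() */
#include <linux/falloc.h>   /* FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>       /* mmap(), madvise() */
#include <unistd.h>

/* Hypothetical helper: discard a page-aligned range so it reads back as
 * zeroes and re-faults on the next access, mirroring the small-page /
 * huge-page split in ram_block_discard_range() above. */
static int discard_range(void *host, int fd, uint64_t offset,
                         size_t length, size_t page_size)
{
    if (page_size == (size_t)getpagesize()) {
        /* Small pages: MADV_DONTNEED definitely frees the pages. */
        return madvise((uint8_t *)host + offset, length, MADV_DONTNEED);
    }
    /* Huge pages: DONTNEED is not honoured, so punch a hole in the
     * backing file instead; KEEP_SIZE preserves the file length. */
    return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                     offset, length);
}

int main(void)
{
    size_t page = (size_t)getpagesize();
    size_t len = 4 * page;
    uint8_t *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    memset(p, 0xaa, len);

    /* fd is unused on the small-page path, so -1 is fine here. */
    if (discard_range(p, -1, 0, len, page) != 0) {
        perror("discard_range");
        return 1;
    }
    printf("first byte after discard: %#x\n", p[0]); /* prints 0 */

    munmap(p, len);
    return 0;
}

Built with "cc -o discard discard.c" on Linux, the program prints 0 for the
first byte, since MADV_DONTNEED makes anonymous private pages read back as
zeroes on the next fault.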