From: Anthony Liguori Date: Mon, 23 Sep 2013 16:52:49 +0000 (-0500) Subject: Merge remote-tracking branch 'qemu-kvm/uq/master' into staging X-Git-Tag: v2.7.1~2275 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=3e4be9c29784df09c364b52a55e826a0b05b950e;hp=-c;p=mirror_qemu.git Merge remote-tracking branch 'qemu-kvm/uq/master' into staging # By Alexey Kardashevskiy (3) and others # Via Paolo Bonzini * qemu-kvm/uq/master: target-i386: add feature kvm_pv_unhalt linux-headers: update to 3.12-rc1 target-i386: forward CPUID cache leaves when -cpu host is used linux-headers: update to 3.11 kvm: fix traces to use %x instead of %d kvmvapic: Clear also physical ROM address when entering INACTIVE state kvmvapic: Enter inactive state on hardware reset kvmvapic: Catch invalid ROM size kvm irqfd: support direct msimessage to irq translation fix steal time MSR vmsd callback to proper opaque type kvm: warn if num cpus is greater than num recommended cpu: Move cpu state syncs up into cpu_dump_state() exec: always use MADV_DONTFORK Message-id: 1379694292-1601-1-git-send-email-pbonzini@redhat.com --- 3e4be9c29784df09c364b52a55e826a0b05b950e diff --combined exec.c index 26469120d9,5e6015cbd3..8f5e7abfa7 --- a/exec.c +++ b/exec.c @@@ -749,18 -749,6 +749,18 @@@ static int subpage_register (subpage_t uint16_t section); static subpage_t *subpage_init(AddressSpace *as, hwaddr base); +static void *(*phys_mem_alloc)(ram_addr_t size) = qemu_anon_ram_alloc; + +/* + * Set a custom physical guest memory alloator. + * Accelerators with unusual needs may need this. Hopefully, we can + * get rid of it eventually. + */ +void phys_mem_set_alloc(void *(*alloc)(ram_addr_t)) +{ + phys_mem_alloc = alloc; +} + static uint16_t phys_section_add(MemoryRegionSection *section) { /* The physical section number is ORed with a page-aligned @@@ -892,7 -880,7 +892,7 @@@ void qemu_mutex_unlock_ramlist(void qemu_mutex_unlock(&ram_list.mutex); } -#if defined(__linux__) && !defined(TARGET_S390X) +#ifdef __linux__ #include @@@ -995,14 -983,6 +995,14 @@@ static void *file_ram_alloc(RAMBlock *b block->fd = fd; return area; } +#else +static void *file_ram_alloc(RAMBlock *block, + ram_addr_t memory, + const char *path) +{ + fprintf(stderr, "-mem-path not supported on this host\n"); + exit(1); +} #endif static ram_addr_t find_ram_offset(ram_addr_t size) @@@ -1119,7 -1099,6 +1119,7 @@@ ram_addr_t qemu_ram_alloc_from_ptr(ram_ size = TARGET_PAGE_ALIGN(size); new_block = g_malloc0(sizeof(*new_block)); + new_block->fd = -1; /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); @@@ -1128,32 -1107,26 +1128,32 @@@ if (host) { new_block->host = host; new_block->flags |= RAM_PREALLOC_MASK; + } else if (xen_enabled()) { + if (mem_path) { + fprintf(stderr, "-mem-path not supported with Xen\n"); + exit(1); + } + xen_ram_alloc(new_block->offset, size, mr); } else { if (mem_path) { -#if defined (__linux__) && !defined(TARGET_S390X) + if (phys_mem_alloc != qemu_anon_ram_alloc) { + /* + * file_ram_alloc() needs to allocate just like + * phys_mem_alloc, but we haven't bothered to provide + * a hook there. + */ + fprintf(stderr, + "-mem-path not supported with this accelerator\n"); + exit(1); + } new_block->host = file_ram_alloc(new_block, size, mem_path); + } + if (!new_block->host) { + new_block->host = phys_mem_alloc(size); if (!new_block->host) { - new_block->host = qemu_anon_ram_alloc(size); - memory_try_enable_merging(new_block->host, size); - } -#else - fprintf(stderr, "-mem-path option unsupported\n"); - exit(1); -#endif - } else { - if (xen_enabled()) { - xen_ram_alloc(new_block->offset, size, mr); - } else if (kvm_enabled()) { - /* some s390/kvm configurations have special constraints */ - new_block->host = kvm_ram_alloc(size); - } else { - new_block->host = qemu_anon_ram_alloc(size); + fprintf(stderr, "Cannot set up guest memory '%s': %s\n", + new_block->mr->name, strerror(errno)); + exit(1); } memory_try_enable_merging(new_block->host, size); } @@@ -1184,6 -1157,7 +1184,7 @@@ qemu_ram_setup_dump(new_block->host, size); qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE); + qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK); if (kvm_enabled()) kvm_setup_guest_memory(new_block->host, size); @@@ -1227,13 -1201,23 +1228,13 @@@ void qemu_ram_free(ram_addr_t addr ram_list.version++; if (block->flags & RAM_PREALLOC_MASK) { ; - } else if (mem_path) { -#if defined (__linux__) && !defined(TARGET_S390X) - if (block->fd) { - munmap(block->host, block->length); - close(block->fd); - } else { - qemu_anon_ram_free(block->host, block->length); - } -#else - abort(); -#endif + } else if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); + } else if (block->fd >= 0) { + munmap(block->host, block->length); + close(block->fd); } else { - if (xen_enabled()) { - xen_invalidate_map_cache_entry(block->host); - } else { - qemu_anon_ram_free(block->host, block->length); - } + qemu_anon_ram_free(block->host, block->length); } g_free(block); break; @@@ -1257,31 -1241,38 +1258,31 @@@ void qemu_ram_remap(ram_addr_t addr, ra vaddr = block->host + offset; if (block->flags & RAM_PREALLOC_MASK) { ; + } else if (xen_enabled()) { + abort(); } else { flags = MAP_FIXED; munmap(vaddr, length); - if (mem_path) { -#if defined(__linux__) && !defined(TARGET_S390X) - if (block->fd) { + if (block->fd >= 0) { #ifdef MAP_POPULATE - flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED : - MAP_PRIVATE; + flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED : + MAP_PRIVATE; #else - flags |= MAP_PRIVATE; -#endif - area = mmap(vaddr, length, PROT_READ | PROT_WRITE, - flags, block->fd, offset); - } else { - flags |= MAP_PRIVATE | MAP_ANONYMOUS; - area = mmap(vaddr, length, PROT_READ | PROT_WRITE, - flags, -1, 0); - } -#else - abort(); + flags |= MAP_PRIVATE; #endif + area = mmap(vaddr, length, PROT_READ | PROT_WRITE, + flags, block->fd, offset); } else { -#if defined(TARGET_S390X) && defined(CONFIG_KVM) - flags |= MAP_SHARED | MAP_ANONYMOUS; - area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE, - flags, -1, 0); -#else + /* + * Remap needs to match alloc. Accelerators that + * set phys_mem_alloc never remap. If they did, + * we'd need a remap hook here. + */ + assert(phys_mem_alloc == qemu_anon_ram_alloc); + flags |= MAP_PRIVATE | MAP_ANONYMOUS; area = mmap(vaddr, length, PROT_READ | PROT_WRITE, flags, -1, 0); -#endif } if (area != vaddr) { fprintf(stderr, "Could not remap addr: " diff --combined include/sysemu/kvm.h index 9bbe3db146,73c1ec5c40..3b25f27a7c --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@@ -46,6 -46,7 +46,7 @@@ extern bool kvm_halt_in_kernel_allowed extern bool kvm_irqfds_allowed; extern bool kvm_msi_via_irqfd_allowed; extern bool kvm_gsi_routing_allowed; + extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; #if defined CONFIG_KVM || !defined NEED_CPU_H @@@ -107,6 -108,13 +108,13 @@@ */ #define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed) + /** + * kvm_gsi_direct_mapping: + * + * Returns: true if GSI direct mapping is enabled. + */ + #define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping) + /** * kvm_readonly_mem_enabled: * @@@ -123,6 -131,7 +131,7 @@@ #define kvm_irqfds_enabled() (false) #define kvm_msi_via_irqfd_enabled() (false) #define kvm_gsi_routing_allowed() (false) + #define kvm_gsi_direct_mapping() (false) #define kvm_readonly_mem_enabled() (false) #endif @@@ -161,6 -170,11 +170,6 @@@ int kvm_cpu_exec(CPUState *cpu) #ifdef NEED_CPU_H -#if !defined(CONFIG_USER_ONLY) -void *kvm_ram_alloc(ram_addr_t size); -void *kvm_arch_ram_alloc(ram_addr_t size); -#endif - void kvm_setup_guest_memory(void *start, size_t size); void kvm_flush_coalesced_mmio_buffer(void); @@@ -265,16 -279,6 +274,6 @@@ int kvm_check_extension(KVMState *s, un uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, uint32_t index, int reg); - void kvm_cpu_synchronize_state(CPUState *cpu); - - /* generic hooks - to be moved/refactored once there are more users */ - - static inline void cpu_synchronize_state(CPUState *cpu) - { - if (kvm_enabled()) { - kvm_cpu_synchronize_state(cpu); - } - } #if !defined(CONFIG_USER_ONLY) int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, @@@ -283,9 -287,19 +282,19 @@@ #endif /* NEED_CPU_H */ + void kvm_cpu_synchronize_state(CPUState *cpu); void kvm_cpu_synchronize_post_reset(CPUState *cpu); void kvm_cpu_synchronize_post_init(CPUState *cpu); + /* generic hooks - to be moved/refactored once there are more users */ + + static inline void cpu_synchronize_state(CPUState *cpu) + { + if (kvm_enabled()) { + kvm_cpu_synchronize_state(cpu); + } + } + static inline void cpu_synchronize_post_reset(CPUState *cpu) { if (kvm_enabled()) { diff --combined kvm-all.c index b87215c10f,7630a7d6e3..4478969ed2 --- a/kvm-all.c +++ b/kvm-all.c @@@ -111,6 -111,7 +111,7 @@@ bool kvm_halt_in_kernel_allowed bool kvm_irqfds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; + bool kvm_gsi_direct_mapping; bool kvm_allowed; bool kvm_readonly_mem_allowed; @@@ -1069,6 -1070,10 +1070,10 @@@ void kvm_irqchip_release_virq(KVMState struct kvm_irq_routing_entry *e; int i; + if (kvm_gsi_direct_mapping()) { + return; + } + for (i = 0; i < s->irq_routes->nr; i++) { e = &s->irq_routes->entries[i]; if (e->gsi == virq) { @@@ -1190,6 -1195,10 +1195,10 @@@ int kvm_irqchip_add_msi_route(KVMState struct kvm_irq_routing_entry kroute = {}; int virq; + if (kvm_gsi_direct_mapping()) { + return msg.data & 0xffff; + } + if (!kvm_gsi_routing_enabled()) { return -ENOSYS; } @@@ -1216,6 -1225,10 +1225,10 @@@ int kvm_irqchip_update_msi_route(KVMSta { struct kvm_irq_routing_entry kroute = {}; + if (kvm_gsi_direct_mapping()) { + return 0; + } + if (!kvm_irqchip_in_kernel()) { return -ENOSYS; } @@@ -1322,24 -1335,20 +1335,20 @@@ static int kvm_irqchip_create(KVMState return 0; } - static int kvm_max_vcpus(KVMState *s) + /* Find number of supported CPUs using the recommended + * procedure from the kernel API documentation to cope with + * older kernels that may be missing capabilities. + */ + static int kvm_recommended_vcpus(KVMState *s) { - int ret; - - /* Find number of supported CPUs using the recommended - * procedure from the kernel API documentation to cope with - * older kernels that may be missing capabilities. - */ - ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); - if (ret) { - return ret; - } - ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS); - if (ret) { - return ret; - } + int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS); + return (ret) ? ret : 4; + } - return 4; + static int kvm_max_vcpus(KVMState *s) + { + int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + return (ret) ? ret : kvm_recommended_vcpus(s); } int kvm_init(void) @@@ -1347,11 -1356,19 +1356,19 @@@ static const char upgrade_note[] = "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n" "(see http://sourceforge.net/projects/kvm).\n"; + struct { + const char *name; + int num; + } num_cpus[] = { + { "SMP", smp_cpus }, + { "hotpluggable", max_cpus }, + { NULL, } + }, *nc = num_cpus; + int soft_vcpus_limit, hard_vcpus_limit; KVMState *s; const KVMCapabilityInfo *missing_cap; int ret; int i; - int max_vcpus; s = g_malloc0(sizeof(KVMState)); @@@ -1392,19 -1409,26 +1409,26 @@@ goto err; } - max_vcpus = kvm_max_vcpus(s); - if (smp_cpus > max_vcpus) { - ret = -EINVAL; - fprintf(stderr, "Number of SMP cpus requested (%d) exceeds max cpus " - "supported by KVM (%d)\n", smp_cpus, max_vcpus); - goto err; - } + /* check the vcpu limits */ + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); - if (max_cpus > max_vcpus) { - ret = -EINVAL; - fprintf(stderr, "Number of hotpluggable cpus requested (%d) exceeds max cpus " - "supported by KVM (%d)\n", max_cpus, max_vcpus); - goto err; + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + fprintf(stderr, + "Warning: Number of %s cpus requested (%d) exceeds " + "the recommended cpus supported by KVM (%d)\n", + nc->name, nc->num, soft_vcpus_limit); + + if (nc->num > hard_vcpus_limit) { + ret = -EINVAL; + fprintf(stderr, "Number of %s cpus requested (%d) exceeds " + "the maximum cpus supported by KVM (%d)\n", + nc->name, nc->num, hard_vcpus_limit); + goto err; + } + } + nc++; } s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0); @@@ -1812,6 -1836,19 +1836,6 @@@ int kvm_has_intx_set_mask(void return kvm_state->intx_set_mask; } -void *kvm_ram_alloc(ram_addr_t size) -{ -#ifdef TARGET_S390X - void *mem; - - mem = kvm_arch_ram_alloc(size); - if (mem) { - return mem; - } -#endif - return qemu_anon_ram_alloc(size); -} - void kvm_setup_guest_memory(void *start, size_t size) { #ifdef CONFIG_VALGRIND_H diff --combined target-i386/helper.c index 8bf85ec5f0,0ad7c8e3b6..678503a7cc --- a/target-i386/helper.c +++ b/target-i386/helper.c @@@ -188,8 -188,6 +188,6 @@@ void x86_cpu_dump_state(CPUState *cs, F char cc_op_name[32]; static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; - cpu_synchronize_state(cs); - eflags = cpu_compute_eflags(env); #ifdef TARGET_X86_64 if (env->hflags & HF_CS64_MASK) { @@@ -894,10 -892,7 +892,10 @@@ hwaddr x86_cpu_get_phys_page_debug(CPUS uint32_t page_offset; int page_size; - if (env->cr[4] & CR4_PAE_MASK) { + if (!(env->cr[0] & CR0_PG_MASK)) { + pte = addr & env->a20_mask; + page_size = 4096; + } else if (env->cr[4] & CR4_PAE_MASK) { target_ulong pdpe_addr; uint64_t pde, pdpe; @@@ -955,21 -950,26 +953,21 @@@ } else { uint32_t pde; - if (!(env->cr[0] & CR0_PG_MASK)) { - pte = addr; - page_size = 4096; + /* page directory entry */ + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; + pde = ldl_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) + return -1; + if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + pte = pde & ~0x003ff000; /* align to 4MB */ + page_size = 4096 * 1024; } else { /* page directory entry */ - pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; - pde = ldl_phys(pde_addr); - if (!(pde & PG_PRESENT_MASK)) + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; + pte = ldl_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) return -1; - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { - pte = pde & ~0x003ff000; /* align to 4MB */ - page_size = 4096 * 1024; - } else { - /* page directory entry */ - pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; - pte = ldl_phys(pte_addr); - if (!(pte & PG_PRESENT_MASK)) - return -1; - page_size = 4096; - } + page_size = 4096; } pte = pte & env->a20_mask; }