From: Anthony Liguori <anthony@codemonkey.ws>
Date: Mon, 23 Sep 2013 16:52:49 +0000 (-0500)
Subject: Merge remote-tracking branch 'qemu-kvm/uq/master' into staging
X-Git-Tag: v2.7.1~2275
X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=3e4be9c29784df09c364b52a55e826a0b05b950e;hp=-c;p=mirror_qemu.git

Merge remote-tracking branch 'qemu-kvm/uq/master' into staging

# By Alexey Kardashevskiy (3) and others
# Via Paolo Bonzini
* qemu-kvm/uq/master:
  target-i386: add feature kvm_pv_unhalt
  linux-headers: update to 3.12-rc1
  target-i386: forward CPUID cache leaves when -cpu host is used
  linux-headers: update to 3.11
  kvm: fix traces to use %x instead of %d
  kvmvapic: Clear also physical ROM address when entering INACTIVE state
  kvmvapic: Enter inactive state on hardware reset
  kvmvapic: Catch invalid ROM size
  kvm irqfd: support direct msimessage to irq translation
  fix steal time MSR vmsd callback to proper opaque type
  kvm: warn if num cpus is greater than num recommended
  cpu: Move cpu state syncs up into cpu_dump_state()
  exec: always use MADV_DONTFORK

Message-id: 1379694292-1601-1-git-send-email-pbonzini@redhat.com
---

3e4be9c29784df09c364b52a55e826a0b05b950e
diff --combined exec.c
index 26469120d9,5e6015cbd3..8f5e7abfa7
--- a/exec.c
+++ b/exec.c
@@@ -749,18 -749,6 +749,18 @@@ static int subpage_register (subpage_t 
                               uint16_t section);
  static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
  
 +static void *(*phys_mem_alloc)(ram_addr_t size) = qemu_anon_ram_alloc;
 +
 +/*
 + * Set a custom physical guest memory allocator.
 + * Accelerators with unusual needs may need this.  Hopefully, we can
 + * get rid of it eventually.
 + */
 +void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
 +{
 +    phys_mem_alloc = alloc;
 +}
 +
  static uint16_t phys_section_add(MemoryRegionSection *section)
  {
      /* The physical section number is ORed with a page-aligned
@@@ -892,7 -880,7 +892,7 @@@ void qemu_mutex_unlock_ramlist(void
      qemu_mutex_unlock(&ram_list.mutex);
  }
  
 -#if defined(__linux__) && !defined(TARGET_S390X)
 +#ifdef __linux__
  
  #include <sys/vfs.h>
  
@@@ -995,14 -983,6 +995,14 @@@ static void *file_ram_alloc(RAMBlock *b
      block->fd = fd;
      return area;
  }
 +#else
 +static void *file_ram_alloc(RAMBlock *block,
 +                            ram_addr_t memory,
 +                            const char *path)
 +{
 +    fprintf(stderr, "-mem-path not supported on this host\n");
 +    exit(1);
 +}
  #endif
  
  static ram_addr_t find_ram_offset(ram_addr_t size)
@@@ -1119,7 -1099,6 +1119,7 @@@ ram_addr_t qemu_ram_alloc_from_ptr(ram_
  
      size = TARGET_PAGE_ALIGN(size);
      new_block = g_malloc0(sizeof(*new_block));
 +    new_block->fd = -1;
  
      /* This assumes the iothread lock is taken here too.  */
      qemu_mutex_lock_ramlist();
@@@ -1128,32 -1107,26 +1128,32 @@@
      if (host) {
          new_block->host = host;
          new_block->flags |= RAM_PREALLOC_MASK;
 +    } else if (xen_enabled()) {
 +        if (mem_path) {
 +            fprintf(stderr, "-mem-path not supported with Xen\n");
 +            exit(1);
 +        }
 +        xen_ram_alloc(new_block->offset, size, mr);
      } else {
          if (mem_path) {
 -#if defined (__linux__) && !defined(TARGET_S390X)
 +            if (phys_mem_alloc != qemu_anon_ram_alloc) {
 +                /*
 +                 * file_ram_alloc() needs to allocate just like
 +                 * phys_mem_alloc, but we haven't bothered to provide
 +                 * a hook there.
 +                 */
 +                fprintf(stderr,
 +                        "-mem-path not supported with this accelerator\n");
 +                exit(1);
 +            }
              new_block->host = file_ram_alloc(new_block, size, mem_path);
 +        }
 +        if (!new_block->host) {
 +            new_block->host = phys_mem_alloc(size);
              if (!new_block->host) {
 -                new_block->host = qemu_anon_ram_alloc(size);
 -                memory_try_enable_merging(new_block->host, size);
 -            }
 -#else
 -            fprintf(stderr, "-mem-path option unsupported\n");
 -            exit(1);
 -#endif
 -        } else {
 -            if (xen_enabled()) {
 -                xen_ram_alloc(new_block->offset, size, mr);
 -            } else if (kvm_enabled()) {
 -                /* some s390/kvm configurations have special constraints */
 -                new_block->host = kvm_ram_alloc(size);
 -            } else {
 -                new_block->host = qemu_anon_ram_alloc(size);
 +                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
 +                        new_block->mr->name, strerror(errno));
 +                exit(1);
              }
              memory_try_enable_merging(new_block->host, size);
          }
@@@ -1184,6 -1157,7 +1184,7 @@@
  
      qemu_ram_setup_dump(new_block->host, size);
      qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
+     qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
  
      if (kvm_enabled())
          kvm_setup_guest_memory(new_block->host, size);
@@@ -1227,13 -1201,23 +1228,13 @@@ void qemu_ram_free(ram_addr_t addr
              ram_list.version++;
              if (block->flags & RAM_PREALLOC_MASK) {
                  ;
 -            } else if (mem_path) {
 -#if defined (__linux__) && !defined(TARGET_S390X)
 -                if (block->fd) {
 -                    munmap(block->host, block->length);
 -                    close(block->fd);
 -                } else {
 -                    qemu_anon_ram_free(block->host, block->length);
 -                }
 -#else
 -                abort();
 -#endif
 +            } else if (xen_enabled()) {
 +                xen_invalidate_map_cache_entry(block->host);
 +            } else if (block->fd >= 0) {
 +                munmap(block->host, block->length);
 +                close(block->fd);
              } else {
 -                if (xen_enabled()) {
 -                    xen_invalidate_map_cache_entry(block->host);
 -                } else {
 -                    qemu_anon_ram_free(block->host, block->length);
 -                }
 +                qemu_anon_ram_free(block->host, block->length);
              }
              g_free(block);
              break;
@@@ -1257,31 -1241,38 +1258,31 @@@ void qemu_ram_remap(ram_addr_t addr, ra
              vaddr = block->host + offset;
              if (block->flags & RAM_PREALLOC_MASK) {
                  ;
 +            } else if (xen_enabled()) {
 +                abort();
              } else {
                  flags = MAP_FIXED;
                  munmap(vaddr, length);
 -                if (mem_path) {
 -#if defined(__linux__) && !defined(TARGET_S390X)
 -                    if (block->fd) {
 +                if (block->fd >= 0) {
  #ifdef MAP_POPULATE
 -                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
 -                            MAP_PRIVATE;
 +                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
 +                        MAP_PRIVATE;
  #else
 -                        flags |= MAP_PRIVATE;
 -#endif
 -                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
 -                                    flags, block->fd, offset);
 -                    } else {
 -                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
 -                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
 -                                    flags, -1, 0);
 -                    }
 -#else
 -                    abort();
 +                    flags |= MAP_PRIVATE;
  #endif
 +                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
 +                                flags, block->fd, offset);
                  } else {
 -#if defined(TARGET_S390X) && defined(CONFIG_KVM)
 -                    flags |= MAP_SHARED | MAP_ANONYMOUS;
 -                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
 -                                flags, -1, 0);
 -#else
 +                    /*
 +                     * Remap needs to match alloc.  Accelerators that
 +                     * set phys_mem_alloc never remap.  If they did,
 +                     * we'd need a remap hook here.
 +                     */
 +                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
 +
                      flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                      area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                  flags, -1, 0);
 -#endif
                  }
                  if (area != vaddr) {
                      fprintf(stderr, "Could not remap addr: "
diff --combined include/sysemu/kvm.h
index 9bbe3db146,73c1ec5c40..3b25f27a7c
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@@ -46,6 -46,7 +46,7 @@@ extern bool kvm_halt_in_kernel_allowed
  extern bool kvm_irqfds_allowed;
  extern bool kvm_msi_via_irqfd_allowed;
  extern bool kvm_gsi_routing_allowed;
+ extern bool kvm_gsi_direct_mapping;
  extern bool kvm_readonly_mem_allowed;
  
  #if defined CONFIG_KVM || !defined NEED_CPU_H
@@@ -107,6 -108,13 +108,13 @@@
   */
  #define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed)
  
+ /**
+  * kvm_gsi_direct_mapping:
+  *
+  * Returns: true if GSI direct mapping is enabled.
+  */
+ #define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping)
+ 
  /**
   * kvm_readonly_mem_enabled:
   *
@@@ -123,6 -131,7 +131,7 @@@
  #define kvm_irqfds_enabled() (false)
  #define kvm_msi_via_irqfd_enabled() (false)
  #define kvm_gsi_routing_allowed() (false)
+ #define kvm_gsi_direct_mapping() (false)
  #define kvm_readonly_mem_enabled() (false)
  #endif
  
@@@ -161,6 -170,11 +170,6 @@@ int kvm_cpu_exec(CPUState *cpu)
  
  #ifdef NEED_CPU_H
  
 -#if !defined(CONFIG_USER_ONLY)
 -void *kvm_ram_alloc(ram_addr_t size);
 -void *kvm_arch_ram_alloc(ram_addr_t size);
 -#endif
 -
  void kvm_setup_guest_memory(void *start, size_t size);
  void kvm_flush_coalesced_mmio_buffer(void);
  
@@@ -265,16 -279,6 +274,6 @@@ int kvm_check_extension(KVMState *s, un
  
  uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
                                        uint32_t index, int reg);
- void kvm_cpu_synchronize_state(CPUState *cpu);
- 
- /* generic hooks - to be moved/refactored once there are more users */
- 
- static inline void cpu_synchronize_state(CPUState *cpu)
- {
-     if (kvm_enabled()) {
-         kvm_cpu_synchronize_state(cpu);
-     }
- }
  
  #if !defined(CONFIG_USER_ONLY)
  int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
@@@ -283,9 -287,19 +282,19 @@@
  
  #endif /* NEED_CPU_H */
  
+ void kvm_cpu_synchronize_state(CPUState *cpu);
  void kvm_cpu_synchronize_post_reset(CPUState *cpu);
  void kvm_cpu_synchronize_post_init(CPUState *cpu);
  
+ /* generic hooks - to be moved/refactored once there are more users */
+ 
+ static inline void cpu_synchronize_state(CPUState *cpu)
+ {
+     if (kvm_enabled()) {
+         kvm_cpu_synchronize_state(cpu);
+     }
+ }
+ 
  static inline void cpu_synchronize_post_reset(CPUState *cpu)
  {
      if (kvm_enabled()) {
diff --combined kvm-all.c
index b87215c10f,7630a7d6e3..4478969ed2
--- a/kvm-all.c
+++ b/kvm-all.c
@@@ -111,6 -111,7 +111,7 @@@ bool kvm_halt_in_kernel_allowed
  bool kvm_irqfds_allowed;
  bool kvm_msi_via_irqfd_allowed;
  bool kvm_gsi_routing_allowed;
+ bool kvm_gsi_direct_mapping;
  bool kvm_allowed;
  bool kvm_readonly_mem_allowed;
  
@@@ -1069,6 -1070,10 +1070,10 @@@ void kvm_irqchip_release_virq(KVMState 
      struct kvm_irq_routing_entry *e;
      int i;
  
+     if (kvm_gsi_direct_mapping()) {
+         return;
+     }
+ 
      for (i = 0; i < s->irq_routes->nr; i++) {
          e = &s->irq_routes->entries[i];
          if (e->gsi == virq) {
@@@ -1190,6 -1195,10 +1195,10 @@@ int kvm_irqchip_add_msi_route(KVMState 
      struct kvm_irq_routing_entry kroute = {};
      int virq;
  
+     if (kvm_gsi_direct_mapping()) {
+         return msg.data & 0xffff;
+     }
+ 
      if (!kvm_gsi_routing_enabled()) {
          return -ENOSYS;
      }
@@@ -1216,6 -1225,10 +1225,10 @@@ int kvm_irqchip_update_msi_route(KVMSta
  {
      struct kvm_irq_routing_entry kroute = {};
  
+     if (kvm_gsi_direct_mapping()) {
+         return 0;
+     }
+ 
      if (!kvm_irqchip_in_kernel()) {
          return -ENOSYS;
      }
@@@ -1322,24 -1335,20 +1335,20 @@@ static int kvm_irqchip_create(KVMState 
      return 0;
  }
  
- static int kvm_max_vcpus(KVMState *s)
+ /* Find number of supported CPUs using the recommended
+  * procedure from the kernel API documentation to cope with
+  * older kernels that may be missing capabilities.
+  */
+ static int kvm_recommended_vcpus(KVMState *s)
  {
-     int ret;
- 
-     /* Find number of supported CPUs using the recommended
-      * procedure from the kernel API documentation to cope with
-      * older kernels that may be missing capabilities.
-      */
-     ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
-     if (ret) {
-         return ret;
-     }
-     ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
-     if (ret) {
-         return ret;
-     }
+     int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
+     return (ret) ? ret : 4;
+ }
  
-     return 4;
+ static int kvm_max_vcpus(KVMState *s)
+ {
+     int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+     return (ret) ? ret : kvm_recommended_vcpus(s);
  }
  
  int kvm_init(void)
@@@ -1347,11 -1356,19 +1356,19 @@@
      static const char upgrade_note[] =
          "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
          "(see http://sourceforge.net/projects/kvm).\n";
+     struct {
+         const char *name;
+         int num;
+     } num_cpus[] = {
+         { "SMP",          smp_cpus },
+         { "hotpluggable", max_cpus },
+         { NULL, }
+     }, *nc = num_cpus;
+     int soft_vcpus_limit, hard_vcpus_limit;
      KVMState *s;
      const KVMCapabilityInfo *missing_cap;
      int ret;
      int i;
-     int max_vcpus;
  
      s = g_malloc0(sizeof(KVMState));
  
@@@ -1392,19 -1409,26 +1409,26 @@@
          goto err;
      }
  
-     max_vcpus = kvm_max_vcpus(s);
-     if (smp_cpus > max_vcpus) {
-         ret = -EINVAL;
-         fprintf(stderr, "Number of SMP cpus requested (%d) exceeds max cpus "
-                 "supported by KVM (%d)\n", smp_cpus, max_vcpus);
-         goto err;
-     }
+     /* check the vcpu limits */
+     soft_vcpus_limit = kvm_recommended_vcpus(s);
+     hard_vcpus_limit = kvm_max_vcpus(s);
  
-     if (max_cpus > max_vcpus) {
-         ret = -EINVAL;
-         fprintf(stderr, "Number of hotpluggable cpus requested (%d) exceeds max cpus "
-                 "supported by KVM (%d)\n", max_cpus, max_vcpus);
-         goto err;
+     while (nc->name) {
+         if (nc->num > soft_vcpus_limit) {
+             fprintf(stderr,
+                     "Warning: Number of %s cpus requested (%d) exceeds "
+                     "the recommended cpus supported by KVM (%d)\n",
+                     nc->name, nc->num, soft_vcpus_limit);
+ 
+             if (nc->num > hard_vcpus_limit) {
+                 ret = -EINVAL;
+                 fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
+                         "the maximum cpus supported by KVM (%d)\n",
+                         nc->name, nc->num, hard_vcpus_limit);
+                 goto err;
+             }
+         }
+         nc++;
      }
  
      s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
@@@ -1812,6 -1836,19 +1836,6 @@@ int kvm_has_intx_set_mask(void
      return kvm_state->intx_set_mask;
  }
  
 -void *kvm_ram_alloc(ram_addr_t size)
 -{
 -#ifdef TARGET_S390X
 -    void *mem;
 -
 -    mem = kvm_arch_ram_alloc(size);
 -    if (mem) {
 -        return mem;
 -    }
 -#endif
 -    return qemu_anon_ram_alloc(size);
 -}
 -
  void kvm_setup_guest_memory(void *start, size_t size)
  {
  #ifdef CONFIG_VALGRIND_H
diff --combined target-i386/helper.c
index 8bf85ec5f0,0ad7c8e3b6..678503a7cc
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@@ -188,8 -188,6 +188,6 @@@ void x86_cpu_dump_state(CPUState *cs, F
      char cc_op_name[32];
      static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
  
-     cpu_synchronize_state(cs);
- 
      eflags = cpu_compute_eflags(env);
  #ifdef TARGET_X86_64
      if (env->hflags & HF_CS64_MASK) {
@@@ -894,10 -892,7 +892,10 @@@ hwaddr x86_cpu_get_phys_page_debug(CPUS
      uint32_t page_offset;
      int page_size;
  
 -    if (env->cr[4] & CR4_PAE_MASK) {
 +    if (!(env->cr[0] & CR0_PG_MASK)) {
 +        pte = addr & env->a20_mask;
 +        page_size = 4096;
 +    } else if (env->cr[4] & CR4_PAE_MASK) {
          target_ulong pdpe_addr;
          uint64_t pde, pdpe;
  
@@@ -955,21 -950,26 +953,21 @@@
      } else {
          uint32_t pde;
  
 -        if (!(env->cr[0] & CR0_PG_MASK)) {
 -            pte = addr;
 -            page_size = 4096;
 +        /* page directory entry */
 +        pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
 +        pde = ldl_phys(pde_addr);
 +        if (!(pde & PG_PRESENT_MASK))
 +            return -1;
 +        if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
 +            pte = pde & ~0x003ff000; /* align to 4MB */
 +            page_size = 4096 * 1024;
          } else {
              /* page directory entry */
 -            pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
 -            pde = ldl_phys(pde_addr);
 -            if (!(pde & PG_PRESENT_MASK))
 +            pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
 +            pte = ldl_phys(pte_addr);
 +            if (!(pte & PG_PRESENT_MASK))
                  return -1;
 -            if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
 -                pte = pde & ~0x003ff000; /* align to 4MB */
 -                page_size = 4096 * 1024;
 -            } else {
 -                /* page directory entry */
 -                pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
 -                pte = ldl_phys(pte_addr);
 -                if (!(pte & PG_PRESENT_MASK))
 -                    return -1;
 -                page_size = 4096;
 -            }
 +            page_size = 4096;
          }
          pte = pte & env->a20_mask;
      }