virtio: decrement vq->inuse in virtqueue_discard()

[mirror_qemu.git] / kvm-all.c
diff --git a/kvm-all.c b/kvm-all.c

index de3c8c48bb178bd8c88a7010a8eba2a67cdd176d..ebf35b0c5b3e3bbef411359a34273b3adaf84ebb 100644 (file)
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -13,10 +13,8 @@
   *
   */
  
-#include <sys/types.h>
+#include "qemu/osdep.h"
  #include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <stdarg.h>
  
  #include <linux/kvm.h>
  
@@ -27,6 +25,7 @@
  #include "qemu/error-report.h"
  #include "hw/hw.h"
  #include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
  #include "hw/s390x/adapter.h"
  #include "exec/gdbstub.h"
  #include "sysemu/kvm_int.h"
@@ -45,8 +44,10 @@
  #include <sys/eventfd.h>
  #endif
  
-/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
-#define PAGE_SIZE TARGET_PAGE_SIZE
+/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
+ * need to use the real host PAGE_SIZE, as that's what KVM will use.
+ */
+#define PAGE_SIZE getpagesize()
  
  //#define DEBUG_KVM
  
@@ -60,6 +61,12 @@
  
  #define KVM_MSI_HASHTAB_SIZE    256
  
+struct KVMParkedVcpu {
+    unsigned long vcpu_id;  /* kvm_arch_vcpu_id() of the CPU that was parked */
+    int kvm_fd;             /* that CPU's kvm_fd, kept for later reuse */
+    QLIST_ENTRY(KVMParkedVcpu) node;  /* link in KVMState.kvm_parked_vcpus */
+};
+
  struct KVMState
  {
      AccelState parent_obj;
@@ -88,15 +95,17 @@ struct KVMState
  #ifdef KVM_CAP_IRQ_ROUTING
      struct kvm_irq_routing *irq_routes;
      int nr_allocated_irq_routes;
-    uint32_t *used_gsi_bitmap;
+    unsigned long *used_gsi_bitmap;
      unsigned int gsi_count;
      QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
  #endif
      KVMMemoryListener memory_listener;
+    QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
  };
  
  KVMState *kvm_state;
  bool kvm_kernel_irqchip;
+bool kvm_split_irqchip;
  bool kvm_async_interrupts_allowed;
  bool kvm_halt_in_kernel_allowed;
  bool kvm_eventfds_allowed;
@@ -109,6 +118,7 @@ bool kvm_allowed;
  bool kvm_readonly_mem_allowed;
  bool kvm_vm_attributes_allowed;
  bool kvm_direct_msi_allowed;
+bool kvm_ioeventfd_any_length_allowed;
  
  static const KVMCapabilityInfo kvm_required_capabilites[] = {
      KVM_CAP_INFO(USER_MEMORY),
@@ -116,6 +126,13 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = {
      KVM_CAP_LAST_INFO
  };
  
+/* Report the nr_slots value held by the current machine's KVM state. */
+int kvm_get_max_memslots(void)
+{
+    KVMState *kvm = KVM_STATE(current_machine->accelerator);
+    return kvm->nr_slots;
+}
+
  static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
  {
      KVMState *s = kvm_state;
@@ -234,6 +251,53 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot)
      return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  }
  
+/* Unmap @cpu's kvm_run area and park its vCPU fd for reuse by kvm_get_vcpu().
+ * Returns 0 on success or a negative value on failure.
+ */
+int kvm_destroy_vcpu(CPUState *cpu)
+{
+    KVMState *s = kvm_state;
+    struct KVMParkedVcpu *vcpu;
+    long mmap_size;
+
+    DPRINTF("kvm_destroy_vcpu\n");
+
+    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
+    if (mmap_size < 0) {
+        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
+        return mmap_size;
+    }
+
+    /* munmap() reports failure as -1 with errno set; return -errno instead */
+    if (munmap(cpu->kvm_run, mmap_size) < 0) {
+        return -errno;
+    }
+
+    vcpu = g_malloc0(sizeof(*vcpu));
+    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
+    vcpu->kvm_fd = cpu->kvm_fd;
+    QLIST_INSERT_HEAD(&s->kvm_parked_vcpus, vcpu, node);
+    return 0;
+}
+
+/* Return the fd parked for @vcpu_id, else the KVM_CREATE_VCPU result. */
+static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
+{
+    struct KVMParkedVcpu *parked;
+    int fd;
+
+    QLIST_FOREACH(parked, &s->kvm_parked_vcpus, node) {
+        if (parked->vcpu_id != vcpu_id) {
+            continue;
+        }
+        fd = parked->kvm_fd;
+        QLIST_REMOVE(parked, node);
+        g_free(parked);
+        return fd;
+    }
+    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+}
+
  int kvm_init_vcpu(CPUState *cpu)
  {
      KVMState *s = kvm_state;
@@ -242,7 +306,7 @@ int kvm_init_vcpu(CPUState *cpu)
  
      DPRINTF("kvm_init_vcpu\n");
  
-    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
+    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
      if (ret < 0) {
          DPRINTF("kvm_create_vcpu failed\n");
          goto err;
@@ -363,7 +427,8 @@ static void kvm_log_stop(MemoryListener *listener,
  static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
                                           unsigned long *bitmap)
  {
-    ram_addr_t start = section->offset_within_region + section->mr->ram_addr;
+    ram_addr_t start = section->offset_within_region +
+                       memory_region_get_ram_addr(section->mr);
      ram_addr_t pages = int128_get64(section->size) / getpagesize();
  
      cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
@@ -409,7 +474,7 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
           * userspace memory corruption (which is not detectable by valgrind
           * too, in most cases).
           * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
-         * a hope that sizeof(long) wont become >8 any time soon.
+         * a hope that sizeof(long) won't become >8 any time soon.
           */
          size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
                       /*HOST_LONG_BITS*/ 64) / 8;
@@ -947,12 +1012,12 @@ typedef struct KVMMSIRoute {
  
  static void set_gsi(KVMState *s, unsigned int gsi)
  {
-    s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
+    set_bit(gsi, s->used_gsi_bitmap);
  }
  
  static void clear_gsi(KVMState *s, unsigned int gsi)
  {
-    s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
+    clear_bit(gsi, s->used_gsi_bitmap);
  }
  
  void kvm_init_irq_routing(KVMState *s)
@@ -961,17 +1026,9 @@ void kvm_init_irq_routing(KVMState *s)
  
      gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING) - 1;
      if (gsi_count > 0) {
-        unsigned int gsi_bits, i;
-
          /* Round up so we can search ints using ffs */
-        gsi_bits = ALIGN(gsi_count, 32);
-        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
+        s->used_gsi_bitmap = bitmap_new(gsi_count);
          s->gsi_count = gsi_count;
-
-        /* Mark any over-allocated bits as already in use */
-        for (i = gsi_count; i < gsi_bits; i++) {
-            set_gsi(s, i);
-        }
      }
  
      s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
@@ -990,7 +1047,16 @@ void kvm_irqchip_commit_routes(KVMState *s)
  {
      int ret;
  
+    if (kvm_gsi_direct_mapping()) {
+        return;
+    }
+
+    if (!kvm_gsi_routing_enabled()) {
+        return;
+    }
+
      s->irq_routes->flags = 0;
+    trace_kvm_irqchip_commit_routes();
      ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
      assert(ret == 0);
  }
@@ -1037,8 +1103,6 @@ static int kvm_update_routing_entry(KVMState *s,
  
          *entry = *new_entry;
  
-        kvm_irqchip_commit_routes(s);
-
          return 0;
      }
  
@@ -1076,6 +1140,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
          }
      }
      clear_gsi(s, virq);
+    kvm_arch_release_virq_post(virq);
  }
  
  static unsigned int kvm_hash_msi(uint32_t data)
@@ -1101,9 +1166,7 @@ static void kvm_flush_dynamic_msi_routes(KVMState *s)
  
  static int kvm_irqchip_get_virq(KVMState *s)
  {
-    uint32_t *word = s->used_gsi_bitmap;
-    int max_words = ALIGN(s->gsi_count, 32) / 32;
-    int i, zeroes;
+    int next_virq;
  
      /*
       * PIC and IOAPIC share the first 16 GSI numbers, thus the available
@@ -1116,16 +1179,12 @@ static int kvm_irqchip_get_virq(KVMState *s)
      }
  
      /* Return the lowest unused GSI in the bitmap */
-    for (i = 0; i < max_words; i++) {
-        zeroes = ctz32(~word[i]);
-        if (zeroes == 32) {
-            continue;
-        }
-
-        return zeroes + i * 32;
+    next_virq = find_first_zero_bit(s->used_gsi_bitmap, s->gsi_count);
+    if (next_virq >= s->gsi_count) {
+        return -ENOSPC;
+    } else {
+        return next_virq;
      }
-    return -ENOSPC;
-
  }
  
  static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
@@ -1187,10 +1246,15 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
      return kvm_set_irq(s, route->kroute.gsi, 1);
  }
  
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev)
+int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
  {
      struct kvm_irq_routing_entry kroute = {};
      int virq;
+    MSIMessage msg = {0, 0};
+
+    if (dev) {
+        msg = pci_get_msi_message(dev, vector);
+    }
  
      if (kvm_gsi_direct_mapping()) {
          return kvm_arch_msi_data_to_gsi(msg.data);
@@ -1216,7 +1280,10 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev)
          return -EINVAL;
      }
  
+    trace_kvm_irqchip_add_msi_route(virq);
+
      kvm_add_routing_entry(s, &kroute);
+    kvm_arch_add_msi_route_post(&kroute, vector, dev);
      kvm_irqchip_commit_routes(s);
  
      return virq;
@@ -1245,6 +1312,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
          return -EINVAL;
      }
  
+    trace_kvm_irqchip_update_msi_route(virq);
+
      return kvm_update_routing_entry(s, &kroute);
  }
  
@@ -1297,6 +1366,34 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
      return virq;
  }
  
+/* Allocate a virq and route it to Hyper-V SynIC interrupt @sint of @vcpu.
+ * Returns the virq, -ENOSYS if GSI routing or KVM_CAP_HYPERV_SYNIC is missing,
+ * or the negative result of kvm_irqchip_get_virq().
+ */
+int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
+{
+    struct kvm_irq_routing_entry route = {};
+    int gsi;
+
+    if (!kvm_gsi_routing_enabled() ||
+        !kvm_check_extension(s, KVM_CAP_HYPERV_SYNIC)) {
+        return -ENOSYS;
+    }
+    gsi = kvm_irqchip_get_virq(s);
+    if (gsi < 0) {
+        return gsi;
+    }
+
+    route.type = KVM_IRQ_ROUTING_HV_SINT;
+    route.gsi = gsi;
+    route.flags = 0;
+    route.u.hv_sint.vcpu = vcpu;
+    route.u.hv_sint.sint = sint;
+    kvm_add_routing_entry(s, &route);
+    kvm_irqchip_commit_routes(s);
+    return gsi;
+}
+
  #else /* !KVM_CAP_IRQ_ROUTING */
  
  void kvm_init_irq_routing(KVMState *s)
@@ -1312,7 +1409,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
      abort();
  }
  
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
  {
      return -ENOSYS;
  }
@@ -1322,6 +1419,11 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
      return -ENOSYS;
  }
  
+int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
+{
+    return -ENOSYS; /* stub for builds without KVM_CAP_IRQ_ROUTING */
+}
+
  static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
  {
      abort();
@@ -1394,9 +1496,14 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s)
  
      /* First probe and see if there's a arch-specific hook to create the
       * in-kernel irqchip for us */
-    ret = kvm_arch_irqchip_create(s);
+    ret = kvm_arch_irqchip_create(machine, s);
      if (ret == 0) {
-        ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+        if (machine_kernel_irqchip_split(machine)) {
+            perror("Split IRQ chip mode not supported.");
+            exit(1);
+        } else {
+            ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+        }
      }
      if (ret < 0) {
          fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
@@ -1431,6 +1538,18 @@ static int kvm_max_vcpus(KVMState *s)
      return (ret) ? ret : kvm_recommended_vcpus(s);
  }
  
+/* KVM_CAP_MAX_VCPU_ID value when non-zero, otherwise kvm_max_vcpus(). */
+static int kvm_max_vcpu_id(KVMState *s)
+{
+    return kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID) ?: kvm_max_vcpus(s);
+}
+
+bool kvm_vcpu_id_is_valid(int vcpu_id)
+{
+    KVMState *kvm = KVM_STATE(current_machine->accelerator);
+    return !(vcpu_id < 0 || vcpu_id >= kvm_max_vcpu_id(kvm)); /* 0 <= id < max */
+}
+
  static int kvm_init(MachineState *ms)
  {
      MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -1467,6 +1586,7 @@ static int kvm_init(MachineState *ms)
  #ifdef KVM_CAP_SET_GUEST_DEBUG
      QTAILQ_INIT(&s->kvm_sw_breakpoints);
  #endif
+    QLIST_INIT(&s->kvm_parked_vcpus);
      s->vmfd = -1;
      s->fd = qemu_open("/dev/kvm", O_RDWR);
      if (s->fd == -1) {
@@ -1611,6 +1731,9 @@ static int kvm_init(MachineState *ms)
      kvm_vm_attributes_allowed =
          (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0);
  
+    kvm_ioeventfd_any_length_allowed =
+        (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
+
      ret = kvm_arch_init(ms, s);
      if (ret < 0) {
          goto err;
@@ -1622,8 +1745,10 @@ static int kvm_init(MachineState *ms)
  
      kvm_state = s;
  
-    s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
-    s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+    if (kvm_eventfds_allowed) {
+        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
+        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+    }
      s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
      s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
  
@@ -2016,9 +2141,9 @@ void kvm_device_access(int fd, int group, uint64_t attr,
                             write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
                             &kvmattr);
      if (err < 0) {
-        error_report("KVM_%s_DEVICE_ATTR failed: %s\n"
-                     "Group %d attr 0x%016" PRIx64, write ? "SET" : "GET",
-                     strerror(-err), group, attr);
+        error_report("KVM_%s_DEVICE_ATTR failed: %s",
+                     write ? "SET" : "GET", strerror(-err));
+        error_printf("Group %d attr 0x%016" PRIx64 "\n", group, attr);
          abort();
      }
  }
@@ -2306,6 +2431,21 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test)
      return test ? 0 : create_dev.fd;
  }
  
+/* Probe, via a KVM_CREATE_DEVICE_TEST request on @vmfd, whether a device of
+ * @type can be created.  False when KVM_CAP_DEVICE_CTRL is not reported. */
+bool kvm_device_supported(int vmfd, uint64_t type)
+{
+    struct kvm_create_device probe = {
+        .type = type,
+        .fd = -1,
+        .flags = KVM_CREATE_DEVICE_TEST,
+    };
+
+    /* Check the device-control capability first, then test-create @type */
+    return ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_DEVICE_CTRL) > 0 &&
+           ioctl(vmfd, KVM_CREATE_DEVICE, &probe) >= 0;
+}
+
  int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
  {
      struct kvm_one_reg reg;
@@ -2315,7 +2455,7 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
      reg.addr = (uintptr_t) source;
      r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
      if (r) {
-        trace_kvm_failed_reg_set(id, strerror(r));
+        trace_kvm_failed_reg_set(id, strerror(-r));
      }
      return r;
  }
@@ -2329,7 +2469,7 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
      reg.addr = (uintptr_t) target;
      r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
      if (r) {
-        trace_kvm_failed_reg_get(id, strerror(r));
+        trace_kvm_failed_reg_get(id, strerror(-r));
      }
      return r;
  }