Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

[mirror_qemu.git] / target / arm / kvm.c
diff --git a/target/arm/kvm.c b/target/arm/kvm.c

index 5141d0adc52125f8f8ed776f264faa57eac7b670..b87b59a02ad84e583f2d208e672a0e5bf0f94ad1 100644 (file)
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -16,17 +16,19 @@
  #include "qemu-common.h"
  #include "qemu/timer.h"
  #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
  #include "sysemu/sysemu.h"
  #include "sysemu/kvm.h"
+#include "sysemu/kvm_int.h"
  #include "kvm_arm.h"
  #include "cpu.h"
  #include "trace.h"
  #include "internals.h"
-#include "hw/arm/arm.h"
  #include "hw/pci/pci.h"
  #include "exec/memattrs.h"
  #include "exec/address-spaces.h"
  #include "hw/boards.h"
+#include "hw/irq.h"
  #include "qemu/log.h"
  
  const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
@@ -34,6 +36,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  };
  
  static bool cap_has_mp_state;
+static bool cap_has_inject_serror_esr;
  
  static ARMHostCPUFeatures arm_host_cpu_features;
  
@@ -48,11 +51,22 @@ int kvm_arm_vcpu_init(CPUState *cs)
      return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
  }
  
+int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
+{
+    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
+}
+
+void kvm_arm_init_serror_injection(CPUState *cs)
+{
+    cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
+                                    KVM_CAP_ARM_INJECT_SERROR_ESR);
+}
+
  bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                        int *fdarray,
                                        struct kvm_vcpu_init *init)
  {
-    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
+    int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
  
      kvmfd = qemu_open("/dev/kvm", O_RDWR);
      if (kvmfd < 0) {
@@ -72,7 +86,14 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
          goto finish;
      }
  
-    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
+    if (init->target == -1) {
+        struct kvm_vcpu_init preferred;
+
+        ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
+        if (!ret) {
+            init->target = preferred.target;
+        }
+    }
      if (ret >= 0) {
          ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
          if (ret < 0) {
@@ -84,10 +105,12 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
           * creating one kind of guest CPU which is its preferred
           * CPU type.
           */
+        struct kvm_vcpu_init try;
+
          while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
-            init->target = *cpus_to_try++;
-            memset(init->features, 0, sizeof(init->features));
-            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
+            try.target = *cpus_to_try++;
+            memcpy(try.features, init->features, sizeof(init->features));
+            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
              if (ret >= 0) {
                  break;
              }
@@ -95,6 +118,7 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
          if (ret < 0) {
              goto err;
          }
+        init->target = try.target;
      } else {
          /* Treat a NULL cpus_to_try argument the same as an empty
           * list, which means we will fail the call since this must
@@ -151,11 +175,29 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
  
      cpu->kvm_target = arm_host_cpu_features.target;
      cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
+    cpu->isar = arm_host_cpu_features.isar;
      env->features = arm_host_cpu_features.features;
  }
  
+bool kvm_arm_pmu_supported(CPUState *cpu)
+{
+    KVMState *s = KVM_STATE(current_machine->accelerator);
+
+    return kvm_check_extension(s, KVM_CAP_ARM_PMU_V3);
+}
+
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms)
+{
+    KVMState *s = KVM_STATE(ms->accelerator);
+    int ret;
+
+    ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
+    return ret > 0 ? ret : 40;
+}
+
  int kvm_arch_init(MachineState *ms, KVMState *s)
  {
+    int ret = 0;
      /* For ARM interrupt delivery is always asynchronous,
       * whether we are using an in-kernel VGIC or not.
       */
@@ -169,7 +211,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
  
      cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
  
-    return 0;
+    if (ms->smp.cpus > 256 &&
+        !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
+        error_report("Using more than 256 vcpus requires a host kernel "
+                     "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
+        ret = -EINVAL;
+    }
+
+    return ret;
  }
  
  unsigned long kvm_arch_vcpu_id(CPUState *cpu)
@@ -184,16 +233,21 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
   * We use a MemoryListener to track mapping and unmapping of
   * the regions during board creation, so the board models don't
   * need to do anything special for the KVM case.
+ *
+ * Sometimes the address must be OR'ed with some other fields
+ * (for example, for KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION).
+ * @kda_addr_ormask stores the value of those fields.
   */
  typedef struct KVMDevice {
      struct kvm_arm_device_addr kda;
      struct kvm_device_attr kdattr;
+    uint64_t kda_addr_ormask;
      MemoryRegion *mr;
      QSLIST_ENTRY(KVMDevice) entries;
      int dev_fd;
  } KVMDevice;
  
-static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
+static QSLIST_HEAD(, KVMDevice) kvm_devices_head;
  
  static void kvm_arm_devlistener_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
@@ -234,6 +288,8 @@ static void kvm_arm_set_device_addr(KVMDevice *kd)
       */
      if (kd->dev_fd >= 0) {
          uint64_t addr = kd->kda.addr;
+
+        addr |= kd->kda_addr_ormask;
          attr->addr = (uintptr_t)&addr;
          ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
      } else {
@@ -256,6 +312,7 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
              kvm_arm_set_device_addr(kd);
          }
          memory_region_unref(kd->mr);
+        QSLIST_REMOVE_HEAD(&kvm_devices_head, entries);
          g_free(kd);
      }
      memory_listener_unregister(&devlistener);
@@ -266,7 +323,7 @@ static Notifier notify = {
  };
  
  void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
-                             uint64_t attr, int dev_fd)
+                             uint64_t attr, int dev_fd, uint64_t addr_ormask)
  {
      KVMDevice *kd;
  
@@ -286,6 +343,7 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
      kd->kdattr.group = group;
      kd->kdattr.attr = attr;
      kd->dev_fd = dev_fd;
+    kd->kda_addr_ormask = addr_ormask;
      QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
      memory_region_ref(kd->mr);
  }
@@ -301,7 +359,7 @@ static int compare_u64(const void *a, const void *b)
      return 0;
  }
  
-/* Initialize the CPUState's cpreg list according to the kernel's
+/* Initialize the ARMCPU cpreg list according to the kernel's
   * definition of what CPU registers it knows about (and throw away
   * the previous TCG-created cpreg list).
   */
@@ -470,6 +528,14 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
          fprintf(stderr, "write_kvmstate_to_list failed\n");
          abort();
      }
+    /*
+     * Sync the reset values also into the CPUState. This is necessary
+     * because the next thing we do will be a kvm_arch_put_registers()
+     * which will update the list values from the CPUState before copying
+     * the list values back to KVM. It's OK to ignore failure returns here
+     * for the same reason we do so in kvm_arch_get_registers().
+     */
+    write_list_to_cpustate(cpu);
  }
  
  /*
@@ -513,6 +579,59 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
      return 0;
  }
  
+int kvm_put_vcpu_events(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    struct kvm_vcpu_events events;
+    int ret;
+
+    if (!kvm_has_vcpu_events()) {
+        return 0;
+    }
+
+    memset(&events, 0, sizeof(events));
+    events.exception.serror_pending = env->serror.pending;
+
+    /* Inject SError to guest with specified syndrome if host kernel
+     * supports it, otherwise inject SError without syndrome.
+     */
+    if (cap_has_inject_serror_esr) {
+        events.exception.serror_has_esr = env->serror.has_esr;
+        events.exception.serror_esr = env->serror.esr;
+    }
+
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
+    if (ret) {
+        error_report("failed to put vcpu events");
+    }
+
+    return ret;
+}
+
+int kvm_get_vcpu_events(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    struct kvm_vcpu_events events;
+    int ret;
+
+    if (!kvm_has_vcpu_events()) {
+        return 0;
+    }
+
+    memset(&events, 0, sizeof(events));
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
+    if (ret) {
+        error_report("failed to get vcpu events");
+        return ret;
+    }
+
+    env->serror.pending = events.exception.serror_pending;
+    env->serror.has_esr = events.exception.serror_has_esr;
+    env->serror.esr = events.exception.serror_esr;
+
+    return 0;
+}
+
  void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
  {
  }
@@ -622,11 +741,11 @@ void kvm_arch_init_irq_routing(KVMState *s)
  {
  }
  
-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
+int kvm_arch_irqchip_create(KVMState *s)
  {
-     if (machine_kernel_irqchip_split(ms)) {
-         perror("-machine kernel_irqchip=split is not supported on ARM.");
-         exit(1);
+    if (kvm_kernel_irqchip_split()) {
+        perror("-machine kernel_irqchip=split is not supported on ARM.");
+        exit(1);
      }
  
      /* If we can create the VGIC using the newer device control API, we
@@ -648,6 +767,18 @@ int kvm_arm_vgic_probe(void)
      }
  }
  
+int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level)
+{
+    int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq;
+    int cpu_idx1 = cpu % 256;
+    int cpu_idx2 = cpu / 256;
+
+    kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) |
+               (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT);
+
+    return kvm_set_irq(kvm_state, kvm_irq, !!level);
+}
+
  int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                               uint64_t address, uint32_t data, PCIDevice *dev)
  {
@@ -664,7 +795,8 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
      /* MSI doorbell address is translated by an IOMMU */
  
      rcu_read_lock();
-    mr = address_space_translate(as, address, &xlat, &len, true);
+    mr = address_space_translate(as, address, &xlat, &len, true,
+                                 MEMTXATTRS_UNSPECIFIED);
      if (!mr) {
          goto unlock;
      }