diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index f865dac871903f22b573f63b73b6bc1d98abda25..dbe393c10995381ad9e8d88a03c2e2ed0e0b9189 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -8,25 +8,41 @@
  *
  */
 
-#include <stdio.h>
-#include <sys/types.h>
+#include "qemu/osdep.h"
 #include <sys/ioctl.h>
-#include <sys/mman.h>
 
 #include <linux/kvm.h>
 
 #include "qemu-common.h"
 #include "qemu/timer.h"
+#include "qemu/error-report.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "cpu.h"
+#include "internals.h"
 #include "hw/arm/arm.h"
+#include "exec/memattrs.h"
+#include "hw/boards.h"
+#include "qemu/log.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
 
+static bool cap_has_mp_state;
+
+int kvm_arm_vcpu_init(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    struct kvm_vcpu_init init;
+
+    init.target = cpu->kvm_target;
+    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
+
+    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
+}
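
The features array copied here is expected to be filled in by per-CPU setup code before the first call. A minimal sketch of that preparation, using a hypothetical helper name and mirroring the KVM_ARM_VCPU_POWER_OFF handling in the kvm_arch_init_vcpu() code removed further down:

static void prepare_kvm_init_features(ARMCPU *cpu)   /* hypothetical helper */
{
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cpu->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
}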
+
 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                       int *fdarray,
                                       struct kvm_vcpu_init *init)
@@ -46,13 +62,18 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
         goto err;
     }
 
+    if (!init) {
+        /* Caller doesn't want the VCPU to be initialized, so skip it */
+        goto finish;
+    }
+
     ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
     if (ret >= 0) {
         ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
         if (ret < 0) {
             goto err;
         }
-    } else {
+    } else if (cpus_to_try) {
         /* Old kernel which doesn't know about the
          * PREFERRED_TARGET ioctl: we know it will only support
          * creating one kind of guest CPU which is its preferred
@@ -69,8 +90,15 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
         if (ret < 0) {
             goto err;
         }
+    } else {
+        /* Treat a NULL cpus_to_try argument the same as an empty
+         * list, which means we will fail the call since this must
+         * be an old kernel which doesn't support PREFERRED_TARGET.
+         */
+        goto err;
     }
 
+finish:
     fdarray[0] = kvmfd;
     fdarray[1] = vmfd;
     fdarray[2] = cpufd;
@@ -100,120 +128,6 @@ void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
     }
 }
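
For reference, a condensed sketch of how a caller probes the host with these scratch-VCPU helpers; it mirrors the kvm_arm_get_host_cpu_features() code removed just below, and the fallback list is only consulted on kernels without KVM_ARM_PREFERRED_TARGET:

/* Sketch: bring up a scratch VCPU, read MIDR, tear everything down again */
static const uint32_t try_cpus[] = {
    QEMU_KVM_ARM_TARGET_CORTEX_A15,
    QEMU_KVM_ARM_TARGET_NONE
};
struct kvm_vcpu_init init;
int fdarray[3];
uint32_t midr;
struct kvm_one_reg idreg = {
    .id = KVM_REG_ARM | KVM_REG_SIZE_U32 | ENCODE_CP_REG(15, 0, 0, 0, 0, 0),
    .addr = (uintptr_t)&midr,
};

if (kvm_arm_create_scratch_host_vcpu(try_cpus, fdarray, &init)) {
    ioctl(fdarray[2], KVM_GET_ONE_REG, &idreg);   /* fdarray[2] is the VCPU fd */
    kvm_arm_destroy_scratch_host_vcpu(fdarray);
}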
 
-static inline void set_feature(uint64_t *features, int feature)
-{
-    *features |= 1ULL << feature;
-}
-
-bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc)
-{
-    /* Identify the feature bits corresponding to the host CPU, and
-     * fill out the ARMHostCPUClass fields accordingly. To do this
-     * we have to create a scratch VM, create a single CPU inside it,
-     * and then query that CPU for the relevant ID registers.
-     */
-    int i, ret, fdarray[3];
-    uint32_t midr, id_pfr0, id_isar0, mvfr1;
-    uint64_t features = 0;
-    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
-     * we know these will only support creating one kind of guest CPU,
-     * which is its preferred CPU type.
-     */
-    static const uint32_t cpus_to_try[] = {
-        QEMU_KVM_ARM_TARGET_CORTEX_A15,
-        QEMU_KVM_ARM_TARGET_NONE
-    };
-    struct kvm_vcpu_init init;
-    struct kvm_one_reg idregs[] = {
-        {
-            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 0, 0, 0),
-            .addr = (uintptr_t)&midr,
-        },
-        {
-            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 1, 0, 0),
-            .addr = (uintptr_t)&id_pfr0,
-        },
-        {
-            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | ENCODE_CP_REG(15, 0, 0, 2, 0, 0),
-            .addr = (uintptr_t)&id_isar0,
-        },
-        {
-            .id = KVM_REG_ARM | KVM_REG_SIZE_U32
-            | KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR1,
-            .addr = (uintptr_t)&mvfr1,
-        },
-    };
-
-    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
-        return false;
-    }
-
-    ahcc->target = init.target;
-
-    /* This is not strictly blessed by the device tree binding docs yet,
-     * but in practice the kernel does not care about this string so
-     * there is no point maintaining an KVM_ARM_TARGET_* -> string table.
-     */
-    ahcc->dtb_compatible = "arm,arm-v7";
-
-    for (i = 0; i < ARRAY_SIZE(idregs); i++) {
-        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &idregs[i]);
-        if (ret) {
-            break;
-        }
-    }
-
-    kvm_arm_destroy_scratch_host_vcpu(fdarray);
-
-    if (ret) {
-        return false;
-    }
-
-    /* Now we've retrieved all the register information we can
-     * set the feature bits based on the ID register fields.
-     * We can assume any KVM supporting CPU is at least a v7
-     * with VFPv3, LPAE and the generic timers; this in turn implies
-     * most of the other feature bits, but a few must be tested.
-     */
-    set_feature(&features, ARM_FEATURE_V7);
-    set_feature(&features, ARM_FEATURE_VFP3);
-    set_feature(&features, ARM_FEATURE_LPAE);
-    set_feature(&features, ARM_FEATURE_GENERIC_TIMER);
-
-    switch (extract32(id_isar0, 24, 4)) {
-    case 1:
-        set_feature(&features, ARM_FEATURE_THUMB_DIV);
-        break;
-    case 2:
-        set_feature(&features, ARM_FEATURE_ARM_DIV);
-        set_feature(&features, ARM_FEATURE_THUMB_DIV);
-        break;
-    default:
-        break;
-    }
-
-    if (extract32(id_pfr0, 12, 4) == 1) {
-        set_feature(&features, ARM_FEATURE_THUMB2EE);
-    }
-    if (extract32(mvfr1, 20, 4) == 1) {
-        set_feature(&features, ARM_FEATURE_VFP_FP16);
-    }
-    if (extract32(mvfr1, 12, 4) == 1) {
-        set_feature(&features, ARM_FEATURE_NEON);
-    }
-    if (extract32(mvfr1, 28, 4) == 1) {
-        /* FMAC support implies VFPv4 */
-        set_feature(&features, ARM_FEATURE_VFP4);
-    }
-
-    ahcc->features = features;
-
-    return true;
-}
-
 static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
 {
     ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);
@@ -242,19 +156,25 @@ static void kvm_arm_host_cpu_initfn(Object *obj)
 
 static const TypeInfo host_arm_cpu_type_info = {
     .name = TYPE_ARM_HOST_CPU,
+#ifdef TARGET_AARCH64
+    .parent = TYPE_AARCH64_CPU,
+#else
     .parent = TYPE_ARM_CPU,
+#endif
     .instance_init = kvm_arm_host_cpu_initfn,
     .class_init = kvm_arm_host_cpu_class_init,
     .class_size = sizeof(ARMHostCPUClass),
 };
 
-int kvm_arch_init(KVMState *s)
+int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     /* For ARM interrupt delivery is always asynchronous,
      * whether we are using an in-kernel VGIC or not.
      */
     kvm_async_interrupts_allowed = true;
 
+    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
+
     type_register_static(&host_arm_cpu_type_info);
 
     return 0;
@@ -265,19 +185,117 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
     return cpu->cpu_index;
 }
 
-static bool reg_syncs_via_tuple_list(uint64_t regidx)
+/* We track all the KVM devices which need their memory addresses
+ * passed to the kernel in a list of these structures.
+ * When board init is complete we run through the list and
+ * tell the kernel the base addresses of the memory regions.
+ * We use a MemoryListener to track mapping and unmapping of
+ * the regions during board creation, so the board models don't
+ * need to do anything special for the KVM case.
+ */
+typedef struct KVMDevice {
+    struct kvm_arm_device_addr kda;
+    struct kvm_device_attr kdattr;
+    MemoryRegion *mr;
+    QSLIST_ENTRY(KVMDevice) entries;
+    int dev_fd;
+} KVMDevice;
+
+static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
+
+static void kvm_arm_devlistener_add(MemoryListener *listener,
+                                    MemoryRegionSection *section)
+{
+    KVMDevice *kd;
+
+    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
+        if (section->mr == kd->mr) {
+            kd->kda.addr = section->offset_within_address_space;
+        }
+    }
+}
+
+static void kvm_arm_devlistener_del(MemoryListener *listener,
+                                    MemoryRegionSection *section)
 {
-    /* Return true if the regidx is a register we should synchronize
-     * via the cpreg_tuples array (ie is not a core reg we sync by
-     * hand in kvm_arch_get/put_registers())
+    KVMDevice *kd;
+
+    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
+        if (section->mr == kd->mr) {
+            kd->kda.addr = -1;
+        }
+    }
+}
+
+static MemoryListener devlistener = {
+    .region_add = kvm_arm_devlistener_add,
+    .region_del = kvm_arm_devlistener_del,
+};
+
+static void kvm_arm_set_device_addr(KVMDevice *kd)
+{
+    struct kvm_device_attr *attr = &kd->kdattr;
+    int ret;
+
+    /* If the device control API is available and we have a device fd on the
+     * KVMDevice struct, let's use the newer API
      */
-    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
-    case KVM_REG_ARM_CORE:
-    case KVM_REG_ARM_VFP:
-        return false;
-    default:
-        return true;
+    if (kd->dev_fd >= 0) {
+        uint64_t addr = kd->kda.addr;
+        attr->addr = (uintptr_t)&addr;
+        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
+    } else {
+        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
+    }
+
+    if (ret < 0) {
+        fprintf(stderr, "Failed to set device address: %s\n",
+                strerror(-ret));
+        abort();
+    }
+}
+
+static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
+{
+    KVMDevice *kd, *tkd;
+
+    memory_listener_unregister(&devlistener);
+    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
+        if (kd->kda.addr != -1) {
+            kvm_arm_set_device_addr(kd);
+        }
+        memory_region_unref(kd->mr);
+        g_free(kd);
+    }
+}
+
+static Notifier notify = {
+    .notify = kvm_arm_machine_init_done,
+};
+
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+                             uint64_t attr, int dev_fd)
+{
+    KVMDevice *kd;
+
+    if (!kvm_irqchip_in_kernel()) {
+        return;
+    }
+
+    if (QSLIST_EMPTY(&kvm_devices_head)) {
+        memory_listener_register(&devlistener, NULL);
+        qemu_add_machine_init_done_notifier(&notify);
     }
+    kd = g_new0(KVMDevice, 1);
+    kd->mr = mr;
+    kd->kda.id = devid;
+    kd->kda.addr = -1;
+    kd->kdattr.flags = 0;
+    kd->kdattr.group = group;
+    kd->kdattr.attr = attr;
+    kd->dev_fd = dev_fd;
+    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
+    memory_region_ref(kd->mr);
 }
 
 static int compare_u64(const void *a, const void *b)
@@ -291,45 +309,17 @@ static int compare_u64(const void *a, const void *b)
     return 0;
 }
 
-int kvm_arch_init_vcpu(CPUState *cs)
+/* Initialize the CPUState's cpreg list according to the kernel's
+ * definition of what CPU registers it knows about (and throw away
+ * the previous TCG-created cpreg list).
+ */
+int kvm_arm_init_cpreg_list(ARMCPU *cpu)
 {
-    struct kvm_vcpu_init init;
-    int i, ret, arraylen;
-    uint64_t v;
-    struct kvm_one_reg r;
     struct kvm_reg_list rl;
     struct kvm_reg_list *rlp;
-    ARMCPU *cpu = ARM_CPU(cs);
-
-    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
-        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
-        return -EINVAL;
-    }
-
-    init.target = cpu->kvm_target;
-    memset(init.features, 0, sizeof(init.features));
-    if (cpu->start_powered_off) {
-        init.features[0] = 1 << KVM_ARM_VCPU_POWER_OFF;
-    }
-    ret = kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
-    if (ret) {
-        return ret;
-    }
-    /* Query the kernel to make sure it supports 32 VFP
-     * registers: QEMU's "cortex-a15" CPU is always a
-     * VFP-D32 core. The simplest way to do this is just
-     * to attempt to read register d31.
-     */
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP | 31;
-    r.addr = (uintptr_t)(&v);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
-    if (ret == -ENOENT) {
-        return -EINVAL;
-    }
+    int i, ret, arraylen;
+    CPUState *cs = CPU(cpu);
 
-    /* Populate the cpreg list based on the kernel's idea
-     * of what registers exist (and throw away the TCG-created list).
-     */
     rl.n = 0;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
     if (ret != -E2BIG) {
@@ -347,7 +337,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
 
     for (i = 0, arraylen = 0; i < rlp->n; i++) {
-        if (!reg_syncs_via_tuple_list(rlp->reg[i])) {
+        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
             continue;
         }
         switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
@@ -374,7 +364,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     for (i = 0, arraylen = 0; i < rlp->n; i++) {
         uint64_t regidx = rlp->reg[i];
-        if (!reg_syncs_via_tuple_list(regidx)) {
+        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
             continue;
         }
         cpu->cpreg_indexes[arraylen] = regidx;
@@ -391,106 +381,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
         goto out;
     }
 
-    /* Save a copy of the initial register values so that we can
-     * feed it back to the kernel on VCPU reset.
-     */
-    cpu->cpreg_reset_values = g_memdup(cpu->cpreg_values,
-                                       cpu->cpreg_array_len *
-                                       sizeof(cpu->cpreg_values[0]));
-
 out:
     g_free(rlp);
     return ret;
 }
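
Once this list exists, the helpers below shuttle values between the kernel and the parallel cpreg_indexes/cpreg_values arrays; a typical get-registers path, as in the kvm_arch_get_registers() code removed from this file, ends with:

/* Sketch: pull the kernel's cp15 state into the list, then into CPUARMState */
if (!write_kvmstate_to_list(cpu)) {
    return -EINVAL;
}
/* Registers that have no CPUARMState field are fine to ignore here */
write_list_to_cpustate(cpu);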
 
-/* We track all the KVM devices which need their memory addresses
- * passed to the kernel in a list of these structures.
- * When board init is complete we run through the list and
- * tell the kernel the base addresses of the memory regions.
- * We use a MemoryListener to track mapping and unmapping of
- * the regions during board creation, so the board models don't
- * need to do anything special for the KVM case.
- */
-typedef struct KVMDevice {
-    struct kvm_arm_device_addr kda;
-    MemoryRegion *mr;
-    QSLIST_ENTRY(KVMDevice) entries;
-} KVMDevice;
-
-static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
-
-static void kvm_arm_devlistener_add(MemoryListener *listener,
-                                    MemoryRegionSection *section)
-{
-    KVMDevice *kd;
-
-    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
-        if (section->mr == kd->mr) {
-            kd->kda.addr = section->offset_within_address_space;
-        }
-    }
-}
-
-static void kvm_arm_devlistener_del(MemoryListener *listener,
-                                    MemoryRegionSection *section)
-{
-    KVMDevice *kd;
-
-    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
-        if (section->mr == kd->mr) {
-            kd->kda.addr = -1;
-        }
-    }
-}
-
-static MemoryListener devlistener = {
-    .region_add = kvm_arm_devlistener_add,
-    .region_del = kvm_arm_devlistener_del,
-};
-
-static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
-{
-    KVMDevice *kd, *tkd;
-
-    memory_listener_unregister(&devlistener);
-    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
-        if (kd->kda.addr != -1) {
-            if (kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR,
-                             &kd->kda) < 0) {
-                fprintf(stderr, "KVM_ARM_SET_DEVICE_ADDRESS failed: %s\n",
-                        strerror(errno));
-                abort();
-            }
-        }
-        memory_region_unref(kd->mr);
-        g_free(kd);
-    }
-}
-
-static Notifier notify = {
-    .notify = kvm_arm_machine_init_done,
-};
-
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
-{
-    KVMDevice *kd;
-
-    if (!kvm_irqchip_in_kernel()) {
-        return;
-    }
-
-    if (QSLIST_EMPTY(&kvm_devices_head)) {
-        memory_listener_register(&devlistener, NULL);
-        qemu_add_machine_init_done_notifier(&notify);
-    }
-    kd = g_new0(KVMDevice, 1);
-    kd->mr = mr;
-    kd->kda.id = devid;
-    kd->kda.addr = -1;
-    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
-    memory_region_ref(kd->mr);
-}
-
 bool write_kvmstate_to_list(ARMCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
@@ -527,7 +422,7 @@ bool write_kvmstate_to_list(ARMCPU *cpu)
     return ok;
 }
 
-bool write_list_to_kvmstate(ARMCPU *cpu)
+bool write_list_to_kvmstate(ARMCPU *cpu, int level)
 {
     CPUState *cs = CPU(cpu);
     int i;
@@ -539,6 +434,10 @@ bool write_list_to_kvmstate(ARMCPU *cpu)
         uint32_t v32;
         int ret;
 
+        if (kvm_arm_cpreg_level(regidx) > level) {
+            continue;
+        }
+
         r.id = regidx;
         switch (regidx & KVM_REG_SIZE_MASK) {
         case KVM_REG_SIZE_U32:
@@ -563,228 +462,60 @@ bool write_list_to_kvmstate(ARMCPU *cpu)
     return ok;
 }
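
The new level argument lets callers bound how much state is pushed back: registers whose kvm_arm_cpreg_level() is above the requested depth are skipped. A sketch of the intended use, assuming the caller passes through the level it received from kvm_arch_put_registers():

/* Sketch: a periodic runtime sync only writes back runtime-level registers,
 * while a migration or reset would pass KVM_PUT_FULL_STATE instead.
 */
if (!write_list_to_kvmstate(cpu, KVM_PUT_RUNTIME_STATE)) {
    return -EINVAL;
}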
 
-typedef struct Reg {
-    uint64_t id;
-    int offset;
-} Reg;
-
-#define COREREG(KERNELNAME, QEMUFIELD)                       \
-    {                                                        \
-        KVM_REG_ARM | KVM_REG_SIZE_U32 |                     \
-        KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
-        offsetof(CPUARMState, QEMUFIELD)                     \
-    }
-
-#define VFPSYSREG(R)                                       \
-    {                                                      \
-        KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | \
-        KVM_REG_ARM_VFP_##R,                               \
-        offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R])      \
-    }
-
-static const Reg regs[] = {
-    /* R0_usr .. R14_usr */
-    COREREG(usr_regs.uregs[0], regs[0]),
-    COREREG(usr_regs.uregs[1], regs[1]),
-    COREREG(usr_regs.uregs[2], regs[2]),
-    COREREG(usr_regs.uregs[3], regs[3]),
-    COREREG(usr_regs.uregs[4], regs[4]),
-    COREREG(usr_regs.uregs[5], regs[5]),
-    COREREG(usr_regs.uregs[6], regs[6]),
-    COREREG(usr_regs.uregs[7], regs[7]),
-    COREREG(usr_regs.uregs[8], usr_regs[0]),
-    COREREG(usr_regs.uregs[9], usr_regs[1]),
-    COREREG(usr_regs.uregs[10], usr_regs[2]),
-    COREREG(usr_regs.uregs[11], usr_regs[3]),
-    COREREG(usr_regs.uregs[12], usr_regs[4]),
-    COREREG(usr_regs.uregs[13], banked_r13[0]),
-    COREREG(usr_regs.uregs[14], banked_r14[0]),
-    /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */
-    COREREG(svc_regs[0], banked_r13[1]),
-    COREREG(svc_regs[1], banked_r14[1]),
-    COREREG(svc_regs[2], banked_spsr[1]),
-    COREREG(abt_regs[0], banked_r13[2]),
-    COREREG(abt_regs[1], banked_r14[2]),
-    COREREG(abt_regs[2], banked_spsr[2]),
-    COREREG(und_regs[0], banked_r13[3]),
-    COREREG(und_regs[1], banked_r14[3]),
-    COREREG(und_regs[2], banked_spsr[3]),
-    COREREG(irq_regs[0], banked_r13[4]),
-    COREREG(irq_regs[1], banked_r14[4]),
-    COREREG(irq_regs[2], banked_spsr[4]),
-    /* R8_fiq .. R14_fiq and SPSR_fiq */
-    COREREG(fiq_regs[0], fiq_regs[0]),
-    COREREG(fiq_regs[1], fiq_regs[1]),
-    COREREG(fiq_regs[2], fiq_regs[2]),
-    COREREG(fiq_regs[3], fiq_regs[3]),
-    COREREG(fiq_regs[4], fiq_regs[4]),
-    COREREG(fiq_regs[5], banked_r13[5]),
-    COREREG(fiq_regs[6], banked_r14[5]),
-    COREREG(fiq_regs[7], banked_spsr[5]),
-    /* R15 */
-    COREREG(usr_regs.uregs[15], regs[15]),
-    /* VFP system registers */
-    VFPSYSREG(FPSID),
-    VFPSYSREG(MVFR1),
-    VFPSYSREG(MVFR0),
-    VFPSYSREG(FPEXC),
-    VFPSYSREG(FPINST),
-    VFPSYSREG(FPINST2),
-};
-
-int kvm_arch_put_registers(CPUState *cs, int level)
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-    struct kvm_one_reg r;
-    int mode, bn;
-    int ret, i;
-    uint32_t cpsr, fpscr;
-
-    /* Make sure the banked regs are properly set */
-    mode = env->uncached_cpsr & CPSR_M;
-    bn = bank_number(mode);
-    if (mode == ARM_CPU_MODE_FIQ) {
-        memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t));
-    } else {
-        memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t));
-    }
-    env->banked_r13[bn] = env->regs[13];
-    env->banked_r14[bn] = env->regs[14];
-    env->banked_spsr[bn] = env->spsr;
+    int ret;
 
-    /* Now we can safely copy stuff down to the kernel */
-    for (i = 0; i < ARRAY_SIZE(regs); i++) {
-        r.id = regs[i].id;
-        r.addr = (uintptr_t)(env) + regs[i].offset;
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
-        if (ret) {
-            return ret;
-        }
+    /* Re-init VCPU so that all registers are set to
+     * their respective reset values.
+     */
+    ret = kvm_arm_vcpu_init(CPU(cpu));
+    if (ret < 0) {
+        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
+        abort();
     }
-
-    /* Special cases which aren't a single CPUARMState field */
-    cpsr = cpsr_read(env);
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
-        KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
-    r.addr = (uintptr_t)(&cpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
-    if (ret) {
-        return ret;
+    if (!write_kvmstate_to_list(cpu)) {
+        fprintf(stderr, "write_kvmstate_to_list failed\n");
+        abort();
     }
+}
 
-    /* VFP registers */
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
-    for (i = 0; i < 32; i++) {
-        r.addr = (uintptr_t)(&env->vfp.regs[i]);
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
+/*
+ * Update KVM's MP_STATE based on what QEMU thinks it is
+ */
+int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
+{
+    if (cap_has_mp_state) {
+        struct kvm_mp_state mp_state = {
+            .mp_state =
+            cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
+        };
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
         if (ret) {
-            return ret;
+            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            return -1;
         }
-        r.id++;
     }
 
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
-        KVM_REG_ARM_VFP_FPSCR;
-    fpscr = vfp_get_fpscr(env);
-    r.addr = (uintptr_t)&fpscr;
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
-    if (ret) {
-        return ret;
-    }
-
-    /* Note that we do not call write_cpustate_to_list()
-     * here, so we are only writing the tuple list back to
-     * KVM. This is safe because nothing can change the
-     * CPUARMState cp15 fields (in particular gdb accesses cannot)
-     * and so there are no changes to sync. In fact syncing would
-     * be wrong at this point: for a constant register where TCG and
-     * KVM disagree about its value, the preceding write_list_to_cpustate()
-     * would not have had any effect on the CPUARMState value (since the
-     * register is read-only), and a write_cpustate_to_list() here would
-     * then try to write the TCG value back into KVM -- this would either
-     * fail or incorrectly change the value the guest sees.
-     *
-     * If we ever want to allow the user to modify cp15 registers via
-     * the gdb stub, we would need to be more clever here (for instance
-     * tracking the set of registers kvm_arch_get_registers() successfully
-     * managed to update the CPUARMState with, and only allowing those
-     * to be written back up into the kernel).
-     */
-    if (!write_list_to_kvmstate(cpu)) {
-        return EINVAL;
-    }
-
-    return ret;
+    return 0;
 }
 
-int kvm_arch_get_registers(CPUState *cs)
+/*
+ * Sync the KVM MP_STATE into QEMU
+ */
+int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
 {
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-    struct kvm_one_reg r;
-    int mode, bn;
-    int ret, i;
-    uint32_t cpsr, fpscr;
-
-    for (i = 0; i < ARRAY_SIZE(regs); i++) {
-        r.id = regs[i].id;
-        r.addr = (uintptr_t)(env) + regs[i].offset;
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    /* Special cases which aren't a single CPUARMState field */
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
-        KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
-    r.addr = (uintptr_t)(&cpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
-    if (ret) {
-        return ret;
-    }
-    cpsr_write(env, cpsr, 0xffffffff);
-
-    /* Make sure the current mode regs are properly set */
-    mode = env->uncached_cpsr & CPSR_M;
-    bn = bank_number(mode);
-    if (mode == ARM_CPU_MODE_FIQ) {
-        memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
-    } else {
-        memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t));
-    }
-    env->regs[13] = env->banked_r13[bn];
-    env->regs[14] = env->banked_r14[bn];
-    env->spsr = env->banked_spsr[bn];
-
-    /* VFP registers */
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
-    for (i = 0; i < 32; i++) {
-        r.addr = (uintptr_t)(&env->vfp.regs[i]);
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
+    if (cap_has_mp_state) {
+        struct kvm_mp_state mp_state;
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
         if (ret) {
-            return ret;
+            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            abort();
         }
-        r.id++;
-    }
-
-    r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
-        KVM_REG_ARM_VFP_FPSCR;
-    r.addr = (uintptr_t)&fpscr;
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
-    if (ret) {
-        return ret;
-    }
-    vfp_set_fpscr(env, fpscr);
-
-    if (!write_kvmstate_to_list(cpu)) {
-        return EINVAL;
+        cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED);
     }
-    /* Note that it's OK to have registers which aren't in CPUState,
-     * so we can ignore a failure return here.
-     */
-    write_list_to_cpustate(cpu);
 
     return 0;
 }
@@ -793,26 +524,28 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
 }
 
-void kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
+MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
 {
+    return MEMTXATTRS_UNSPECIFIED;
 }
 
-int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
-{
-    return 0;
-}
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
-    /* Feed the kernel back its initial register state */
-    ARMCPU *cpu = ARM_CPU(cs);
-
-    memmove(cpu->cpreg_values, cpu->cpreg_reset_values,
-            cpu->cpreg_array_len * sizeof(cpu->cpreg_values[0]));
+    int ret = 0;
 
-    if (!write_list_to_kvmstate(cpu)) {
-        abort();
+    switch (run->exit_reason) {
+    case KVM_EXIT_DEBUG:
+        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
+            ret = EXCP_DEBUG;
+        } /* otherwise return to guest */
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
+                      __func__, run->exit_reason);
+        break;
     }
+    return ret;
 }
 
 bool kvm_arch_stop_on_emulation_error(CPUState *cs)
@@ -835,44 +568,72 @@ int kvm_arch_on_sigbus(int code, void *addr)
     return 1;
 }
 
+/* The #ifdef protections remain until the 32-bit headers are imported and
+ * can be removed once 32-bit and 64-bit reach feature parity.
+ */
 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
+#ifdef KVM_GUESTDBG_USE_SW_BP
+    if (kvm_sw_breakpoints_active(cs)) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+    }
+#endif
+#ifdef KVM_GUESTDBG_USE_HW
+    if (kvm_arm_hw_debug_active(cs)) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
+        kvm_arm_copy_hw_debug_data(&dbg->arch);
+    }
+#endif
+}
+
+void kvm_arch_init_irq_routing(KVMState *s)
+{
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUState *cs,
-                                  struct kvm_sw_breakpoint *bp)
+int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-    return -EINVAL;
+    if (machine_kernel_irqchip_split(ms)) {
+        perror("-machine kernel_irqchip=split is not supported on ARM.");
+        exit(1);
+    }
+
+    /* If we can create the VGIC using the newer device control API, we
+     * let the device do this when it initializes itself, otherwise we
+     * fall back to the old API */
+    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
 }
 
-int kvm_arch_insert_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type)
+int kvm_arm_vgic_probe(void)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-    return -EINVAL;
+    if (kvm_create_device(kvm_state,
+                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
+        return 3;
+    } else if (kvm_create_device(kvm_state,
+                                 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
+        return 2;
+    } else {
+        return 0;
+    }
 }
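
Because the probe passes test=true to kvm_create_device(), nothing is actually instantiated; a hypothetical machine model can use the result to pick a GIC version, along these lines:

/* Sketch: choose the in-kernel GIC model from the probe result.
 * "gic_version" is a stand-in for a board-model field.
 */
switch (kvm_arm_vgic_probe()) {
case 3:
    gic_version = 3;                      /* host kernel offers a VGICv3 */
    break;
case 2:
    gic_version = 2;                      /* fall back to VGICv2 */
    break;
default:
    error_report("Host kernel exposes no usable in-kernel VGIC");
    exit(1);
}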
 
-int kvm_arch_remove_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type)
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data, PCIDevice *dev)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-    return -EINVAL;
+    return 0;
 }
 
-int kvm_arch_remove_sw_breakpoint(CPUState *cs,
-                                  struct kvm_sw_breakpoint *bp)
+int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
+                                int vector, PCIDevice *dev)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-    return -EINVAL;
+    return 0;
 }
 
-void kvm_arch_remove_all_hw_breakpoints(void)
+int kvm_arch_release_virq_post(int virq)
 {
-    qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
+    return 0;
 }
 
-void kvm_arch_init_irq_routing(KVMState *s)
+int kvm_arch_msi_data_to_gsi(uint32_t data)
 {
+    return (data - 32) & 0xffff;
 }
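
On the GIC, interrupt IDs 0-15 are SGIs, 16-31 are PPIs and SPIs start at ID 32, so this treats the MSI data as an SPI INTID and converts it to the zero-based SPI number used as the routing GSI. A worked example:

/* Example: MSI data 0x30 is INTID 48, i.e. SPI 16, so the GSI is (48 - 32) = 16 */
uint32_t gsi = kvm_arch_msi_data_to_gsi(0x30);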