X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=target-ppc%2Fkvm.c;h=b77ce5e94cbca9e96e791ff10ce67fb633213757;hb=60aad298cb6de52f2716b2e82e1353ea9de95fd6;hp=88650d4ae4e1dfd8408fe46433833148c0785443;hpb=9c17d615a66ebd655871bf891ec0fe901ad8b332;p=qemu.git

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 88650d4ae..b77ce5e94 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -30,20 +30,20 @@
 #include "cpu.h"
 #include "sysemu/cpus.h"
 #include "sysemu/device_tree.h"
-#include "hw/sysbus.h"
-#include "hw/spapr.h"
+#include "mmu-hash64.h"
 
 #include "hw/sysbus.h"
-#include "hw/spapr.h"
-#include "hw/spapr_vio.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "sysemu/watchdog.h"
 
 //#define DEBUG_KVM
 
 #ifdef DEBUG_KVM
-#define dprintf(fmt, ...) \
+#define DPRINTF(fmt, ...) \
     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
 #else
-#define dprintf(fmt, ...) \
+#define DPRINTF(fmt, ...) \
     do { } while (0)
 #endif
 
@@ -61,6 +61,11 @@ static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
 static int cap_hior;
+static int cap_one_reg;
+static int cap_epr;
+static int cap_ppc_watchdog;
+static int cap_papr;
+static int cap_htab_fd;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -80,6 +85,8 @@ static void kvm_kick_cpu(void *opaque)
     qemu_cpu_kick(CPU(cpu));
 }
 
+static int kvm_ppc_register_host_cpu_type(void);
+
 int kvm_arch_init(KVMState *s)
 {
     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
@@ -89,18 +96,28 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
+    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
+    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
+    /* Note: we don't set cap_papr here, because this capability is
+     * only activated after this by kvmppc_set_papr() */
+    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                         "VM to stall at times!\n");
     }
 
+    kvm_ppc_register_host_cpu_type();
+
     return 0;
 }
 
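Note on the capability probes above: kvm_check_extension() is QEMU's thin wrapper around the KVM_CHECK_EXTENSION ioctl on the /dev/kvm system fd, which returns 0 when a capability is absent and a positive value (for KVM_CAP_PPC_SMT, for instance, the supported number of hardware threads) when it is present. A minimal stand-alone sketch of that contract, assuming a Linux host with the kvm uapi headers installed; none of this code is part of the patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);

        if (kvm < 0) {
            perror("open /dev/kvm");
            return 1;
        }
        /* 0 means absent; a positive value means present */
        printf("KVM_CAP_PPC_SMT -> %d\n",
               (int)ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT));
        return 0;
    }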
-static int kvm_arch_sync_sregs(CPUPPCState *cenv)
+static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 {
+    CPUPPCState *cenv = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_sregs sregs;
     int ret;
 
@@ -117,18 +134,20 @@ static int kvm_arch_sync_sregs(CPUPPCState *cenv)
         }
     }
 
-    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
     if (ret) {
         return ret;
     }
 
     sregs.pvr = cenv->spr[SPR_PVR];
-    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
+    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 }
 
 /* Set up a shared TLB array with KVM */
-static int kvm_booke206_tlb_init(CPUPPCState *env)
+static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_book3e_206_tlb_params params = {};
     struct kvm_config_tlb cfg = {};
     struct kvm_enable_cap encap = {};
@@ -136,7 +155,7 @@ static int kvm_booke206_tlb_init(CPUPPCState *env)
     int ret, i;
 
     if (!kvm_enabled() ||
-        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
+        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
     }
 
@@ -161,7 +180,7 @@ static int kvm_booke206_tlb_init(CPUPPCState *env)
     encap.cap = KVM_CAP_SW_TLB;
     encap.args[0] = (uintptr_t)&cfg;
-    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
+    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
     if (ret < 0) {
         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                 __func__, strerror(-ret));
@@ -174,9 +193,12 @@
 
 #if defined(TARGET_PPC64)
-static void kvm_get_fallback_smmu_info(CPUPPCState *env,
+static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                        struct kvm_ppc_smmu_info *info)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+
     memset(info, 0, sizeof(*info));
 
     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
@@ -202,7 +224,7 @@ static void kvm_get_fallback_smmu_info(CPUPPCState *env,
      * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
      * this fallback. */
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
         /* No flags */
         info->flags = 0;
         info->slb_size = 64;
@@ -258,18 +280,19 @@ static void kvm_get_fallback_smmu_info(CPUPPCState *env,
     }
 }
 
-static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
+static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 {
+    CPUState *cs = CPU(cpu);
     int ret;
 
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
-        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
         if (ret == 0) {
             return;
         }
     }
 
-    kvm_get_fallback_smmu_info(env, info);
+    kvm_get_fallback_smmu_info(cpu, info);
 }
 
 static long getrampagesize(void)
@@ -312,10 +335,11 @@ static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
     return (1ul << shift) <= rampgsize;
 }
 
-static void kvm_fixup_page_sizes(CPUPPCState *env)
+static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 {
     static struct kvm_ppc_smmu_info smmu_info;
     static bool has_smmu_info;
+    CPUPPCState *env = &cpu->env;
     long rampagesize;
     int iq, ik, jq, jk;
 
@@ -326,7 +350,7 @@ static void kvm_fixup_page_sizes(CPUPPCState *env)
 
     /* Collect MMU info from kernel if not already */
     if (!has_smmu_info) {
-        kvm_get_smmu_info(env, &smmu_info);
+        kvm_get_smmu_info(cpu, &smmu_info);
         has_smmu_info = true;
     }
 
@@ -369,32 +393,38 @@ static void kvm_fixup_page_sizes(CPUPPCState *env)
 }
 #else /* defined (TARGET_PPC64) */
 
-static inline void kvm_fixup_page_sizes(CPUPPCState *env)
+static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 {
 }
 
 #endif /* !defined (TARGET_PPC64) */
 
-int kvm_arch_init_vcpu(CPUPPCState *cenv)
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+    return cpu->cpu_index;
+}
+
+int kvm_arch_init_vcpu(CPUState *cs)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *cenv = &cpu->env;
     int ret;
 
     /* Gather server mmu info from KVM and update the CPU state */
-    kvm_fixup_page_sizes(cenv);
+    kvm_fixup_page_sizes(cpu);
 
     /* Synchronize sregs with kvm */
-    ret = kvm_arch_sync_sregs(cenv);
+    ret = kvm_arch_sync_sregs(cpu);
     if (ret) {
         return ret;
     }
 
-    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
+    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 
     /* Some targets support access to KVM's guest TLB. */
     switch (cenv->mmu_model) {
     case POWERPC_MMU_BOOKE206:
-        ret = kvm_booke206_tlb_init(cenv);
+        ret = kvm_booke206_tlb_init(cpu);
         break;
     default:
         break;
@@ -403,12 +433,14 @@ int kvm_arch_init_vcpu(CPUPPCState *cenv)
     return ret;
 }
 
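For context on kvm_booke206_tlb_init() above: KVM_CAP_SW_TLB is enabled per vcpu through KVM_ENABLE_CAP, whose single argument points at a struct kvm_config_tlb describing a TLB array that KVM then reads and writes directly in user memory instead of exchanging entries one at a time. A hedged sketch of just that handshake against the Linux powerpc uapi headers; the helper name, the vcpu fd parameter and the buffer are illustrative assumptions, not code from the patch:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hand KVM a user-space MAS-entry array for an e500-style MMU. */
    static int enable_sw_tlb(int vcpu_fd, void *tlb_array, uint32_t array_bytes,
                             struct kvm_book3e_206_tlb_params *params)
    {
        struct kvm_config_tlb cfg = {
            .mmu_type  = KVM_MMU_FSL_BOOKE_NOHV,
            .params    = (uintptr_t)params,
            .array     = (uintptr_t)tlb_array,
            .array_len = array_bytes,
        };
        struct kvm_enable_cap encap = {
            .cap  = KVM_CAP_SW_TLB,
            .args = { (uintptr_t)&cfg },
        };

        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &encap);
    }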
-void kvm_arch_reset_vcpu(CPUPPCState *env)
+void kvm_arch_reset_vcpu(CPUState *cpu)
 {
 }
 
-static void kvm_sw_tlb_put(CPUPPCState *env)
+static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_dirty_tlb dirty_tlb;
     unsigned char *bitmap;
     int ret;
@@ -423,7 +455,7 @@ static void kvm_sw_tlb_put(CPUPPCState *env)
     dirty_tlb.bitmap = (uintptr_t)bitmap;
     dirty_tlb.num_dirty = env->nb_tlb;
 
-    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
+    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
     if (ret) {
         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                 __func__, strerror(-ret));
@@ -432,19 +464,315 @@
     g_free(bitmap);
 }
 
-int kvm_arch_put_registers(CPUPPCState *env, int level)
+static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    union {
+        uint32_t u32;
+        uint64_t u64;
+    } val;
+    struct kvm_one_reg reg = {
+        .id = id,
+        .addr = (uintptr_t) &val,
+    };
+    int ret;
+
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
+                spr, strerror(errno));
+    } else {
+        switch (id & KVM_REG_SIZE_MASK) {
+        case KVM_REG_SIZE_U32:
+            env->spr[spr] = val.u32;
+            break;
+
+        case KVM_REG_SIZE_U64:
+            env->spr[spr] = val.u64;
+            break;
+
+        default:
+            /* Don't handle this size yet */
+            abort();
+        }
+    }
+}
+
+static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    union {
+        uint32_t u32;
+        uint64_t u64;
+    } val;
+    struct kvm_one_reg reg = {
+        .id = id,
+        .addr = (uintptr_t) &val,
+    };
+    int ret;
+
+    switch (id & KVM_REG_SIZE_MASK) {
+    case KVM_REG_SIZE_U32:
+        val.u32 = env->spr[spr];
+        break;
+
+    case KVM_REG_SIZE_U64:
+        val.u64 = env->spr[spr];
+        break;
+
+    default:
+        /* Don't handle this size yet */
+        abort();
+    }
+
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret != 0) {
+        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
+                spr, strerror(errno));
+    }
+}
+
+static int kvm_put_fp(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int i;
+    int ret;
+
+    if (env->insns_flags & PPC_FLOAT) {
+        uint64_t fpscr = env->fpscr;
+        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
+
+        reg.id = KVM_REG_PPC_FPSCR;
+        reg.addr = (uintptr_t)&fpscr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+
+        for (i = 0; i < 32; i++) {
+            uint64_t vsr[2];
+
+            vsr[0] = float64_val(env->fpr[i]);
+            vsr[1] = env->vsr[i];
+            reg.addr = (uintptr_t) &vsr;
+            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
+
+            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+            if (ret < 0) {
+                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
+                        i, strerror(errno));
+                return ret;
+            }
+        }
+    }
+
+    if (env->insns_flags & PPC_ALTIVEC) {
+        reg.id = KVM_REG_PPC_VSCR;
+        reg.addr = (uintptr_t)&env->vscr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+
+        for (i = 0; i < 32; i++) {
+            reg.id = KVM_REG_PPC_VR(i);
+            reg.addr = (uintptr_t)&env->avr[i];
+            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+            if (ret < 0) {
+                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
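The size switch in kvm_get_one_spr()/kvm_put_one_spr() above works because every ONE_REG id carries its own width: bits 52-55 of the id hold log2 of the register's byte size, so a single SPR table can mix 32-bit and 64-bit registers. A small worked decode using the uapi masks (powerpc headers assumed; this is illustration, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>
    #include <linux/kvm.h>

    int main(void)
    {
        uint64_t id = KVM_REG_PPC_HIOR;

        /* same test the patch performs before copying val.u32/val.u64 */
        printf("u64-sized: %d\n",
               (id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
        printf("byte size: %d\n",
               1 << ((id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT));
        return 0;
    }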
"VSR" : "FPR", + i, strerror(errno)); + return ret; + } + } + } + + if (env->insns_flags & PPC_ALTIVEC) { + reg.id = KVM_REG_PPC_VSCR; + reg.addr = (uintptr_t)&env->vscr; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); + return ret; + } + + for (i = 0; i < 32; i++) { + reg.id = KVM_REG_PPC_VR(i); + reg.addr = (uintptr_t)&env->avr[i]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); + return ret; + } + } + } + + return 0; +} + +static int kvm_get_fp(CPUState *cs) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + struct kvm_one_reg reg; + int i; + int ret; + + if (env->insns_flags & PPC_FLOAT) { + uint64_t fpscr; + bool vsx = !!(env->insns_flags2 & PPC2_VSX); + + reg.id = KVM_REG_PPC_FPSCR; + reg.addr = (uintptr_t)&fpscr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); + return ret; + } else { + env->fpscr = fpscr; + } + + for (i = 0; i < 32; i++) { + uint64_t vsr[2]; + + reg.addr = (uintptr_t) &vsr; + reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); + + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get %s%d from KVM: %s\n", + vsx ? "VSR" : "FPR", i, strerror(errno)); + return ret; + } else { + env->fpr[i] = vsr[0]; + if (vsx) { + env->vsr[i] = vsr[1]; + } + } + } + } + + if (env->insns_flags & PPC_ALTIVEC) { + reg.id = KVM_REG_PPC_VSCR; + reg.addr = (uintptr_t)&env->vscr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); + return ret; + } + + for (i = 0; i < 32; i++) { + reg.id = KVM_REG_PPC_VR(i); + reg.addr = (uintptr_t)&env->avr[i]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get VR%d from KVM: %s\n", + i, strerror(errno)); + return ret; + } + } + } + + return 0; +} + +#if defined(TARGET_PPC64) +static int kvm_get_vpa(CPUState *cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + struct kvm_one_reg reg; + int ret; + + reg.id = KVM_REG_PPC_VPA_ADDR; + reg.addr = (uintptr_t)&env->vpa_addr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); + return ret; + } + + assert((uintptr_t)&env->slb_shadow_size + == ((uintptr_t)&env->slb_shadow_addr + 8)); + reg.id = KVM_REG_PPC_VPA_SLB; + reg.addr = (uintptr_t)&env->slb_shadow_addr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get SLB shadow state from KVM: %s\n", + strerror(errno)); + return ret; + } + + assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); + reg.id = KVM_REG_PPC_VPA_DTL; + reg.addr = (uintptr_t)&env->dtl_addr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret < 0) { + DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", + strerror(errno)); + return ret; + } + + return 0; +} + +static int kvm_put_vpa(CPUState *cs) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + struct kvm_one_reg reg; + int ret; + + /* SLB shadow or DTL can't be registered unless a master VPA is + * registered. That means when restoring state, if a VPA *is* + * registered, we need to set that up first. 
+static int kvm_put_vpa(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    /* SLB shadow or DTL can't be registered unless a master VPA is
+     * registered. That means when restoring state, if a VPA *is*
+     * registered, we need to set that up first. If not, we need to
+     * deregister the others before deregistering the master VPA */
+    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
+
+    if (env->vpa_addr) {
+        reg.id = KVM_REG_PPC_VPA_ADDR;
+        reg.addr = (uintptr_t)&env->vpa_addr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+    }
+
+    assert((uintptr_t)&env->slb_shadow_size
+           == ((uintptr_t)&env->slb_shadow_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_SLB;
+    reg.addr = (uintptr_t)&env->slb_shadow_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret < 0) {
+        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
+        return ret;
+    }
+
+    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_DTL;
+    reg.addr = (uintptr_t)&env->dtl_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret < 0) {
+        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
+                strerror(errno));
+        return ret;
+    }
+
+    if (!env->vpa_addr) {
+        reg.id = KVM_REG_PPC_VPA_ADDR;
+        reg.addr = (uintptr_t)&env->vpa_addr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+#endif /* TARGET_PPC64 */
+
+int kvm_arch_put_registers(CPUState *cs, int level)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
     int ret;
     int i;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
-    if (ret < 0)
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
+    if (ret < 0) {
         return ret;
+    }
 
     regs.ctr = env->ctr;
     regs.lr = env->lr;
-    regs.xer = env->xer;
+    regs.xer = cpu_read_xer(env);
     regs.msr = env->msr;
     regs.pc = env->nip;
 
@@ -465,12 +793,19 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
     for (i = 0;i < 32; i++)
         regs.gpr[i] = env->gpr[i];
 
-    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+    regs.cr = 0;
+    for (i = 0; i < 8; i++) {
+        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
+    }
+
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
     if (ret < 0)
         return ret;
 
+    kvm_put_fp(cs);
+
     if (env->tlb_dirty) {
-        kvm_sw_tlb_put(env);
+        kvm_sw_tlb_put(cpu);
         env->tlb_dirty = false;
     }
 
@@ -483,7 +818,7 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
 
     /* Sync SLB */
 #ifdef TARGET_PPC64
-    for (i = 0; i < 64; i++) {
+    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
     }
@@ -503,36 +838,53 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
             | env->IBAT[1][i];
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
     if (ret) {
         return ret;
     }
     }
 
     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
-        uint64_t hior = env->spr[SPR_HIOR];
-        struct kvm_one_reg reg = {
-            .id = KVM_REG_PPC_HIOR,
-            .addr = (uintptr_t) &hior,
-        };
+        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
+    }
 
-        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
+    if (cap_one_reg) {
+        int i;
+
+        /* We deliberately ignore errors here, for kernels which have
+         * the ONE_REG calls, but don't support the specific
+         * registers, there's a reasonable chance things will still
+         * work, at least until we try to migrate. */
+        for (i = 0; i < 1024; i++) {
+            uint64_t id = env->spr_cb[i].one_reg_id;
+
+            if (id != 0) {
+                kvm_put_one_spr(cs, id, i);
+            }
         }
+
+#ifdef TARGET_PPC64
+        if (cap_papr) {
+            if (kvm_put_vpa(cs) < 0) {
+                DPRINTF("Warning: Unable to set VPA information to KVM\n");
+            }
+        }
+#endif /* TARGET_PPC64 */
     }
 
     return ret;
 }
 
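The regs.cr loop added to kvm_arch_put_registers() condenses QEMU's eight 4-bit crf[] fields (crf[0] is CR0, architecturally the most significant nibble) into the packed 32-bit CR image KVM expects; kvm_arch_get_registers() below performs the inverse shift. The arithmetic in isolation, with sample values that are not from the patch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t crf[8] = { 0x8, 0x2, 0, 0, 0, 0, 0, 0x1 };
        uint32_t cr = 0;
        int i;

        for (i = 0; i < 8; i++) {           /* pack, as on the put side */
            cr |= (crf[i] & 15) << (4 * (7 - i));
        }
        printf("packed CR = 0x%08x\n", cr); /* prints 0x82000001 */

        for (i = 0; i < 8; i++) {           /* unpack, as on the get side */
            crf[i] = (cr >> (4 * (7 - i))) & 15;
        }
        return 0;
    }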
-int kvm_arch_get_registers(CPUPPCState *env)
+int kvm_arch_get_registers(CPUState *cs)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
     struct kvm_sregs sregs;
     uint32_t cr;
     int i, ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
     if (ret < 0)
         return ret;
 
@@ -544,7 +896,7 @@ int kvm_arch_get_registers(CPUPPCState *env)
 
     env->ctr = regs.ctr;
     env->lr = regs.lr;
-    env->xer = regs.xer;
+    cpu_write_xer(env, regs.xer);
     env->msr = regs.msr;
     env->nip = regs.pc;
 
@@ -565,8 +917,10 @@ int kvm_arch_get_registers(CPUPPCState *env)
     for (i = 0;i < 32; i++)
         env->gpr[i] = regs.gpr[i];
 
+    kvm_get_fp(cs);
+
     if (cap_booke_sregs) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -670,7 +1024,7 @@ int kvm_arch_get_registers(CPUPPCState *env)
     }
 
     if (cap_segstate) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -679,9 +1033,22 @@ int kvm_arch_get_registers(CPUPPCState *env)
 
         /* Sync SLB */
 #ifdef TARGET_PPC64
-        for (i = 0; i < 64; i++) {
-            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
-                          sregs.u.s.ppc64.slb[i].slbv);
+        /*
+         * The packed SLB array we get from KVM_GET_SREGS only contains
+         * information about valid entries. So we flush our internal
+         * copy to get rid of stale ones, then put all valid SLB entries
+         * back in.
+         */
+        memset(env->slb, 0, sizeof(env->slb));
+        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
+            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
+            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
+            /*
+             * Only restore valid entries
+             */
+            if (rb & SLB_ESID_V) {
+                ppc_store_slb(env, rb, rs);
+            }
         }
 #endif
 
@@ -699,10 +1066,38 @@ int kvm_arch_get_registers(CPUPPCState *env)
         }
     }
 
+    if (cap_hior) {
+        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
+    }
+
+    if (cap_one_reg) {
+        int i;
+
+        /* We deliberately ignore errors here, for kernels which have
+         * the ONE_REG calls, but don't support the specific
+         * registers, there's a reasonable chance things will still
+         * work, at least until we try to migrate. */
+        for (i = 0; i < 1024; i++) {
+            uint64_t id = env->spr_cb[i].one_reg_id;
+
+            if (id != 0) {
+                kvm_get_one_spr(cs, id, i);
+            }
+        }
+
+#ifdef TARGET_PPC64
+        if (cap_papr) {
+            if (kvm_get_vpa(cs) < 0) {
+                DPRINTF("Warning: Unable to get VPA information from KVM\n");
+            }
+        }
+#endif
+    }
+
     return 0;
 }
 
-int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
+int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
 {
     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 
@@ -714,7 +1109,7 @@ int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
         return 0;
     }
 
-    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
+    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
 
     return 0;
 }
 
@@ -727,8 +1122,10 @@ int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 #endif
 
-void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     int r;
     unsigned irq;
 
@@ -736,7 +1133,7 @@
      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
     if (!cap_interrupt_level &&
         run->ready_for_interrupt_injection &&
-        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->irq_input_state & (1<<PPC_INPUT_INT)))
     {
         /* For now KVM disregards the 'irq' argument. However, in the
@@ -745,13 +1142,14 @@
          */
         irq = KVM_INTERRUPT_SET;
 
-        dprintf("injected interrupt %d\n", irq);
-        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
-        if (r < 0)
-            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
+        DPRINTF("injected interrupt %d\n", irq);
+        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
+        if (r < 0) {
+            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
+        }
 
         /* Always wake up soon in case the interrupt was level based */
-        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
+        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                        (get_ticks_per_sec() / 50));
     }
 
@@ -760,19 +1158,22 @@
      * anyways, so we will get a chance to deliver the rest. */
 }
 
-void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
 }
 
-int kvm_arch_process_async_events(CPUPPCState *env)
+int kvm_arch_process_async_events(CPUState *cs)
 {
-    return env->halted;
+    return cs->halted;
 }
 
-static int kvmppc_handle_halt(CPUPPCState *env)
+static int kvmppc_handle_halt(PowerPCCPU *cpu)
 {
-    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
-        env->halted = 1;
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
+        cs->halted = 1;
         env->exception_index = EXCP_HLT;
     }
 
@@ -796,33 +1197,46 @@ static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t dat
     return 0;
 }
 
-int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     int ret;
 
     switch (run->exit_reason) {
     case KVM_EXIT_DCR:
         if (run->dcr.is_write) {
-            dprintf("handle dcr write\n");
+            DPRINTF("handle dcr write\n");
             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
         } else {
-            dprintf("handle dcr read\n");
+            DPRINTF("handle dcr read\n");
             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
         }
         break;
     case KVM_EXIT_HLT:
-        dprintf("handle halt\n");
-        ret = kvmppc_handle_halt(env);
+        DPRINTF("handle halt\n");
+        ret = kvmppc_handle_halt(cpu);
         break;
-#ifdef CONFIG_PSERIES
+#if defined(TARGET_PPC64)
     case KVM_EXIT_PAPR_HCALL:
-        dprintf("handle PAPR hypercall\n");
-        run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
+        DPRINTF("handle PAPR hypercall\n");
+        run->papr_hcall.ret = spapr_hypercall(cpu,
                                               run->papr_hcall.nr,
                                               run->papr_hcall.args);
         ret = 0;
         break;
 #endif
+    case KVM_EXIT_EPR:
+        DPRINTF("handle epr\n");
+        run->epr.epr = ldl_phys(env->mpic_iack);
+        ret = 0;
+        break;
+    case KVM_EXIT_WATCHDOG:
+        DPRINTF("handle watchdog expiry\n");
+        watchdog_perform_action();
+        ret = 0;
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
         break;
     }
 
@@ -832,6 +1246,71 @@ int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
     return ret;
 }
 
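kvm_arch_handle_exit() above is only the per-target half of the story: QEMU's generic accel loop keeps issuing KVM_RUN and forwards any exit reason it does not consume itself to this hook, which is why unknown reasons return -1 and stop the vcpu. A minimal sketch of that dispatch shape against the raw API, assuming vcpu_fd and the mmap'ed struct kvm_run are already set up; this is illustrative, not QEMU's actual loop:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int run_vcpu(int vcpu_fd, struct kvm_run *run)
    {
        for (;;) {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                return -1;
            }
            switch (run->exit_reason) {
            case KVM_EXIT_HLT:
                return 0;            /* guest halted */
            case KVM_EXIT_MMIO:
                /* device emulation would go here */
                break;
            default:
                return -1;           /* the "unknown exit reason" path */
            }
        }
    }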
%d\n", run->exit_reason); ret = -1; @@ -832,6 +1246,71 @@ int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run) return ret; } +int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) +{ + CPUState *cs = CPU(cpu); + uint32_t bits = tsr_bits; + struct kvm_one_reg reg = { + .id = KVM_REG_PPC_OR_TSR, + .addr = (uintptr_t) &bits, + }; + + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); +} + +int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) +{ + + CPUState *cs = CPU(cpu); + uint32_t bits = tsr_bits; + struct kvm_one_reg reg = { + .id = KVM_REG_PPC_CLEAR_TSR, + .addr = (uintptr_t) &bits, + }; + + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); +} + +int kvmppc_set_tcr(PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + uint32_t tcr = env->spr[SPR_BOOKE_TCR]; + + struct kvm_one_reg reg = { + .id = KVM_REG_PPC_TCR, + .addr = (uintptr_t) &tcr, + }; + + return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); +} + +int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + struct kvm_enable_cap encap = {}; + int ret; + + if (!kvm_enabled()) { + return -1; + } + + if (!cap_ppc_watchdog) { + printf("warning: KVM does not support watchdog"); + return -1; + } + + encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG; + ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap); + if (ret < 0) { + fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", + __func__, strerror(-ret)); + return ret; + } + + return ret; +} + static int read_cpuinfo(const char *field, char *value, int len) { FILE *f; @@ -967,16 +1446,38 @@ uint32_t kvmppc_get_dfp(void) return kvmppc_read_int_cpu_dt("ibm,dfp"); } +static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) + { + PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUState *cs = CPU(cpu); + + if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && + !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { + return 0; + } + + return 1; +} + +int kvmppc_get_hasidle(CPUPPCState *env) +{ + struct kvm_ppc_pvinfo pvinfo; + + if (!kvmppc_get_pvinfo(env, &pvinfo) && + (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { + return 1; + } + + return 0; +} + int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) { uint32_t *hc = (uint32_t*)buf; - struct kvm_ppc_pvinfo pvinfo; - if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) && - !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) { + if (!kvmppc_get_pvinfo(env, &pvinfo)) { memcpy(buf, pvinfo.hcall, buf_len); - return 0; } @@ -997,17 +1498,39 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) return 0; } -void kvmppc_set_papr(CPUPPCState *env) +void kvmppc_set_papr(PowerPCCPU *cpu) { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); struct kvm_enable_cap cap = {}; int ret; cap.cap = KVM_CAP_PPC_PAPR; - ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap); + ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap); if (ret) { cpu_abort(env, "This KVM version does not support PAPR\n"); } + + /* Update the capability flag so we sync the right information + * with kvm */ + cap_papr = 1; +} + +void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + struct kvm_enable_cap cap = {}; + int ret; + + cap.cap = KVM_CAP_PPC_EPR; + cap.args[0] = mpic_proxy; + ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap); + + if (ret && mpic_proxy) { + cpu_abort(env, "This KVM version does not support EPR\n"); + } } int kvmppc_smt_threads(void) @@ -1052,7 
@@ -1052,7 +1575,7 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
     };
 
     rma_region = g_new(MemoryRegion, 1);
-    memory_region_init_ram_ptr(rma_region, name, size, rma);
+    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
     vmstate_register_ram_global(rma_region);
     memory_region_add_subregion(sysmem, 0, rma_region);
 
@@ -1061,11 +1584,35 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
 {
+    struct kvm_ppc_smmu_info info;
+    long rampagesize, best_page_shift;
+    int i;
+
     if (cap_ppc_rma >= 2) {
         return current_size;
     }
+
+    /* Find the largest hardware supported page size that's less than
+     * or equal to the (logical) backing page size of guest RAM */
+    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
+    rampagesize = getrampagesize();
+    best_page_shift = 0;
+
+    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
+        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
+
+        if (!sps->page_shift) {
+            continue;
+        }
+
+        if ((sps->page_shift > best_page_shift)
+            && ((1UL << sps->page_shift) <= rampagesize)) {
+            best_page_shift = sps->page_shift;
+        }
+    }
+
     return MIN(current_size,
-               getrampagesize() << (hash_shift - 7));
+               1ULL << (best_page_shift + hash_shift - 7));
 }
 #endif
 
@@ -1094,7 +1641,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
         return NULL;
     }
 
-    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
     /* FIXME: round this up to page size */
 
     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
@@ -1117,7 +1664,7 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
         return -1;
     }
 
-    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
 
     if ((munmap(table, len) < 0) ||
         (close(fd) < 0)) {
         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
@@ -1184,53 +1731,165 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on)
     }
 }
 
-const ppc_def_t *kvmppc_host_cpu_def(void)
+static void kvmppc_host_cpu_initfn(Object *obj)
 {
-    uint32_t host_pvr = mfpvr();
-    const ppc_def_t *base_spec;
-    ppc_def_t *spec;
+    assert(kvm_enabled());
+}
+
+static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
     uint32_t vmx = kvmppc_get_vmx();
     uint32_t dfp = kvmppc_get_dfp();
+    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
+    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
 
-    base_spec = ppc_find_by_pvr(host_pvr);
-
-    spec = g_malloc0(sizeof(*spec));
-    memcpy(spec, base_spec, sizeof(*spec));
-
-    /* Now fix up the spec with information we can query from the host */
+    /* Now fix up the class with information we can query from the host */
 
     if (vmx != -1) {
         /* Only override when we know what the host supports */
-        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
-        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
+        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
+        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
     }
     if (dfp != -1) {
         /* Only override when we know what the host supports */
-        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
+        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
+    }
+
+    if (dcache_size != -1) {
+        pcc->l1_dcache_size = dcache_size;
     }
 
-    return spec;
+    if (icache_size != -1) {
+        pcc->l1_icache_size = icache_size;
+    }
 }
 
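The new kvmppc_rma_size() bound above, in numbers: a hash page table of 2^hash_shift bytes holds 2^(hash_shift - 7) HPTE groups of 128 bytes each, and the formula caps the RMA at one best-case page per group, hence 1ULL << (best_page_shift + hash_shift - 7). A worked example with sample figures (not values from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int hash_shift = 24;       /* 16 MiB HPT */
        unsigned int best_page_shift = 16;  /* 64 KiB backing pages */
        uint64_t cap = 1ULL << (best_page_shift + hash_shift - 7);

        /* 2^(16 + 24 - 7) = 2^33 bytes -> prints 8192 MiB */
        printf("RMA capped at %llu MiB\n", (unsigned long long)(cap >> 20));
        return 0;
    }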
-int kvmppc_fixup_cpu(CPUPPCState *env)
+int kvmppc_fixup_cpu(PowerPCCPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
     int smt;
 
     /* Adjust cpu index for SMT */
     smt = kvmppc_smt_threads();
-    env->cpu_index = (env->cpu_index / smp_threads) * smt
-        + (env->cpu_index % smp_threads);
+    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
+        + (cs->cpu_index % smp_threads);
+
+    return 0;
+}
+
+bool kvmppc_has_cap_epr(void)
+{
+    return cap_epr;
+}
+
+static int kvm_ppc_register_host_cpu_type(void)
+{
+    TypeInfo type_info = {
+        .name = TYPE_HOST_POWERPC_CPU,
+        .instance_init = kvmppc_host_cpu_initfn,
+        .class_init = kvmppc_host_cpu_class_init,
+    };
+    uint32_t host_pvr = mfpvr();
+    PowerPCCPUClass *pvr_pcc;
 
+    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
+    if (pvr_pcc == NULL) {
+        return -1;
+    }
+    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
+    type_register(&type_info);
 
     return 0;
 }
 
+int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
+{
+    struct kvm_rtas_token_args args = {
+        .token = token,
+    };
+
+    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
+        return -ENOENT;
+    }
+
+    strncpy(args.name, function, sizeof(args.name));
+
+    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
+}
+
+int kvmppc_get_htab_fd(bool write)
+{
+    struct kvm_get_htab_fd s = {
+        .flags = write ? KVM_GET_HTAB_WRITE : 0,
+        .start_index = 0,
+    };
+
+    if (!cap_htab_fd) {
+        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
+        return -1;
+    }
+
+    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
+}
+
+int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
+{
+    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    uint8_t buf[bufsize];
+    ssize_t rc;
+
+    do {
+        rc = read(fd, buf, bufsize);
+        if (rc < 0) {
+            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
+                    strerror(errno));
+            return rc;
+        } else if (rc) {
+            /* Kernel already returns data in BE format for the file */
+            qemu_put_buffer(f, buf, rc);
+        }
+    } while ((rc != 0)
+             && ((max_ns < 0)
+                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
+
+    return (rc == 0) ? 1 : 0;
+}
+
+int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+                           uint16_t n_valid, uint16_t n_invalid)
+{
+    struct kvm_get_htab_header *buf;
+    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
+    ssize_t rc;
+
+    buf = alloca(chunksize);
+    /* This is KVM on ppc, so this is all big-endian */
+    buf->index = index;
+    buf->n_valid = n_valid;
+    buf->n_invalid = n_invalid;
 
-bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
+    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
+
+    rc = write(fd, buf, chunksize);
+    if (rc < 0) {
+        fprintf(stderr, "Error writing KVM hash table: %s\n",
+                strerror(errno));
+        return rc;
+    }
+    if (rc != chunksize) {
+        /* We should never get a short write on a single chunk */
+        fprintf(stderr, "Short write, restoring KVM hash table\n");
+        return -1;
+    }
+    return 0;
+}
+
+bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
 {
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
     return 1;
 }
@@ -1239,3 +1898,7 @@ int kvm_arch_on_sigbus(int code, void *addr)
 {
     return 1;
 }
+
+void kvm_arch_init_irq_routing(KVMState *s)
+{
+}
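For readers of kvmppc_save_htab() and kvmppc_load_htab_chunk() above: the byte stream behind the HTAB fd is a sequence of kvm_get_htab_header records, each followed by n_valid 16-byte HPTEs (HASH_PTE_SIZE_64), with n_invalid further entries implied empty. A stand-alone walker over an in-memory copy of such a stream; the struct mirrors the powerpc uapi definition, and the zero-header end marker is an assumption borrowed from QEMU's migration format, not something this patch defines:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct htab_header {          /* mirrors struct kvm_get_htab_header */
        uint32_t index;           /* first HPTE group this record covers */
        uint16_t n_valid;         /* HPTEs present in the stream */
        uint16_t n_invalid;       /* HPTEs to clear on restore */
    };
    #define HPTE_SIZE 16

    static void walk_htab_stream(const uint8_t *buf, size_t len)
    {
        size_t pos = 0;

        while (pos + sizeof(struct htab_header) <= len) {
            const struct htab_header *h = (const void *)(buf + pos);

            if (!h->n_valid && !h->n_invalid) {
                break;            /* assumed end-of-stream marker */
            }
            printf("group %u: %u valid, %u invalid\n",
                   h->index, h->n_valid, h->n_invalid);
            pos += sizeof(*h) + (size_t)h->n_valid * HPTE_SIZE;
        }
    }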