X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=target-ppc%2Fkvm.c;h=2546c577f6b311edca3702da9152d4561ce2c0fb;hb=1de7afc984b49af164e2619e6850b9732b173b34;hp=f3d0861a111e3fefd0f2d51b1161c263432b6309;hpb=70bca53ffb811ea59dc090b3ca7825cf0bf346a7;p=qemu.git

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index f3d0861a1..2546c577f 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -18,15 +18,17 @@
 #include
 #include
 #include
+#include
 #include

 #include "qemu-common.h"
-#include "qemu-timer.h"
+#include "qemu/timer.h"
 #include "sysemu.h"
 #include "kvm.h"
 #include "kvm_ppc.h"
 #include "cpu.h"
+#include "cpus.h"
 #include "device_tree.h"
 #include "hw/sysbus.h"
 #include "hw/spapr.h"
@@ -58,6 +60,7 @@ static int cap_booke_sregs;
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_hior;

 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -70,9 +73,11 @@ static int cap_spapr_tce;
  */
 static QEMUTimer *idle_timer;

-static void kvm_kick_env(void *env)
+static void kvm_kick_cpu(void *opaque)
 {
-    qemu_cpu_kick(env);
+    PowerPCCPU *cpu = opaque;
+
+    qemu_cpu_kick(CPU(cpu));
 }

 int kvm_arch_init(KVMState *s)
@@ -84,6 +89,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -93,7 +99,7 @@ int kvm_arch_init(KVMState *s)
     return 0;
 }

-static int kvm_arch_sync_sregs(CPUState *cenv)
+static int kvm_arch_sync_sregs(CPUPPCState *cenv)
 {
     struct kvm_sregs sregs;
     int ret;
@@ -121,7 +127,7 @@ static int kvm_arch_sync_sregs(CPUState *cenv)
 }

 /* Set up a shared TLB array with KVM */
-static int kvm_booke206_tlb_init(CPUState *env)
+static int kvm_booke206_tlb_init(CPUPPCState *env)
 {
     struct kvm_book3e_206_tlb_params params = {};
     struct kvm_config_tlb cfg = {};
@@ -166,16 +172,224 @@ static int kvm_booke206_tlb_init(CPUState *env)
     return 0;
 }

-int kvm_arch_init_vcpu(CPUState *cenv)
+
+#if defined(TARGET_PPC64)
+static void kvm_get_fallback_smmu_info(CPUPPCState *env,
+                                       struct kvm_ppc_smmu_info *info)
+{
+    memset(info, 0, sizeof(*info));
+
+    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
+     * we need to "guess" what the supported page sizes are.
+     *
+     * For that to work we make a few assumptions:
+     *
+     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
+     *   KVM which only supports 4K and 16M pages, but supports them
+     *   regardless of the backing store characteristics. We also don't
+     *   support 1T segments.
+     *
+     *   This is safe as if HV KVM ever supports that capability or PR
+     *   KVM grows support for more page/segment sizes, those versions
+     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
+     *   will not hit this fallback.
+     *
+     * - Else we are running HV KVM. This means we only support page
+     *   sizes that fit in the backing store. Additionally we only
+     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
+     *   P7 encodings for the SLB and hash table. Here too, we assume
+     *   support for any newer processor will mean a kernel that
+     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
+     *   this fallback.
+     */
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* No flags */
+        info->flags = 0;
+        info->slb_size = 64;
+
+        /* Standard 4k base page size segment */
+        info->sps[0].page_shift = 12;
+        info->sps[0].slb_enc = 0;
+        info->sps[0].enc[0].page_shift = 12;
+        info->sps[0].enc[0].pte_enc = 0;
+
+        /* Standard 16M large page size segment */
+        info->sps[1].page_shift = 24;
+        info->sps[1].slb_enc = SLB_VSID_L;
+        info->sps[1].enc[0].page_shift = 24;
+        info->sps[1].enc[0].pte_enc = 0;
+    } else {
+        int i = 0;
+
+        /* HV KVM has backing store size restrictions */
+        info->flags = KVM_PPC_PAGE_SIZES_REAL;
+
+        if (env->mmu_model & POWERPC_MMU_1TSEG) {
+            info->flags |= KVM_PPC_1T_SEGMENTS;
+        }
+
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->slb_size = 32;
+        } else {
+            info->slb_size = 64;
+        }
+
+        /* Standard 4k base page size segment */
+        info->sps[i].page_shift = 12;
+        info->sps[i].slb_enc = 0;
+        info->sps[i].enc[0].page_shift = 12;
+        info->sps[i].enc[0].pte_enc = 0;
+        i++;
+
+        /* 64K on MMU 2.06 */
+        if (env->mmu_model == POWERPC_MMU_2_06) {
+            info->sps[i].page_shift = 16;
+            info->sps[i].slb_enc = 0x110;
+            info->sps[i].enc[0].page_shift = 16;
+            info->sps[i].enc[0].pte_enc = 1;
+            i++;
+        }
+
+        /* Standard 16M large page size segment */
+        info->sps[i].page_shift = 24;
+        info->sps[i].slb_enc = SLB_VSID_L;
+        info->sps[i].enc[0].page_shift = 24;
+        info->sps[i].enc[0].pte_enc = 0;
+    }
+}
+
+static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
+{
+    int ret;
+
+    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+        if (ret == 0) {
+            return;
+        }
+    }
+
+    kvm_get_fallback_smmu_info(env, info);
+}
+
+static long getrampagesize(void)
+{
+    struct statfs fs;
+    int ret;
+
+    if (!mem_path) {
+        /* guest RAM is backed by normal anonymous pages */
+        return getpagesize();
+    }
+
+    do {
+        ret = statfs(mem_path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
+                strerror(errno));
+        exit(1);
+    }
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        /* Explicit mempath, but it's ordinary pages */
+        return getpagesize();
+    }
+
+    /* It's hugepage, return the huge page size */
+    return fs.f_bsize;
+}
+
+static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
+{
+    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
+        return true;
+    }
+
+    return (1ul << shift) <= rampgsize;
+}
+
+static void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+    static struct kvm_ppc_smmu_info smmu_info;
+    static bool has_smmu_info;
+    long rampagesize;
+    int iq, ik, jq, jk;
+
+    /* We only handle page sizes for 64-bit server guests for now */
+    if (!(env->mmu_model & POWERPC_MMU_64)) {
+        return;
+    }
+
+    /* Collect MMU info from kernel if not already */
+    if (!has_smmu_info) {
+        kvm_get_smmu_info(env, &smmu_info);
+        has_smmu_info = true;
+    }
+
+    rampagesize = getrampagesize();
+
+    /* Convert to QEMU form */
+    memset(&env->sps, 0, sizeof(env->sps));
+
+    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
+        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
+        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
+
+        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                 ksps->page_shift)) {
+            continue;
+        }
+        qsps->page_shift = ksps->page_shift;
+        qsps->slb_enc = ksps->slb_enc;
+        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
+            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
+                                     ksps->enc[jk].page_shift)) {
+                continue;
+            }
+            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
+            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
+            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
+                break;
+            }
+        }
+        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
+            break;
+        }
+    }
+    env->slb_nr = smmu_info.slb_size;
+    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
+        env->mmu_model |= POWERPC_MMU_1TSEG;
+    } else {
+        env->mmu_model &= ~POWERPC_MMU_1TSEG;
+    }
+}
+#else /* defined (TARGET_PPC64) */
+
+static inline void kvm_fixup_page_sizes(CPUPPCState *env)
+{
+}
+
+#endif /* !defined (TARGET_PPC64) */
+
+int kvm_arch_init_vcpu(CPUPPCState *cenv)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
     int ret;

+    /* Gather server mmu info from KVM and update the CPU state */
+    kvm_fixup_page_sizes(cenv);
+
+    /* Synchronize sregs with kvm */
     ret = kvm_arch_sync_sregs(cenv);
     if (ret) {
         return ret;
     }

-    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
+    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

     /* Some targets support access to KVM's guest TLB. */
     switch (cenv->mmu_model) {
@@ -189,11 +403,11 @@ int kvm_arch_init_vcpu(CPUState *cenv)
     return ret;
 }

-void kvm_arch_reset_vcpu(CPUState *env)
+void kvm_arch_reset_vcpu(CPUPPCState *env)
 {
 }

-static void kvm_sw_tlb_put(CPUState *env)
+static void kvm_sw_tlb_put(CPUPPCState *env)
 {
     struct kvm_dirty_tlb dirty_tlb;
     unsigned char *bitmap;
@@ -218,7 +432,7 @@ static void kvm_sw_tlb_put(CPUState *env)
     g_free(bitmap);
 }

-int kvm_arch_put_registers(CPUState *env, int level)
+int kvm_arch_put_registers(CPUPPCState *env, int level)
 {
     struct kvm_regs regs;
     int ret;
@@ -260,10 +474,58 @@ int kvm_arch_put_registers(CPUState *env, int level)
         env->tlb_dirty = false;
     }

+    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
+        struct kvm_sregs sregs;
+
+        sregs.pvr = env->spr[SPR_PVR];
+
+        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+        /* Sync SLB */
+#ifdef TARGET_PPC64
+        for (i = 0; i < 64; i++) {
+            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+        }
+#endif
+
+        /* Sync SRs */
+        for (i = 0; i < 16; i++) {
+            sregs.u.s.ppc32.sr[i] = env->sr[i];
+        }
+
+        /* Sync BATs */
+        for (i = 0; i < 8; i++) {
+            /* Beware. We have to swap upper and lower bits here */
+            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+                | env->DBAT[1][i];
+            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+                | env->IBAT[1][i];
+        }
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
+        uint64_t hior = env->spr[SPR_HIOR];
+        struct kvm_one_reg reg = {
+            .id = KVM_REG_PPC_HIOR,
+            .addr = (uintptr_t) &hior,
+        };
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }

-int kvm_arch_get_registers(CPUState *env)
+int kvm_arch_get_registers(CPUPPCState *env)
 {
     struct kvm_regs regs;
     struct kvm_sregs sregs;
@@ -440,7 +702,7 @@ int kvm_arch_get_registers(CPUState *env)
     return 0;
 }

-int kvmppc_set_interrupt(CPUState *env, int irq, int level)
+int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 {
     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

@@ -465,12 +727,12 @@ int kvmppc_set_interrupt(CPUState *env, int irq, int level)
 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 #endif

-void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
 {
     int r;
     unsigned irq;

-    /* PowerPC Qemu tracks the various core input pins (interrupt, critical
+    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
     if (!cap_interrupt_level &&
         run->ready_for_interrupt_injection &&
@@ -498,16 +760,16 @@ void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
      * anyways, so we will get a chance to deliver the rest. */
 }

-void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
 {
 }

-int kvm_arch_process_async_events(CPUState *env)
+int kvm_arch_process_async_events(CPUPPCState *env)
 {
-    return 0;
+    return env->halted;
 }

-static int kvmppc_handle_halt(CPUState *env)
+static int kvmppc_handle_halt(CPUPPCState *env)
 {
     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
         env->halted = 1;
@@ -518,7 +780,7 @@ static int kvmppc_handle_halt(CPUState *env)
 }

 /* map dcr access to existing qemu dcr emulation */
-static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
+static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 {
     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
@@ -526,7 +788,7 @@ static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
     return 0;
 }

-static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
+static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 {
     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
@@ -534,7 +796,7 @@ static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
     return 0;
 }

-int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 {
     int ret;

@@ -555,9 +817,10 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
 #ifdef CONFIG_PSERIES
     case KVM_EXIT_PAPR_HCALL:
         dprintf("handle PAPR hypercall\n");
-        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
+        run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
+                                              run->papr_hcall.nr,
                                               run->papr_hcall.args);
-        ret = 1;
+        ret = 0;
         break;
 #endif
     default:
@@ -586,7 +849,7 @@ static int read_cpuinfo(const char *field, char *value, int len)
             break;
         }
         if (!strncmp(line, field, field_len)) {
-            strncpy(value, line, len);
+            pstrcpy(value, len, line);
             ret = 0;
             break;
         }
@@ -704,7 +967,7 @@ uint32_t kvmppc_get_dfp(void)
     return kvmppc_read_int_cpu_dt("ibm,dfp");
 }

-int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
+int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 {
     uint32_t *hc = (uint32_t*)buf;

@@ -734,51 +997,17 @@ int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
     return 0;
 }

-void kvmppc_set_papr(CPUState *env)
+void kvmppc_set_papr(CPUPPCState *env)
 {
     struct kvm_enable_cap cap = {};
-    struct kvm_one_reg reg = {};
-    struct kvm_sregs sregs = {};
     int ret;

     cap.cap = KVM_CAP_PPC_PAPR;
     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

     if (ret) {
-        goto fail;
+        cpu_abort(env, "This KVM version does not support PAPR\n");
     }
-
-    /*
-     * XXX We set HIOR here. It really should be a qdev property of
-     *     the CPU node, but we don't have CPUs converted to qdev yet.
-     *
-     *     Once we have qdev CPUs, move HIOR to a qdev property and
-     *     remove this chunk.
-     */
-    reg.id = KVM_ONE_REG_PPC_HIOR;
-    reg.u.reg64 = env->spr[SPR_HIOR];
-    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        goto fail;
-    }
-
-    /* Set SDR1 so kernel space finds the HTAB */
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    cpu_abort(env, "This KVM version does not support PAPR\n");
 }

 int kvmppc_smt_threads(void)
@@ -786,6 +1015,7 @@ int kvmppc_smt_threads(void)
     return cap_ppc_smt ? cap_ppc_smt : 1;
 }

+#ifdef TARGET_PPC64
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 {
     void *rma;
@@ -822,12 +1052,23 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
     };

     rma_region = g_new(MemoryRegion, 1);
-    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
+    memory_region_init_ram_ptr(rma_region, name, size, rma);
+    vmstate_register_ram_global(rma_region);
     memory_region_add_subregion(sysmem, 0, rma_region);

     return size;
 }

+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
+{
+    if (cap_ppc_rma >= 2) {
+        return current_size;
+    }
+    return MIN(current_size,
+               getrampagesize() << (hash_shift - 7));
+}
+#endif
+
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
 {
     struct kvm_create_spapr_tce args = {
@@ -838,20 +1079,28 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
     int fd;
     void *table;

+    /* Must set fd to -1 so we don't try to munmap when called for
+     * destroying the table, which the upper layers -will- do
+     */
+    *pfd = -1;
     if (!cap_spapr_tce) {
         return NULL;
     }

     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
     if (fd < 0) {
+        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
+                liobn);
         return NULL;
     }

-    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
     /* FIXME: round this up to page size */

-    table = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
     if (table == MAP_FAILED) {
+        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
+                liobn);
         close(fd);
         return NULL;
     }
@@ -868,17 +1117,55 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
         return -1;
     }

-    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE)*sizeof(VIOsPAPR_RTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
     if ((munmap(table, len) < 0) ||
         (close(fd) < 0)) {
-        fprintf(stderr, "KVM: Unexpected error removing KVM SPAPR TCE "
-                "table: %s", strerror(errno));
+        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
+                strerror(errno));
         /* Leak the table */
     }

     return 0;
 }

+int kvmppc_reset_htab(int shift_hint)
+{
+    uint32_t shift = shift_hint;
+
+    if (!kvm_enabled()) {
+        /* Full emulation, tell caller to allocate htab itself */
+        return 0;
+    }
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
+        int ret;
+        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
+        if (ret == -ENOTTY) {
+            /* At least some versions of PR KVM advertise the
+             * capability, but don't implement the ioctl(). Oops.
+             * Return 0 so that we allocate the htab in qemu, as is
+             * correct for PR. */
+            return 0;
+        } else if (ret < 0) {
+            return ret;
+        }
+        return shift;
+    }
+
+    /* We have a kernel that predates the htab reset calls. For PR
+     * KVM, we need to allocate the htab ourselves, for an HV KVM of
+     * this era, it has allocated a 16MB fixed size hash table
+     * already. Kernels of this era have the GET_PVINFO capability
+     * only on PR, so we use this hack to determine the right
+     * answer */
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* PR - tell caller to allocate htab */
+        return 0;
+    } else {
+        /* HV - assume 16MB kernel allocated htab */
+        return 24;
+    }
+}
+
 static inline uint32_t mfpvr(void)
 {
     uint32_t pvr;
@@ -925,12 +1212,25 @@ const ppc_def_t *kvmppc_host_cpu_def(void)
     return spec;
 }

-bool kvm_arch_stop_on_emulation_error(CPUState *env)
+int kvmppc_fixup_cpu(CPUPPCState *env)
+{
+    int smt;
+
+    /* Adjust cpu index for SMT */
+    smt = kvmppc_smt_threads();
+    env->cpu_index = (env->cpu_index / smp_threads) * smt
+        + (env->cpu_index % smp_threads);
+
+    return 0;
+}
+
+
+bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
 {
     return true;
 }

-int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
 {
     return 1;
 }
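
The sketches below illustrate four of the mechanisms this patch introduces. They are standalone illustrations, not QEMU code: any constant, helper, or sample value they define is a stand-in unless it matches an identifier visible in the diff above.

First, the page-size filter. Under HV KVM the patch sets KVM_PPC_PAGE_SIZES_REAL, and kvm_valid_page_size() then keeps only page sizes that fit inside the page size backing guest RAM; under PR KVM the flag is clear and every size passes. A minimal sketch of that predicate (PAGE_SIZES_REAL and the backing sizes are sample stand-ins):

    /* Sketch of the kvm_valid_page_size() filter: with "real" page-size
     * restrictions a guest page size is only usable when it fits within
     * the host backing page size. */
    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZES_REAL 0x1     /* stands in for KVM_PPC_PAGE_SIZES_REAL */

    static bool valid_page_size(unsigned flags, long rampgsize, unsigned shift)
    {
        if (!(flags & PAGE_SIZES_REAL)) {
            return true;            /* PR KVM: no backing-store restriction */
        }
        return (1ul << shift) <= (unsigned long)rampgsize;
    }

    int main(void)
    {
        long ram4k = 4096;          /* anonymous-page backing */
        long ram16m = 16 << 20;     /* 16M hugetlbfs backing */

        /* With 4K backing only 4K (shift 12) survives; 16M (shift 24)
         * is filtered out. With 16M hugepages both pass. */
        printf("4K backing:  4K=%d 16M=%d\n",
               valid_page_size(PAGE_SIZES_REAL, ram4k, 12),
               valid_page_size(PAGE_SIZES_REAL, ram4k, 24));
        printf("16M backing: 4K=%d 16M=%d\n",
               valid_page_size(PAGE_SIZES_REAL, ram16m, 12),
               valid_page_size(PAGE_SIZES_REAL, ram16m, 24));
        return 0;
    }

This is exactly why kvm_fixup_page_sizes() prunes the 16M segment entries unless guest RAM sits on hugetlbfs.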
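
Second, the hugetlbfs probe. getrampagesize() boils down to a statfs() on the -mem-path directory and a comparison of f_type against the hugetlbfs superblock magic, with f_bsize doubling as the huge page size. A standalone sketch of the same probe, assuming the first command-line argument plays the role of QEMU's mem_path:

    /* Sketch: report the backing page size of a directory, as the patch's
     * getrampagesize() does for -mem-path. Linux-only. */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/vfs.h>

    #define HUGETLBFS_MAGIC 0x958458f6  /* same constant the patch defines */

    int main(int argc, char **argv)
    {
        struct statfs fs;
        int ret;

        if (argc < 2) {
            /* No explicit path: ordinary anonymous pages */
            printf("%ld\n", (long)getpagesize());
            return 0;
        }

        /* statfs() can be interrupted by a signal; retry on EINTR as the
         * patch does */
        do {
            ret = statfs(argv[1], &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            fprintf(stderr, "statfs(%s): %s\n", argv[1], strerror(errno));
            return 1;
        }

        /* Only a hugetlbfs mount reports a huge f_bsize; anything else is
         * ordinary page-sized backing */
        printf("%ld\n", fs.f_type == HUGETLBFS_MAGIC ? (long)fs.f_bsize
                                                     : (long)getpagesize());
        return 0;
    }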
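
Third, the HIOR move shows the finalized one-reg ABI: instead of the old KVM_ONE_REG_PPC_HIOR with an inline u.reg64 payload, the new code passes KVM_REG_PPC_HIOR plus a userspace pointer in .addr, synced from kvm_arch_put_registers() whenever cap_hior is present. Reduced to its essentials (this sketch only compiles against powerpc KVM headers, Linux 3.5 or later; vcpu_fd is assumed to be an already-open KVM vcpu descriptor):

    /* Sketch of the one-reg write pattern used for SPR_HIOR above: the
     * kernel reads the value through the .addr pointer rather than from
     * an inline field in the structure. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int set_hior(int vcpu_fd, uint64_t hior)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t)&hior,   /* pointer, not an inline payload */
        };

        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }

The same indirection is what lets a single KVM_SET_ONE_REG ioctl carry registers of any width.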
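
Finally, kvmppc_fixup_cpu() respaces guest cpu_index values so each guest core begins on a host SMT boundary: dividing by smp_threads yields the guest core number, which is scaled by the host's thread count before the thread offset is added back. A worked example with made-up sample values, two guest threads per core on a four-thread host core:

    /* Sketch of the cpu_index spacing in kvmppc_fixup_cpu(). */
    #include <stdio.h>

    static int fixup_index(int cpu_index, int smp_threads, int smt)
    {
        return (cpu_index / smp_threads) * smt + (cpu_index % smp_threads);
    }

    int main(void)
    {
        const int smp_threads = 2;  /* guest threads per core (sample) */
        const int smt = 4;          /* host threads per core (sample) */
        int i;

        for (i = 0; i < 4; i++) {
            printf("guest cpu %d -> vcpu id %d\n", i,
                   fixup_index(i, smp_threads, smt));
        }
        return 0;
    }

Guest CPUs 0,1,2,3 come out as vcpu ids 0,1,4,5, so the threads of one guest core always stay inside a single host core's id range.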