#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
-#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/hw.h"
#include "hw/ppc/ppc.h"
+#include "migration/qemu-file-types.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"
-//#define DEBUG_KVM
-
-#ifdef DEBUG_KVM
-#define DPRINTF(fmt, ...) \
- do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
-
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_LAST_INFO
};
-static int cap_interrupt_unset = false;
-static int cap_interrupt_level = false;
+static int cap_interrupt_unset;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_htm; /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
+static int cap_xive;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;
+static int cap_ppc_count_cache_flush_assist;
static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
static uint32_t debug_inst_opcode;
-/* XXX We have a race condition where we actually have a level triggered
- * interrupt, but the infrastructure can't expose that yet, so the guest
- * takes but ignores it, goes to sleep and never gets notified that there's
- * still an interrupt pending.
- *
- * As a quick workaround, let's just wake up again 20 ms after we injected
- * an interrupt. That way we can assure that we're always reinjecting
- * interrupts in case the guest swallowed them.
- */
-static QEMUTimer *idle_timer;
-
-static void kvm_kick_cpu(void *opaque)
-{
- PowerPCCPU *cpu = opaque;
-
- qemu_cpu_kick(CPU(cpu));
-}
-
-/* Check whether we are running with KVM-PR (instead of KVM-HV). This
+/*
+ * Check whether we are running with KVM-PR (instead of KVM-HV). This
* should only be used for fallback tests - generally we should use
* explicit capabilities for the features we want, rather than
- * assuming what is/isn't available depending on the KVM variant. */
+ * assuming what is/isn't available depending on the KVM variant.
+ */
static bool kvmppc_is_pr(KVMState *ks)
{
/* Assume KVM-PR if the GET_PVINFO capability is available */
return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}
-static int kvm_ppc_register_host_cpu_type(MachineState *ms);
+static int kvm_ppc_register_host_cpu_type(void);
static void kvmppc_get_cpu_characteristics(KVMState *s);
static int kvmppc_get_dec_bits(void);
int kvm_arch_init(MachineState *ms, KVMState *s)
{
cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
- cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
- /* Note: we don't set cap_papr here, because this capability is
- * only activated after this by kvmppc_set_papr() */
+ /*
+ * Note: we don't set cap_papr here, because this capability is
+ * only activated after this by kvmppc_set_papr()
+ */
cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+ cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
kvmppc_get_cpu_characteristics(s);
cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
* in KVM at this moment.
*
* TODO: call kvm_vm_check_extension() with the right capability
- * after the kernel starts implementing it.*/
+ * after the kernel starts implementing it.
+ */
cap_ppc_pvr_compat = false;
- if (!cap_interrupt_level) {
- fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
- "VM to stall at times!\n");
+ if (!kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL)) {
+ error_report("KVM: Host kernel doesn't have level irq capability");
+ exit(1);
}
- kvm_ppc_register_host_cpu_type(ms);
+ kvm_ppc_register_host_cpu_type();
return 0;
}
-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
+int kvm_arch_irqchip_create(KVMState *s)
{
return 0;
}
int ret;
if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
- /* What we're really trying to say is "if we're on BookE, we use
- the native PVR for now". This is the only sane way to check
- it though, so we potentially confuse users that they can run
- BookE guests on BookS. Let's hope nobody dares enough :) */
+ /*
+ * What we're really trying to say is "if we're on BookE, we
+ * use the native PVR for now". This is the only sane way to
+ * check it though, so we potentially confuse users that they
+ * can run BookE guests on BookS. Let's hope nobody dares
+ * enough :)
+ */
return 0;
} else {
if (!cap_segstate) {
}
if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
- /* Mostly what guest pagesizes we can use are related to the
+ /*
+ * Mostly what guest pagesizes we can use are related to the
* host pages used to map guest RAM, which is handled in the
* platform code. Cache-Inhibited largepages (64k) however are
* used for I/O, so if they're mapped to the host at all it
* will be a normal mapping, not a special hugepage one used
- * for RAM. */
- if (getpagesize() < 0x10000) {
+ * for RAM.
+ */
+ if (qemu_real_host_page_size < 0x10000) {
error_setg(errp,
"KVM can't supply 64kiB CI pages, which guest expects");
}
return POWERPC_CPU(cpu)->vcpu_id;
}
-/* e500 supports 2 h/w breakpoint and 2 watchpoint.
- * book3s supports only 1 watchpoint, so array size
- * of 4 is sufficient for now.
+/*
+ * e500 supports 2 h/w breakpoint and 2 watchpoint. book3s supports
+ * only 1 watchpoint, so array size of 4 is sufficient for now.
*/
#define MAX_HW_BKPTS 4
return ret;
}
- idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
-
switch (cenv->mmu_model) {
case POWERPC_MMU_BOOKE206:
/* This target supports access to KVM's guest TLB */
break;
case POWERPC_MMU_2_07:
if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
- /* KVM-HV has transactional memory on POWER8 also without the
- * KVM_CAP_PPC_HTM extension, so enable it here instead as
- * long as it's availble to userspace on the host. */
+ /*
+ * KVM-HV has transactional memory on POWER8 also without
+ * the KVM_CAP_PPC_HTM extension, so enable it here
+ * instead as long as it's availble to userspace on the
+ * host.
+ */
if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
cap_htm = true;
}
return ret;
}
+int kvm_arch_destroy_vcpu(CPUState *cs)
+{
+ return 0;
+}
+
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
CPUPPCState *env = &cpu->env;
reg.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
+ trace_kvm_failed_fpscr_set(strerror(errno));
return ret;
}
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
- i, strerror(errno));
+ trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
+ strerror(errno));
return ret;
}
}
reg.addr = (uintptr_t)&env->vscr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
+ trace_kvm_failed_vscr_set(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
+ trace_kvm_failed_vr_set(i, strerror(errno));
return ret;
}
}
reg.addr = (uintptr_t)&fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
+ trace_kvm_failed_fpscr_get(strerror(errno));
return ret;
} else {
env->fpscr = fpscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get %s%d from KVM: %s\n",
- vsx ? "VSR" : "FPR", i, strerror(errno));
+ trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
+ strerror(errno));
return ret;
} else {
#ifdef HOST_WORDS_BIGENDIAN
reg.addr = (uintptr_t)&env->vscr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
+ trace_kvm_failed_vscr_get(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get VR%d from KVM: %s\n",
- i, strerror(errno));
+ trace_kvm_failed_vr_get(i, strerror(errno));
return ret;
}
}
static int kvm_get_vpa(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
struct kvm_one_reg reg;
int ret;
reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
+ trace_kvm_failed_vpa_addr_get(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
- strerror(errno));
+ trace_kvm_failed_slb_get(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
- strerror(errno));
+ trace_kvm_failed_dtl_get(strerror(errno));
return ret;
}
static int kvm_put_vpa(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
struct kvm_one_reg reg;
int ret;
- /* SLB shadow or DTL can't be registered unless a master VPA is
+ /*
+ * SLB shadow or DTL can't be registered unless a master VPA is
* registered. That means when restoring state, if a VPA *is*
* registered, we need to set that up first. If not, we need to
- * deregister the others before deregistering the master VPA */
+ * deregister the others before deregistering the master VPA
+ */
assert(spapr_cpu->vpa_addr
|| !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+ trace_kvm_failed_vpa_addr_set(strerror(errno));
return ret;
}
}
reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
+ trace_kvm_failed_slb_set(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
- strerror(errno));
+ trace_kvm_failed_dtl_set(strerror(errno));
return ret;
}
reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
if (ret < 0) {
- DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+ trace_kvm_failed_null_vpa_addr_set(strerror(errno));
return ret;
}
}
regs.pid = env->spr[SPR_BOOKE_PID];
- for (i = 0;i < 32; i++)
+ for (i = 0; i < 32; i++) {
regs.gpr[i] = env->gpr[i];
+ }
regs.cr = 0;
for (i = 0; i < 8; i++) {
}
ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s);
- if (ret < 0)
+ if (ret < 0) {
return ret;
+ }
kvm_put_fp(cs);
if (cap_one_reg) {
int i;
- /* We deliberately ignore errors here, for kernels which have
+ /*
+ * We deliberately ignore errors here, for kernels which have
* the ONE_REG calls, but don't support the specific
* registers, there's a reasonable chance things will still
- * work, at least until we try to migrate. */
+ * work, at least until we try to migrate.
+ */
for (i = 0; i < 1024; i++) {
uint64_t id = env->spr_cb[i].one_reg_id;
if (cap_papr) {
if (kvm_put_vpa(cs) < 0) {
- DPRINTF("Warning: Unable to set VPA information to KVM\n");
+ trace_kvm_failed_put_vpa();
}
}
kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
+
+ if (level > KVM_PUT_RUNTIME_STATE) {
+ kvm_put_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
+ }
#endif /* TARGET_PPC64 */
}
int i, ret;
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s);
- if (ret < 0)
+ if (ret < 0) {
return ret;
+ }
cr = regs.cr;
for (i = 7; i >= 0; i--) {
env->spr[SPR_BOOKE_PID] = regs.pid;
- for (i = 0;i < 32; i++)
+ for (i = 0; i < 32; i++) {
env->gpr[i] = regs.gpr[i];
+ }
kvm_get_fp(cs);
if (cap_one_reg) {
int i;
- /* We deliberately ignore errors here, for kernels which have
+ /*
+ * We deliberately ignore errors here, for kernels which have
* the ONE_REG calls, but don't support the specific
* registers, there's a reasonable chance things will still
- * work, at least until we try to migrate. */
+ * work, at least until we try to migrate.
+ */
for (i = 0; i < 1024; i++) {
uint64_t id = env->spr_cb[i].one_reg_id;
if (cap_papr) {
if (kvm_get_vpa(cs) < 0) {
- DPRINTF("Warning: Unable to get VPA information from KVM\n");
+ trace_kvm_failed_get_vpa();
}
}
kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
+ kvm_get_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
#endif
}
return 0;
}
- if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
+ if (!kvm_enabled() || !cap_interrupt_unset) {
return 0;
}
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- CPUPPCState *env = &cpu->env;
- int r;
- unsigned irq;
-
- qemu_mutex_lock_iothread();
-
- /* PowerPC QEMU tracks the various core input pins (interrupt, critical
- * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
- if (!cap_interrupt_level &&
- run->ready_for_interrupt_injection &&
- (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
- (env->irq_input_state & (1<<PPC_INPUT_INT)))
- {
- /* For now KVM disregards the 'irq' argument. However, in the
- * future KVM could cache it in-kernel to avoid a heavyweight exit
- * when reading the UIC.
- */
- irq = KVM_INTERRUPT_SET;
-
- DPRINTF("injected interrupt %d\n", irq);
- r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
- if (r < 0) {
- printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
- }
-
- /* Always wake up soon in case the interrupt was level based */
- timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- (NANOSECONDS_PER_SECOND / 50));
- }
-
- /* We don't know if there are more interrupts pending after this. However,
- * the guest will return to userspace in the course of handling this one
- * anyways, so we will get a chance to deliver the rest. */
-
- qemu_mutex_unlock_iothread();
+ return;
}
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
}
/* map dcr access to existing qemu dcr emulation */
-static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
+static int kvmppc_handle_dcr_read(CPUPPCState *env,
+ uint32_t dcrn, uint32_t *data)
{
- if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
+ if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
+ }
return 0;
}
-static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
+static int kvmppc_handle_dcr_write(CPUPPCState *env,
+ uint32_t dcrn, uint32_t data)
{
- if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
+ if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
+ }
return 0;
}
}
}
-static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
+static int kvm_handle_hw_breakpoint(CPUState *cs,
+ struct kvm_debug_exit_arch *arch_info)
{
- CPUState *cs = CPU(cpu);
- CPUPPCState *env = &cpu->env;
- struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
int handle = 0;
int n;
int flag = 0;
- if (cs->singlestep_enabled) {
- handle = 1;
- } else if (arch_info->status) {
- if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
- if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
- n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
- if (n >= 0) {
- handle = 1;
- }
- } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
- KVMPPC_DEBUG_WATCH_WRITE)) {
- n = find_hw_watchpoint(arch_info->address, &flag);
- if (n >= 0) {
- handle = 1;
- cs->watchpoint_hit = &hw_watchpoint;
- hw_watchpoint.vaddr = hw_debug_points[n].addr;
- hw_watchpoint.flags = flag;
- }
+ if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+ if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
+ n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
+ if (n >= 0) {
+ handle = 1;
+ }
+ } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
+ KVMPPC_DEBUG_WATCH_WRITE)) {
+ n = find_hw_watchpoint(arch_info->address, &flag);
+ if (n >= 0) {
+ handle = 1;
+ cs->watchpoint_hit = &hw_watchpoint;
+ hw_watchpoint.vaddr = hw_debug_points[n].addr;
+ hw_watchpoint.flags = flag;
}
}
- } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
- handle = 1;
- } else {
- /* QEMU is not able to handle debug exception, so inject
- * program exception to guest;
- * Yes program exception NOT debug exception !!
- * When QEMU is using debug resources then debug exception must
- * be always set. To achieve this we set MSR_DE and also set
- * MSRP_DEP so guest cannot change MSR_DE.
- * When emulating debug resource for guest we want guest
- * to control MSR_DE (enable/disable debug interrupt on need).
- * Supporting both configurations are NOT possible.
- * So the result is that we cannot share debug resources
- * between QEMU and Guest on BOOKE architecture.
- * In the current design QEMU gets the priority over guest,
- * this means that if QEMU is using debug resources then guest
- * cannot use them;
- * For software breakpoint QEMU uses a privileged instruction;
- * So there cannot be any reason that we are here for guest
- * set debug exception, only possibility is guest executed a
- * privileged / illegal instruction and that's why we are
- * injecting a program interrupt.
- */
+ }
+ return handle;
+}
- cpu_synchronize_state(cs);
- /* env->nip is PC, so increment this by 4 to use
- * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
- */
- env->nip += 4;
- cs->exception_index = POWERPC_EXCP_PROGRAM;
- env->error_code = POWERPC_EXCP_INVAL;
- ppc_cpu_do_interrupt(cs);
+static int kvm_handle_singlestep(void)
+{
+ return 1;
+}
+
+static int kvm_handle_sw_breakpoint(void)
+{
+ return 1;
+}
+
+static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
+{
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
+ struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
+
+ if (cs->singlestep_enabled) {
+ return kvm_handle_singlestep();
}
- return handle;
+ if (arch_info->status) {
+ return kvm_handle_hw_breakpoint(cs, arch_info);
+ }
+
+ if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+ return kvm_handle_sw_breakpoint();
+ }
+
+ /*
+ * QEMU is not able to handle debug exception, so inject
+ * program exception to guest;
+ * Yes program exception NOT debug exception !!
+ * When QEMU is using debug resources then debug exception must
+ * be always set. To achieve this we set MSR_DE and also set
+ * MSRP_DEP so guest cannot change MSR_DE.
+ * When emulating debug resource for guest we want guest
+ * to control MSR_DE (enable/disable debug interrupt on need).
+ * Supporting both configurations are NOT possible.
+ * So the result is that we cannot share debug resources
+ * between QEMU and Guest on BOOKE architecture.
+ * In the current design QEMU gets the priority over guest,
+ * this means that if QEMU is using debug resources then guest
+ * cannot use them;
+ * For software breakpoint QEMU uses a privileged instruction;
+ * So there cannot be any reason that we are here for guest
+ * set debug exception, only possibility is guest executed a
+ * privileged / illegal instruction and that's why we are
+ * injecting a program interrupt.
+ */
+ cpu_synchronize_state(cs);
+ /*
+ * env->nip is PC, so increment this by 4 to use
+ * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
+ */
+ env->nip += 4;
+ cs->exception_index = POWERPC_EXCP_PROGRAM;
+ env->error_code = POWERPC_EXCP_INVAL;
+ ppc_cpu_do_interrupt(cs);
+
+ return 0;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
switch (run->exit_reason) {
case KVM_EXIT_DCR:
if (run->dcr.is_write) {
- DPRINTF("handle dcr write\n");
+ trace_kvm_handle_dcr_write();
ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
} else {
- DPRINTF("handle dcr read\n");
+ trace_kvm_handle_dcr_read();
ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
}
break;
case KVM_EXIT_HLT:
- DPRINTF("handle halt\n");
+ trace_kvm_handle_halt();
ret = kvmppc_handle_halt(cpu);
break;
#if defined(TARGET_PPC64)
case KVM_EXIT_PAPR_HCALL:
- DPRINTF("handle PAPR hypercall\n");
+ trace_kvm_handle_papr_hcall();
run->papr_hcall.ret = spapr_hypercall(cpu,
run->papr_hcall.nr,
run->papr_hcall.args);
break;
#endif
case KVM_EXIT_EPR:
- DPRINTF("handle epr\n");
+ trace_kvm_handle_epr();
run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
ret = 0;
break;
case KVM_EXIT_WATCHDOG:
- DPRINTF("handle watchdog expiry\n");
+ trace_kvm_handle_watchdog_expiry();
watchdog_perform_action();
ret = 0;
break;
case KVM_EXIT_DEBUG:
- DPRINTF("handle debug exception\n");
+ trace_kvm_handle_debug_exception();
if (kvm_handle_debug(cpu, run)) {
ret = EXCP_DEBUG;
break;
ret = 0;
break;
}
- } while(*line);
+ } while (*line);
fclose(f);
return retval;
}
- if (!(ns = strchr(line, ':'))) {
+ ns = strchr(line, ':');
+ if (!ns) {
return retval;
}
struct dirent *dirp;
DIR *dp;
- if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
+ dp = opendir(PROC_DEVTREE_CPU);
+ if (!dp) {
printf("Can't open directory " PROC_DEVTREE_CPU "\n");
return -1;
}
return 0;
}
-/* Read a CPU node property from the host device tree that's a single
+/*
+ * Read a CPU node property from the host device tree that's a single
* integer (32-bit or 64-bit). Returns 0 if anything goes wrong
- * (can't find or open the property, or doesn't understand the
- * format) */
+ * (can't find or open the property, or doesn't understand the format)
+ */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
char buf[PATH_MAX], *tmp;
}
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
- {
- PowerPCCPU *cpu = ppc_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
+{
+ CPUState *cs = env_cpu(env);
if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
!kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
- uint32_t *hc = (uint32_t*)buf;
+ uint32_t *hc = (uint32_t *)buf;
struct kvm_ppc_pvinfo pvinfo;
if (!kvmppc_get_pvinfo(env, &pvinfo)) {
kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
+void kvmppc_enable_h_page_init(void)
+{
+ kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
+}
+
void kvmppc_set_papr(PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
exit(1);
}
- /* Update the capability flag so we sync the right information
- * with kvm */
+ /*
+ * Update the capability flag so we sync the right information
+ * with kvm
+ */
cap_papr = 1;
}
return ret;
}
-void kvmppc_hint_smt_possible(Error **errp)
+void kvmppc_error_append_smt_possible_hint(Error *const *errp)
{
int i;
GString *g;
long rampagesize, best_page_shift;
int i;
- /* Find the largest hardware supported page size that's less than
- * or equal to the (logical) backing page size of guest RAM */
+ /*
+ * Find the largest hardware supported page size that's less than
+ * or equal to the (logical) backing page size of guest RAM
+ */
kvm_get_smmu_info(&info, &error_fatal);
- rampagesize = qemu_getrampagesize();
+ rampagesize = qemu_minrampagesize();
best_page_shift = 0;
for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
int fd;
void *table;
- /* Must set fd to -1 so we don't try to munmap when called for
+ /*
+ * Must set fd to -1 so we don't try to munmap when called for
* destroying the table, which the upper layers -will- do
*/
*pfd = -1;
len = nb_table * sizeof(uint64_t);
/* FIXME: round this up to page size */
- table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (table == MAP_FAILED) {
fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
liobn);
int ret;
ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
if (ret == -ENOTTY) {
- /* At least some versions of PR KVM advertise the
+ /*
+ * At least some versions of PR KVM advertise the
* capability, but don't implement the ioctl(). Oops.
* Return 0 so that we allocate the htab in qemu, as is
- * correct for PR. */
+ * correct for PR.
+ */
return 0;
} else if (ret < 0) {
return ret;
return shift;
}
- /* We have a kernel that predates the htab reset calls. For PR
+ /*
+ * We have a kernel that predates the htab reset calls. For PR
* KVM, we need to allocate the htab ourselves, for an HV KVM of
- * this era, it has allocated a 16MB fixed size hash table already. */
+ * this era, it has allocated a 16MB fixed size hash table
+ * already.
+ */
if (kvmppc_is_pr(kvm_state)) {
/* PR - tell caller to allocate htab */
return 0;
static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
{
- if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
+ if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
+ (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
+ (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
+ return SPAPR_CAP_FIXED_NA;
+ } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
+ return SPAPR_CAP_WORKAROUND;
+ } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
return SPAPR_CAP_FIXED_CCD;
} else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
return SPAPR_CAP_FIXED_IBS;
return 0;
}
+static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
+{
+ if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
+ return 1;
+ }
+ return 0;
+}
+
+bool kvmppc_has_cap_xive(void)
+{
+ return cap_xive;
+}
+
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
struct kvm_ppc_cpu_char c;
cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
+ cap_ppc_count_cache_flush_assist =
+ parse_cap_ppc_count_cache_flush_assist(c);
}
int kvmppc_get_cap_safe_cache(void)
return cap_ppc_safe_indirect_branch;
}
+int kvmppc_get_cap_count_cache_flush_assist(void)
+{
+ return cap_ppc_count_cache_flush_assist;
+}
+
bool kvmppc_has_cap_nested_kvm_hv(void)
{
return !!cap_ppc_nested_kvm_hv;
return pvr_pcc;
}
-static int kvm_ppc_register_host_cpu_type(MachineState *ms)
+static void pseries_machine_class_fixup(ObjectClass *oc, void *opaque)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+
+ mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
+}
+
+static int kvm_ppc_register_host_cpu_type(void)
{
TypeInfo type_info = {
.name = TYPE_HOST_POWERPC_CPU,
.class_init = kvmppc_host_cpu_class_init,
};
- MachineClass *mc = MACHINE_GET_CLASS(ms);
PowerPCCPUClass *pvr_pcc;
ObjectClass *oc;
DeviceClass *dc;
}
type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
type_register(&type_info);
- if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
- /* override TCG default cpu type with 'host' cpu model */
- mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
- }
+ /* override TCG default cpu type with 'host' cpu model */
+ object_class_foreach(pseries_machine_class_fixup, TYPE_SPAPR_MACHINE,
+ false, NULL);
oc = object_class_by_name(type_info.name);
g_assert(oc);
return -ENOENT;
}
- strncpy(args.name, function, sizeof(args.name));
+ strncpy(args.name, function, sizeof(args.name) - 1);
return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
}
}
} while ((rc != 0)
- && ((max_ns < 0)
- || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
+ && ((max_ns < 0) ||
+ ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
return (rc == 0) ? 1 : 0;
}
uint16_t n_valid, uint16_t n_invalid)
{
struct kvm_get_htab_header *buf;
- size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
+ size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
ssize_t rc;
buf = alloca(chunksize);
buf->n_valid = n_valid;
buf->n_invalid = n_invalid;
- qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
+ qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
rc = write(fd, buf, chunksize);
if (rc < 0) {
kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
}
}
+
+void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
+{
+ CPUState *cs = CPU(cpu);
+
+ if (kvm_enabled()) {
+ kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
+ }
+}