#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "exec/ioport.h"
-#include "hyperv.h"
+#include <asm/hyperv.h>
#include "hw/pci/pci.h"
//#define DEBUG_KVM
static bool has_msr_hsave_pa;
static bool has_msr_tsc_adjust;
static bool has_msr_tsc_deadline;
+static bool has_msr_feature_control;
static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
static bool has_msr_kvm_steal_time;
static int lm_capable_kernel;
+static bool has_msr_architectural_pmu;
+static uint32_t num_architectural_pmu_counters;
+
bool kvm_allows_irq0_override(void)
{
return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
if ((env->mcg_cap & MCG_SER_P) && addr
&& (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
- if (qemu_ram_addr_from_host(addr, &ram_addr) ||
+ if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
!kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!\n");
int kvm_arch_on_sigbus(int code, void *addr)
{
- if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
+ X86CPU *cpu = X86_CPU(first_cpu);
+
+ if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
ram_addr_t ram_addr;
hwaddr paddr;
/* Hope we are lucky for AO MCE */
- if (qemu_ram_addr_from_host(addr, &ram_addr) ||
- !kvm_physical_memory_addr_from_host(CPU(first_cpu)->kvm_state,
+ if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
+ !kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!: %p\n", addr);
return 0;
}
kvm_hwpoison_page_add(ram_addr);
- kvm_mce_inject(x86_env_get_cpu(first_cpu), paddr, code);
+ kvm_mce_inject(cpu, paddr, code);
} else {
if (code == BUS_MCEERR_AO) {
return 0;
return cpu->env.cpuid_apic_id;
}
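+/*
+ * When Hyper-V enlightenments are enabled, the KVM identification
+ * leaves move from 0x40000000 up to this offset so that both
+ * hypervisor signatures stay discoverable. Older kernel headers may
+ * not define it, hence the fallback.
+ */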
+#ifndef KVM_CPUID_SIGNATURE_NEXT
+#define KVM_CPUID_SIGNATURE_NEXT 0x40000100
+#endif
+
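+/*
+ * Helpers for the per-CPU Hyper-V properties: the hypercall MSRs are
+ * needed once vAPIC assist or spinlock retries are requested, and any
+ * enlightenment at all (including relaxed timing alone) makes the
+ * Hyper-V signature take over the 0x40000000 CPUID leaves.
+ */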
+static bool hyperv_hypercall_available(X86CPU *cpu)
+{
+ return cpu->hyperv_vapic ||
+ (cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY);
+}
+
+static bool hyperv_enabled(X86CPU *cpu)
+{
+ return hyperv_hypercall_available(cpu) ||
+ cpu->hyperv_relaxed_timing;
+}
+
#define KVM_MAX_CPUID_ENTRIES 100
int kvm_arch_init_vcpu(CPUState *cs)
uint32_t signature[3];
int r;
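+ /* Start from an all-zero template; each entry below only fills in
+ * the fields it needs. */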
+ memset(&cpuid_data, 0, sizeof(cpuid_data));
+
cpuid_i = 0;
/* Paravirtualization CPUIDs */
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = KVM_CPUID_SIGNATURE;
- if (!hyperv_enabled()) {
+ if (!hyperv_enabled(cpu)) {
memcpy(signature, "KVMKVMKVM\0\0\0", 12);
c->eax = 0;
} else {
c->edx = signature[2];
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = KVM_CPUID_FEATURES;
c->eax = env->features[FEAT_KVM];
- if (hyperv_enabled()) {
+ if (hyperv_enabled(cpu)) {
memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
c->eax = signature[0];
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = HYPERV_CPUID_VERSION;
c->eax = 0x00001bbc;
c->ebx = 0x00060001;
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = HYPERV_CPUID_FEATURES;
- if (hyperv_relaxed_timing_enabled()) {
+ if (cpu->hyperv_relaxed_timing) {
c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
}
- if (hyperv_vapic_recommended()) {
+ if (cpu->hyperv_vapic) {
c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
}
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
- if (hyperv_relaxed_timing_enabled()) {
+ if (cpu->hyperv_relaxed_timing) {
c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
}
- if (hyperv_vapic_recommended()) {
+ if (cpu->hyperv_vapic) {
c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
}
- c->ebx = hyperv_get_spinlock_retries();
+ c->ebx = cpu->hyperv_spinlock_attempts;
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = HYPERV_CPUID_IMPLEMENT_LIMITS;
c->eax = 0x40;
c->ebx = 0x40;
c = &cpuid_data.entries[cpuid_i++];
- memset(c, 0, sizeof(*c));
c->function = KVM_CPUID_SIGNATURE_NEXT;
memcpy(signature, "KVMKVMKVM\0\0\0", 12);
c->eax = 0;
break;
}
}
+
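+ /* CPUID leaf 0xA describes the architectural PMU: EAX[7:0] is the
+ * version ID (0 means no architectural PMU) and EAX[15:8] is the
+ * number of general-purpose counters. */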
+ if (limit >= 0x0a) {
+ uint32_t ver;
+
+ cpu_x86_cpuid(env, 0x0a, 0, &ver, &unused, &unused, &unused);
+ if ((ver & 0xff) > 0) {
+ has_msr_architectural_pmu = true;
+ num_architectural_pmu_counters = (ver & 0xff00) >> 8;
+
+ /* Shouldn't be more than 32, since that's the number of bits
+ * available in EBX to tell us _which_ counters are available.
+ * Play it safe.
+ */
+ if (num_architectural_pmu_counters > MAX_GP_COUNTERS) {
+ num_architectural_pmu_counters = MAX_GP_COUNTERS;
+ }
+ }
+ }
+
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
qemu_add_vm_change_state_handler(cpu_update_state, env);
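+ /* MSR_IA32_FEATURE_CONTROL is only of interest when the guest sees
+ * VMX or SMX, so only sync it in that case. */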
+ c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
+ if (c) {
+ has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
+ !!(c->ecx & CPUID_EXT_SMX);
+ }
+
cpuid_data.cpuid.padding = 0;
r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
if (r) {
int kvm_arch_init(KVMState *s)
{
- QemuOptsList *list = qemu_find_opts("machine");
uint64_t identity_base = 0xfffbc000;
uint64_t shadow_mem;
int ret;
}
qemu_register_reset(kvm_unpoison_all, NULL);
- if (!QTAILQ_EMPTY(&list->head)) {
- shadow_mem = qemu_opt_get_size(QTAILQ_FIRST(&list->head),
- "kvm_shadow_mem", -1);
- if (shadow_mem != -1) {
- shadow_mem /= 4096;
- ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
- if (ret < 0) {
- return ret;
- }
+ shadow_mem = qemu_opt_get_size(qemu_get_machine_opts(),
+ "kvm_shadow_mem", -1);
+ if (shadow_mem != -1) {
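+ /* The option value is in bytes, but KVM_SET_NR_MMU_PAGES takes a
+ * count of 4k pages. */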
+ shadow_mem /= 4096;
+ ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
+ if (ret < 0) {
+ return ret;
}
}
return 0;
entry->data = value;
}
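+/*
+ * The TSC deadline MSR gets its own ioctl so kvm_arch_put_registers
+ * can restore it after the rest of the MSR state: writing a nonzero
+ * deadline immediately arms the APIC timer in TSC-deadline mode.
+ */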
+static int kvm_put_tscdeadline_msr(X86CPU *cpu)
+{
+ CPUX86State *env = &cpu->env;
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entries[1];
+ } msr_data;
+ struct kvm_msr_entry *msrs = msr_data.entries;
+
+ if (!has_msr_tsc_deadline) {
+ return 0;
+ }
+
+ kvm_msr_entry_set(&msrs[0], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
+
+ msr_data.info.nmsrs = 1;
+
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+}
+
static int kvm_put_msrs(X86CPU *cpu, int level)
{
CPUX86State *env = &cpu->env;
struct kvm_msr_entry entries[100];
} msr_data;
struct kvm_msr_entry *msrs = msr_data.entries;
- int n = 0;
+ int n = 0, i;
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
if (has_msr_tsc_adjust) {
kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
}
- if (has_msr_tsc_deadline) {
- kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
- }
if (has_msr_misc_enable) {
kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
env->msr_ia32_misc_enable);
}
}
/*
- * The following paravirtual MSRs have side effects on the guest or are
- * too heavy for normal writeback. Limit them to reset or full state
- * updates.
+ * The following MSRs have side effects on the guest or are too heavy
+ * for normal writeback. Limit them to reset or full state updates.
*/
if (level >= KVM_PUT_RESET_STATE) {
kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
kvm_msr_entry_set(&msrs[n++], MSR_KVM_STEAL_TIME,
env->steal_time_msr);
}
- if (hyperv_hypercall_available()) {
+ if (has_msr_architectural_pmu) {
+ /* Disable all counters while their state is being loaded. */
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ /* Set the counter values. */
+ for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR0 + i,
+ env->msr_fixed_counters[i]);
+ }
+ for (i = 0; i < num_architectural_pmu_counters; i++) {
+ kvm_msr_entry_set(&msrs[n++], MSR_P6_PERFCTR0 + i,
+ env->msr_gp_counters[i]);
+ kvm_msr_entry_set(&msrs[n++], MSR_P6_EVNTSEL0 + i,
+ env->msr_gp_evtsel[i]);
+ }
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_STATUS,
+ env->msr_global_status);
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+ env->msr_global_ovf_ctrl);
+
+ /* Now start the PMU. */
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL,
+ env->msr_fixed_ctr_ctrl);
+ kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
+ env->msr_global_ctrl);
+ }
+ if (hyperv_hypercall_available(cpu)) {
kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
}
- if (hyperv_vapic_recommended()) {
+ if (cpu->hyperv_vapic) {
kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
}
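+ /* Written only when CPUID advertises VMX or SMX; see the check in
+ * kvm_arch_init_vcpu. */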
+ if (has_msr_feature_control) {
+ kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL,
+ env->msr_ia32_feature_control);
+ }
}
if (env->mcg_cap) {
int i;
for (i = 0; i < xcrs.nr_xcrs; i++) {
/* Only support xcr0 now */
- if (xcrs.xcrs[0].xcr == 0) {
- env->xcr0 = xcrs.xcrs[0].value;
+ if (xcrs.xcrs[i].xcr == 0) {
+ env->xcr0 = xcrs.xcrs[i].value;
break;
}
}
if (has_msr_misc_enable) {
msrs[n++].index = MSR_IA32_MISC_ENABLE;
}
+ if (has_msr_feature_control) {
+ msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
+ }
if (!env->tsc_valid) {
msrs[n++].index = MSR_IA32_TSC;
if (has_msr_kvm_steal_time) {
msrs[n++].index = MSR_KVM_STEAL_TIME;
}
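+ /* Read back the same PMU MSR set that kvm_put_msrs writes. */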
+ if (has_msr_architectural_pmu) {
+ msrs[n++].index = MSR_CORE_PERF_FIXED_CTR_CTRL;
+ msrs[n++].index = MSR_CORE_PERF_GLOBAL_CTRL;
+ msrs[n++].index = MSR_CORE_PERF_GLOBAL_STATUS;
+ msrs[n++].index = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
+ for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+ msrs[n++].index = MSR_CORE_PERF_FIXED_CTR0 + i;
+ }
+ for (i = 0; i < num_architectural_pmu_counters; i++) {
+ msrs[n++].index = MSR_P6_PERFCTR0 + i;
+ msrs[n++].index = MSR_P6_EVNTSEL0 + i;
+ }
+ }
if (env->mcg_cap) {
msrs[n++].index = MSR_MCG_STATUS;
}
for (i = 0; i < ret; i++) {
- switch (msrs[i].index) {
+ uint32_t index = msrs[i].index;
+ switch (index) {
case MSR_IA32_SYSENTER_CS:
env->sysenter_cs = msrs[i].data;
break;
case MSR_IA32_MISC_ENABLE:
env->msr_ia32_misc_enable = msrs[i].data;
break;
+ case MSR_IA32_FEATURE_CONTROL:
+ env->msr_ia32_feature_control = msrs[i].data;
+ break;
default:
if (msrs[i].index >= MSR_MC0_CTL &&
msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
case MSR_KVM_STEAL_TIME:
env->steal_time_msr = msrs[i].data;
break;
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ env->msr_fixed_ctr_ctrl = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ env->msr_global_ctrl = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ env->msr_global_status = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ env->msr_global_ovf_ctrl = msrs[i].data;
+ break;
+ case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
+ env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
+ break;
+ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
+ break;
+ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
+ break;
}
}
static int kvm_guest_debug_workarounds(X86CPU *cpu)
{
+ CPUState *cs = CPU(cpu);
CPUX86State *env = &cpu->env;
int ret = 0;
unsigned long reinject_trap = 0;
* reinject them via SET_GUEST_DEBUG.
*/
if (reinject_trap ||
- (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
- ret = kvm_update_guest_debug(env, reinject_trap);
+ (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) {
+ ret = kvm_update_guest_debug(cs, reinject_trap);
}
return ret;
}
return ret;
}
}
+
+ ret = kvm_put_tscdeadline_msr(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = kvm_put_vcpu_events(x86_cpu, level);
if (ret < 0) {
return ret;
return 1;
}
-int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
- CPUX86State *env = &X86_CPU(cpu)->env;
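+ /* INT3 (0xcc) is the canonical one-byte x86 breakpoint instruction. */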
static const uint8_t int3 = 0xcc;
- if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
- cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) {
return -EINVAL;
}
return 0;
}
-int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
- CPUX86State *env = &X86_CPU(cpu)->env;
uint8_t int3;
- if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
- cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
+ if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
return -EINVAL;
}
return 0;
static int kvm_handle_debug(X86CPU *cpu,
struct kvm_debug_exit_arch *arch_info)
{
+ CPUState *cs = CPU(cpu);
CPUX86State *env = &cpu->env;
int ret = 0;
int n;
if (arch_info->exception == 1) {
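+ /* DR6 bit 14 (BS) is set after a single-step trap. */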
if (arch_info->dr6 & (1 << 14)) {
- if (env->singlestep_enabled) {
+ if (cs->singlestep_enabled) {
ret = EXCP_DEBUG;
}
} else {
ret = EXCP_DEBUG;
}
if (ret == 0) {
- cpu_synchronize_state(env);
+ cpu_synchronize_state(cs);
assert(env->exception_injected == -1);
/* pass to guest */