diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 39f4fbb3c..118848235 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -31,7 +31,7 @@
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
 #include "exec/ioport.h"
-#include "hyperv.h"
+#include <asm/hyperv.h>
 #include "hw/pci/pci.h"
 
 //#define DEBUG_KVM
@@ -65,12 +65,16 @@ static bool has_msr_star;
 static bool has_msr_hsave_pa;
 static bool has_msr_tsc_adjust;
 static bool has_msr_tsc_deadline;
+static bool has_msr_feature_control;
 static bool has_msr_async_pf_en;
 static bool has_msr_pv_eoi_en;
 static bool has_msr_misc_enable;
 static bool has_msr_kvm_steal_time;
 static int lm_capable_kernel;
+static bool has_msr_architectural_pmu;
+static uint32_t num_architectural_pmu_counters;
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
@@ -318,7 +322,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 
     if ((env->mcg_cap & MCG_SER_P) && addr
         && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
-        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
+        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
             !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!\n");
@@ -345,20 +349,22 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 
 int kvm_arch_on_sigbus(int code, void *addr)
 {
-    if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
+    X86CPU *cpu = X86_CPU(first_cpu);
+
+    if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
         ram_addr_t ram_addr;
         hwaddr paddr;
 
         /* Hope we are lucky for AO MCE */
-        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
-            !kvm_physical_memory_addr_from_host(CPU(first_cpu)->kvm_state,
+        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
+            !kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
                                                 addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!: %p\n", addr);
             return 0;
         }
         kvm_hwpoison_page_add(ram_addr);
-        kvm_mce_inject(x86_env_get_cpu(first_cpu), paddr, code);
+        kvm_mce_inject(X86_CPU(first_cpu), paddr, code);
     } else {
         if (code == BUS_MCEERR_AO) {
             return 0;
@@ -418,6 +424,22 @@ unsigned long kvm_arch_vcpu_id(CPUState *cs)
     return cpu->env.cpuid_apic_id;
 }
 
+#ifndef KVM_CPUID_SIGNATURE_NEXT
+#define KVM_CPUID_SIGNATURE_NEXT 0x40000100
+#endif
+
+static bool hyperv_hypercall_available(X86CPU *cpu)
+{
+    return cpu->hyperv_vapic ||
+           (cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY);
+}
+
+static bool hyperv_enabled(X86CPU *cpu)
+{
+    return hyperv_hypercall_available(cpu) ||
+           cpu->hyperv_relaxed_timing;
+}
+
 #define KVM_MAX_CPUID_ENTRIES 100
 
 int kvm_arch_init_vcpu(CPUState *cs)
@@ -434,13 +456,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
     uint32_t signature[3];
     int r;
 
+    memset(&cpuid_data, 0, sizeof(cpuid_data));
+
     cpuid_i = 0;
 
     /* Paravirtualization CPUIDs */
     c = &cpuid_data.entries[cpuid_i++];
-    memset(c, 0, sizeof(*c));
     c->function = KVM_CPUID_SIGNATURE;
-    if (!hyperv_enabled()) {
+    if (!hyperv_enabled(cpu)) {
         memcpy(signature, "KVMKVMKVM\0\0\0", 12);
         c->eax = 0;
     } else {
@@ -452,50 +475,44 @@ int kvm_arch_init_vcpu(CPUState *cs)
     c->edx = signature[2];
 
     c = &cpuid_data.entries[cpuid_i++];
-    memset(c, 0, sizeof(*c));
     c->function = KVM_CPUID_FEATURES;
     c->eax = env->features[FEAT_KVM];
 
-    if (hyperv_enabled()) {
+    if (hyperv_enabled(cpu)) {
         memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
         c->eax = signature[0];
 
         c = &cpuid_data.entries[cpuid_i++];
-        memset(c, 0, sizeof(*c));
         c->function = HYPERV_CPUID_VERSION;
         c->eax = 0x00001bbc;
         c->ebx = 0x00060001;
 
         c = &cpuid_data.entries[cpuid_i++];
-        memset(c, 0, sizeof(*c));
         c->function = HYPERV_CPUID_FEATURES;
-        if (hyperv_relaxed_timing_enabled()) {
+        if (cpu->hyperv_relaxed_timing) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
         }
-        if (hyperv_vapic_recommended()) {
+        if (cpu->hyperv_vapic) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
            c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
         }
 
         c = &cpuid_data.entries[cpuid_i++];
-        memset(c, 0, sizeof(*c));
         c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
-        if (hyperv_relaxed_timing_enabled()) {
+        if (cpu->hyperv_relaxed_timing) {
            c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
         }
-        if (hyperv_vapic_recommended()) {
+        if (cpu->hyperv_vapic) {
            c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
         }
-        c->ebx = hyperv_get_spinlock_retries();
+        c->ebx = cpu->hyperv_spinlock_attempts;
 
         c = &cpuid_data.entries[cpuid_i++];
-        memset(c, 0, sizeof(*c));
         c->function = HYPERV_CPUID_IMPLEMENT_LIMITS;
         c->eax = 0x40;
         c->ebx = 0x40;
 
         c = &cpuid_data.entries[cpuid_i++];
-        memset(c, 0, sizeof(*c));
         c->function = KVM_CPUID_SIGNATURE_NEXT;
         memcpy(signature, "KVMKVMKVM\0\0\0", 12);
         c->eax = 0;
@@ -579,6 +596,25 @@ int kvm_arch_init_vcpu(CPUState *cs)
             break;
         }
     }
+
+    if (limit >= 0x0a) {
+        uint32_t ver;
+
+        cpu_x86_cpuid(env, 0x0a, 0, &ver, &unused, &unused, &unused);
+        if ((ver & 0xff) > 0) {
+            has_msr_architectural_pmu = true;
+            num_architectural_pmu_counters = (ver & 0xff00) >> 8;
+
+            /* Shouldn't be more than 32, since that's the number of bits
+             * available in EBX to tell us _which_ counters are available.
+             * Play it safe.
+             */
+            if (num_architectural_pmu_counters > MAX_GP_COUNTERS) {
+                num_architectural_pmu_counters = MAX_GP_COUNTERS;
+            }
+        }
+    }
+
     cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
 
     for (i = 0x80000000; i <= limit; i++) {
@@ -642,6 +678,12 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     qemu_add_vm_change_state_handler(cpu_update_state, env);
 
+    c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
+    if (c) {
+        has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
+                                  !!(c->ecx & CPUID_EXT_SMX);
+    }
+
     cpuid_data.cpuid.padding = 0;
     r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
     if (r) {
@@ -741,7 +783,6 @@ static int kvm_get_supported_msrs(KVMState *s)
 
 int kvm_arch_init(KVMState *s)
 {
-    QemuOptsList *list = qemu_find_opts("machine");
     uint64_t identity_base = 0xfffbc000;
     uint64_t shadow_mem;
     int ret;
@@ -790,15 +831,13 @@ int kvm_arch_init(KVMState *s)
     }
     qemu_register_reset(kvm_unpoison_all, NULL);
 
-    if (!QTAILQ_EMPTY(&list->head)) {
-        shadow_mem = qemu_opt_get_size(QTAILQ_FIRST(&list->head),
-                                       "kvm_shadow_mem", -1);
-        if (shadow_mem != -1) {
-            shadow_mem /= 4096;
-            ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
-            if (ret < 0) {
-                return ret;
-            }
+    shadow_mem = qemu_opt_get_size(qemu_get_machine_opts(),
+                                   "kvm_shadow_mem", -1);
+    if (shadow_mem != -1) {
+        shadow_mem /= 4096;
+        ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
+        if (ret < 0) {
+            return ret;
         }
     }
     return 0;
@@ -1045,6 +1084,26 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
     entry->data = value;
 }
 
+static int kvm_put_tscdeadline_msr(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entries[1];
+    } msr_data;
+    struct kvm_msr_entry *msrs = msr_data.entries;
+
+    if (!has_msr_tsc_deadline) {
+        return 0;
+    }
+
+    kvm_msr_entry_set(&msrs[0], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
+
+    msr_data.info.nmsrs = 1;
+
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+}
+
 static int kvm_put_msrs(X86CPU *cpu, int level)
 {
     CPUX86State *env = &cpu->env;
@@ -1053,7 +1112,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         struct kvm_msr_entry entries[100];
     } msr_data;
     struct kvm_msr_entry *msrs = msr_data.entries;
-    int n = 0;
+    int n = 0, i;
 
     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
@@ -1068,9 +1127,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
     if (has_msr_tsc_adjust) {
         kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
     }
-    if (has_msr_tsc_deadline) {
-        kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
-    }
     if (has_msr_misc_enable) {
         kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
                           env->msr_ia32_misc_enable);
@@ -1095,9 +1151,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         }
     }
     /*
-     * The following paravirtual MSRs have side effects on the guest or are
-     * too heavy for normal writeback. Limit them to reset or full state
-     * updates.
+     * The following MSRs have side effects on the guest or are too heavy
+     * for normal writeback. Limit them to reset or full state updates.
      */
     if (level >= KVM_PUT_RESET_STATE) {
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
@@ -1115,13 +1170,44 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
             kvm_msr_entry_set(&msrs[n++], MSR_KVM_STEAL_TIME,
                               env->steal_time_msr);
         }
-        if (hyperv_hypercall_available()) {
+        if (has_msr_architectural_pmu) {
+            /* Stop the counter. */
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+            /* Set the counter values. */
+            for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+                kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR0 + i,
+                                  env->msr_fixed_counters[i]);
+            }
+            for (i = 0; i < num_architectural_pmu_counters; i++) {
+                kvm_msr_entry_set(&msrs[n++], MSR_P6_PERFCTR0 + i,
+                                  env->msr_gp_counters[i]);
+                kvm_msr_entry_set(&msrs[n++], MSR_P6_EVNTSEL0 + i,
+                                  env->msr_gp_evtsel[i]);
+            }
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_STATUS,
+                              env->msr_global_status);
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+                              env->msr_global_ovf_ctrl);
+
+            /* Now start the PMU. */
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL,
+                              env->msr_fixed_ctr_ctrl);
+            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
+                              env->msr_global_ctrl);
+        }
+        if (hyperv_hypercall_available(cpu)) {
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
         }
-        if (hyperv_vapic_recommended()) {
+        if (cpu->hyperv_vapic) {
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
         }
+        if (has_msr_feature_control) {
+            kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL,
+                              env->msr_ia32_feature_control);
+        }
     }
     if (env->mcg_cap) {
         int i;
@@ -1223,8 +1309,8 @@ static int kvm_get_xcrs(X86CPU *cpu)
 
     for (i = 0; i < xcrs.nr_xcrs; i++) {
         /* Only support xcr0 now */
-        if (xcrs.xcrs[0].xcr == 0) {
-            env->xcr0 = xcrs.xcrs[0].value;
+        if (xcrs.xcrs[i].xcr == 0) {
+            env->xcr0 = xcrs.xcrs[i].value;
             break;
         }
     }
@@ -1346,6 +1432,9 @@ static int kvm_get_msrs(X86CPU *cpu)
     if (has_msr_misc_enable) {
        msrs[n++].index = MSR_IA32_MISC_ENABLE;
     }
+    if (has_msr_feature_control) {
+        msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
+    }
 
     if (!env->tsc_valid) {
         msrs[n++].index = MSR_IA32_TSC;
@@ -1371,6 +1460,19 @@ static int kvm_get_msrs(X86CPU *cpu)
     if (has_msr_kvm_steal_time) {
         msrs[n++].index = MSR_KVM_STEAL_TIME;
     }
+    if (has_msr_architectural_pmu) {
+        msrs[n++].index = MSR_CORE_PERF_FIXED_CTR_CTRL;
+        msrs[n++].index = MSR_CORE_PERF_GLOBAL_CTRL;
+        msrs[n++].index = MSR_CORE_PERF_GLOBAL_STATUS;
+        msrs[n++].index = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
+        for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
+            msrs[n++].index = MSR_CORE_PERF_FIXED_CTR0 + i;
+        }
+        for (i = 0; i < num_architectural_pmu_counters; i++) {
+            msrs[n++].index = MSR_P6_PERFCTR0 + i;
+            msrs[n++].index = MSR_P6_EVNTSEL0 + i;
+        }
+    }
 
     if (env->mcg_cap) {
         msrs[n++].index = MSR_MCG_STATUS;
@@ -1387,7 +1489,8 @@ static int kvm_get_msrs(X86CPU *cpu)
     }
 
     for (i = 0; i < ret; i++) {
-        switch (msrs[i].index) {
+        uint32_t index = msrs[i].index;
+        switch (index) {
         case MSR_IA32_SYSENTER_CS:
             env->sysenter_cs = msrs[i].data;
             break;
@@ -1444,6 +1547,9 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_IA32_MISC_ENABLE:
             env->msr_ia32_misc_enable = msrs[i].data;
             break;
+        case MSR_IA32_FEATURE_CONTROL:
+            env->msr_ia32_feature_control = msrs[i].data;
+            break;
         default:
             if (msrs[i].index >= MSR_MC0_CTL &&
                 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
@@ -1459,6 +1565,27 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_KVM_STEAL_TIME:
             env->steal_time_msr = msrs[i].data;
             break;
+        case MSR_CORE_PERF_FIXED_CTR_CTRL:
+            env->msr_fixed_ctr_ctrl = msrs[i].data;
+            break;
+        case MSR_CORE_PERF_GLOBAL_CTRL:
+            env->msr_global_ctrl = msrs[i].data;
+            break;
+        case MSR_CORE_PERF_GLOBAL_STATUS:
+            env->msr_global_status = msrs[i].data;
+            break;
+        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+            env->msr_global_ovf_ctrl = msrs[i].data;
+            break;
+        case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
+            env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
+            break;
+        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
+            env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
+            break;
+        case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
+            env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
+            break;
         }
     }
 
@@ -1595,6 +1722,7 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
 
 static int kvm_guest_debug_workarounds(X86CPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
     int ret = 0;
     unsigned long reinject_trap = 0;
@@ -1617,8 +1745,8 @@ static int kvm_guest_debug_workarounds(X86CPU *cpu)
      * reinject them via SET_GUEST_DEBUG.
      */
     if (reinject_trap ||
-        (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
-        ret = kvm_update_guest_debug(env, reinject_trap);
+        (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) {
+        ret = kvm_update_guest_debug(cs, reinject_trap);
     }
     return ret;
 }
@@ -1708,6 +1836,12 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
             return ret;
         }
     }
+
+    ret = kvm_put_tscdeadline_msr(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+
     ret = kvm_put_vcpu_events(x86_cpu, level);
     if (ret < 0) {
         return ret;
@@ -1932,25 +2066,23 @@ static int kvm_handle_tpr_access(X86CPU *cpu)
     return 1;
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
 {
-    CPUX86State *env = &X86_CPU(cpu)->env;
     static const uint8_t int3 = 0xcc;
 
-    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
-        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) {
         return -EINVAL;
     }
     return 0;
 }
 
-int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
 {
-    CPUX86State *env = &X86_CPU(cpu)->env;
     uint8_t int3;
 
-    if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
-        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
+    if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }
    return 0;
@@ -2043,13 +2175,14 @@ static CPUWatchpoint hw_watchpoint;
 static int kvm_handle_debug(X86CPU *cpu,
                             struct kvm_debug_exit_arch *arch_info)
 {
+    CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
     int ret = 0;
     int n;
 
     if (arch_info->exception == 1) {
         if (arch_info->dr6 & (1 << 14)) {
-            if (env->singlestep_enabled) {
+            if (cs->singlestep_enabled) {
                 ret = EXCP_DEBUG;
             }
         } else {
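
Note on the PMU-detection hunk in kvm_arch_init_vcpu(): CPUID leaf 0x0a uses the Intel
architectural performance-monitoring encoding, in which EAX bits 7:0 carry the PMU version
ID (zero means no architectural PMU) and bits 15:8 the number of general-purpose counters,
which is why the patch tests (ver & 0xff) > 0 and extracts (ver & 0xff00) >> 8. The clamp
to MAX_GP_COUNTERS mirrors the comment in the hunk: EBX has only 32 bits with which to
describe counter availability. The standalone sketch below performs the same decoding; it
is illustrative only, not part of this patch, and it assumes an x86 GCC/Clang toolchain,
whose <cpuid.h> header provides the __get_cpuid() helper:

    #include <cpuid.h>   /* GCC/Clang x86 CPUID wrapper */
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x0a: architectural performance monitoring.
         * __get_cpuid() returns 0 if the leaf is out of range. */
        if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx)) {
            puts("CPUID leaf 0x0a not supported");
            return 1;
        }

        unsigned int version = eax & 0xff;          /* bits 7:0  */
        unsigned int num_gp  = (eax & 0xff00) >> 8; /* bits 15:8 */

        if (version == 0) {
            puts("no architectural PMU");
        } else {
            printf("architectural PMU v%u with %u GP counters\n",
                   version, num_gp);
        }
        return 0;
    }

On such a CPU the patch would save and restore MSR_P6_PERFCTR0/MSR_P6_EVNTSEL0 pairs for
each reported general-purpose counter, stopping the PMU via the global control MSRs first
so that the counters cannot advance while their values are being rewritten.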