X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=kvm-all.c;h=90b8573656a2bfec72bb17b91c751e3a2d3df6f6;hb=4eaf72029446ba693c63429475ce46348f65bf01;hp=fbd2d93188f648d8acff196e1df8d29aa480d4a7;hpb=b66e10e4c9ae738412b9742db49457f6b703e349;p=mirror_qemu.git

diff --git a/kvm-all.c b/kvm-all.c
index fbd2d93188..90b8573656 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -15,7 +15,6 @@
 
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
-#include <sys/mman.h>
 
 #include <linux/kvm.h>
 
@@ -26,15 +25,17 @@
 #include "qemu/error-report.h"
 #include "hw/hw.h"
 #include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
 #include "hw/s390x/adapter.h"
 #include "exec/gdbstub.h"
 #include "sysemu/kvm_int.h"
+#include "sysemu/cpus.h"
 #include "qemu/bswap.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
 #include "exec/address-spaces.h"
 #include "qemu/event_notifier.h"
-#include "trace.h"
+#include "trace-root.h"
 #include "hw/irq.h"
 
 #include "hw/boards.h"
@@ -119,6 +120,8 @@ bool kvm_readonly_mem_allowed;
 bool kvm_vm_attributes_allowed;
 bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
+bool kvm_msi_use_devid;
+static bool kvm_immediate_exit;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
     KVM_CAP_INFO(USER_MEMORY),
@@ -1047,7 +1050,16 @@ void kvm_irqchip_commit_routes(KVMState *s)
 {
     int ret;
 
+    if (kvm_gsi_direct_mapping()) {
+        return;
+    }
+
+    if (!kvm_gsi_routing_enabled()) {
+        return;
+    }
+
     s->irq_routes->flags = 0;
+    trace_kvm_irqchip_commit_routes();
     ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
     assert(ret == 0);
 }
@@ -1094,8 +1106,6 @@ static int kvm_update_routing_entry(KVMState *s,
 
         *entry = *new_entry;
 
-        kvm_irqchip_commit_routes(s);
-
         return 0;
     }
 
@@ -1133,6 +1143,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
         }
     }
     clear_gsi(s, virq);
+    kvm_arch_release_virq_post(virq);
 }
 
 static unsigned int kvm_hash_msi(uint32_t data)
@@ -1238,10 +1249,15 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
     return kvm_set_irq(s, route->kroute.gsi, 1);
 }
 
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev)
+int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
 {
     struct kvm_irq_routing_entry kroute = {};
     int virq;
+    MSIMessage msg = {0, 0};
+
+    if (dev) {
+        msg = pci_get_msi_message(dev, vector);
+    }
 
     if (kvm_gsi_direct_mapping()) {
         return kvm_arch_msi_data_to_gsi(msg.data);
@@ -1262,12 +1278,19 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev)
     kroute.u.msi.address_lo = (uint32_t)msg.address;
     kroute.u.msi.address_hi = msg.address >> 32;
     kroute.u.msi.data = le32_to_cpu(msg.data);
+    if (kvm_msi_devid_required()) {
+        kroute.flags = KVM_MSI_VALID_DEVID;
+        kroute.u.msi.devid = pci_requester_id(dev);
+    }
     if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
         kvm_irqchip_release_virq(s, virq);
         return -EINVAL;
     }
 
+    trace_kvm_irqchip_add_msi_route(virq);
+
     kvm_add_routing_entry(s, &kroute);
+    kvm_arch_add_msi_route_post(&kroute, vector, dev);
     kvm_irqchip_commit_routes(s);
 
     return virq;
@@ -1292,10 +1315,16 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
     kroute.u.msi.address_lo = (uint32_t)msg.address;
     kroute.u.msi.address_hi = msg.address >> 32;
     kroute.u.msi.data = le32_to_cpu(msg.data);
+    if (kvm_msi_devid_required()) {
+        kroute.flags = KVM_MSI_VALID_DEVID;
+        kroute.u.msi.devid = pci_requester_id(dev);
+    }
     if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
         return -EINVAL;
     }
 
+    trace_kvm_irqchip_update_msi_route(virq);
+
     return kvm_update_routing_entry(s, &kroute);
 }
 
@@ -1391,7 +1420,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
     abort();
 }
 
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
 {
     return -ENOSYS;
 }
@@ -1520,10 +1549,16 @@ static int kvm_max_vcpus(KVMState *s)
     return (ret) ? ret : kvm_recommended_vcpus(s);
 }
 
+static int kvm_max_vcpu_id(KVMState *s)
+{
+    int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID);
+    return (ret) ? ret : kvm_max_vcpus(s);
+}
+
 bool kvm_vcpu_id_is_valid(int vcpu_id)
 {
     KVMState *s = KVM_STATE(current_machine->accelerator);
-    return vcpu_id >= 0 && vcpu_id < kvm_max_vcpus(s);
+    return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
 }
 
 static int kvm_init(MachineState *ms)
@@ -1586,6 +1621,7 @@ static int kvm_init(MachineState *ms)
         goto err;
     }
 
+    kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
     s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
 
     /* If unspecified, use the default value */
@@ -1823,10 +1859,8 @@ void kvm_flush_coalesced_mmio_buffer(void)
     s->coalesced_flush_in_progress = false;
 }
 
-static void do_kvm_cpu_synchronize_state(void *arg)
+static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
-    CPUState *cpu = arg;
-
     if (!cpu->kvm_vcpu_dirty) {
         kvm_arch_get_registers(cpu);
         cpu->kvm_vcpu_dirty = true;
@@ -1836,34 +1870,85 @@ static void do_kvm_cpu_synchronize_state(void *arg)
 void kvm_cpu_synchronize_state(CPUState *cpu)
 {
     if (!cpu->kvm_vcpu_dirty) {
-        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
+        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
     }
 }
 
-static void do_kvm_cpu_synchronize_post_reset(void *arg)
+static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
 {
-    CPUState *cpu = arg;
-
     kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
     cpu->kvm_vcpu_dirty = false;
 }
 
 void kvm_cpu_synchronize_post_reset(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, cpu);
+    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 }
 
-static void do_kvm_cpu_synchronize_post_init(void *arg)
+static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 {
-    CPUState *cpu = arg;
-
     kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
     cpu->kvm_vcpu_dirty = false;
 }
 
 void kvm_cpu_synchronize_post_init(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu);
+    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+}
+
+#ifdef KVM_HAVE_MCE_INJECTION
+static __thread void *pending_sigbus_addr;
+static __thread int pending_sigbus_code;
+static __thread bool have_sigbus_pending;
+#endif
+
+static void kvm_cpu_kick(CPUState *cpu)
+{
+    atomic_set(&cpu->kvm_run->immediate_exit, 1);
+}
+
+static void kvm_cpu_kick_self(void)
+{
+    if (kvm_immediate_exit) {
+        kvm_cpu_kick(current_cpu);
+    } else {
+        qemu_cpu_kick_self();
+    }
+}
+
+static void kvm_eat_signals(CPUState *cpu)
+{
+    struct timespec ts = { 0, 0 };
+    siginfo_t siginfo;
+    sigset_t waitset;
+    sigset_t chkset;
+    int r;
+
+    if (kvm_immediate_exit) {
+        atomic_set(&cpu->kvm_run->immediate_exit, 0);
+        /* Write kvm_run->immediate_exit before the cpu->exit_request
+         * write in kvm_cpu_exec.
+         */
+        smp_wmb();
+        return;
+    }
+
+    sigemptyset(&waitset);
+    sigaddset(&waitset, SIG_IPI);
+
+    do {
+        r = sigtimedwait(&waitset, &siginfo, &ts);
+        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
+            perror("sigtimedwait");
+            exit(1);
+        }
+
+        r = sigpending(&chkset);
+        if (r == -1) {
+            perror("sigpending");
+            exit(1);
+        }
+    } while (sigismember(&chkset, SIG_IPI));
 }
 
 int kvm_cpu_exec(CPUState *cpu)
@@ -1874,7 +1959,7 @@ int kvm_cpu_exec(CPUState *cpu)
     DPRINTF("kvm_cpu_exec()\n");
 
     if (kvm_arch_process_async_events(cpu)) {
-        cpu->exit_request = 0;
+        atomic_set(&cpu->exit_request, 0);
         return EXCP_HLT;
     }
 
@@ -1889,23 +1974,39 @@ int kvm_cpu_exec(CPUState *cpu)
         }
 
         kvm_arch_pre_run(cpu, run);
-        if (cpu->exit_request) {
+        if (atomic_read(&cpu->exit_request)) {
            DPRINTF("interrupt exit requested\n");
             /*
              * KVM requires us to reenter the kernel after IO exits to complete
              * instruction emulation. This self-signal will ensure that we
              * leave ASAP again.
             */
-            qemu_cpu_kick_self();
+            kvm_cpu_kick_self();
         }
 
+        /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
+         * Matching barrier in kvm_eat_signals.
+         */
+        smp_rmb();
+
         run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
 
         attrs = kvm_arch_post_run(cpu, run);
 
+#ifdef KVM_HAVE_MCE_INJECTION
+        if (unlikely(have_sigbus_pending)) {
+            qemu_mutex_lock_iothread();
+            kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
+                                    pending_sigbus_addr);
+            have_sigbus_pending = false;
+            qemu_mutex_unlock_iothread();
+        }
+#endif
+
         if (run_ret < 0) {
             if (run_ret == -EINTR || run_ret == -EAGAIN) {
                 DPRINTF("io window exit\n");
+                kvm_eat_signals(cpu);
                 ret = EXCP_INTERRUPT;
                 break;
             }
@@ -1973,8 +2074,9 @@ int kvm_cpu_exec(CPUState *cpu)
             ret = EXCP_INTERRUPT;
             break;
         case KVM_SYSTEM_EVENT_CRASH:
+            kvm_cpu_synchronize_state(cpu);
             qemu_mutex_lock_iothread();
-            qemu_system_guest_panicked();
+            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
             qemu_mutex_unlock_iothread();
             ret = 0;
             break;
@@ -1998,7 +2100,7 @@ int kvm_cpu_exec(CPUState *cpu)
         vm_stop(RUN_STATE_INTERNAL_ERROR);
     }
 
-    cpu->exit_request = 0;
+    atomic_set(&cpu->exit_request, 0);
     return ret;
 }
 
@@ -2119,11 +2221,12 @@ void kvm_device_access(int fd, int group, uint64_t attr,
     if (err < 0) {
         error_report("KVM_%s_DEVICE_ATTR failed: %s",
"SET" : "GET", strerror(-err)); - error_printf("Group %d attr 0x%016" PRIx64, group, attr); + error_printf("Group %d attr 0x%016" PRIx64 "\n", group, attr); abort(); } } +/* Return 1 on success, 0 on failure */ int kvm_has_sync_mmu(void) { return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); @@ -2166,20 +2269,6 @@ int kvm_has_intx_set_mask(void) return kvm_state->intx_set_mask; } -void kvm_setup_guest_memory(void *start, size_t size) -{ - if (!kvm_has_sync_mmu()) { - int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK); - - if (ret) { - perror("qemu_madvise"); - fprintf(stderr, - "Need MADV_DONTFORK in absence of synchronous KVM MMU\n"); - exit(1); - } - } -} - #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, target_ulong pc) @@ -2201,15 +2290,15 @@ int kvm_sw_breakpoints_active(CPUState *cpu) struct kvm_set_guest_debug_data { struct kvm_guest_debug dbg; - CPUState *cpu; int err; }; -static void kvm_invoke_set_guest_debug(void *data) +static void kvm_invoke_set_guest_debug(CPUState *cpu, run_on_cpu_data data) { - struct kvm_set_guest_debug_data *dbg_data = data; + struct kvm_set_guest_debug_data *dbg_data = + (struct kvm_set_guest_debug_data *) data.host_ptr; - dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG, + dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg_data->dbg); } @@ -2223,9 +2312,9 @@ int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; } kvm_arch_update_guest_debug(cpu, &data.dbg); - data.cpu = cpu; - run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data); + run_on_cpu(cpu, kvm_invoke_set_guest_debug, + RUN_ON_CPU_HOST_PTR(&data)); return data.err; } @@ -2357,16 +2446,12 @@ void kvm_remove_all_breakpoints(CPUState *cpu) } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) +static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) { KVMState *s = kvm_state; struct kvm_signal_mask *sigmask; int r; - if (!sigset) { - return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL); - } - sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset)); sigmask->len = s->sigmask_len; @@ -2376,14 +2461,73 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } + +static void kvm_ipi_signal(int sig) +{ + if (current_cpu) { + assert(kvm_immediate_exit); + kvm_cpu_kick(current_cpu); + } +} + +void kvm_init_cpu_signals(CPUState *cpu) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = kvm_ipi_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); +#if defined KVM_HAVE_MCE_INJECTION + sigdelset(&set, SIGBUS); + pthread_sigmask(SIG_SETMASK, &set, NULL); +#endif + sigdelset(&set, SIG_IPI); + if (kvm_immediate_exit) { + r = pthread_sigmask(SIG_SETMASK, &set, NULL); + } else { + r = kvm_set_signal_mask(cpu, &set); + } + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +} + +/* Called asynchronously in VCPU thread. */ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - return kvm_arch_on_sigbus_vcpu(cpu, code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + if (have_sigbus_pending) { + return 1; + } + have_sigbus_pending = true; + pending_sigbus_addr = addr; + pending_sigbus_code = code; + atomic_set(&cpu->exit_request, 1); + return 0; +#else + return 1; +#endif } +/* Called synchronously (via signalfd) in main thread. 
 int kvm_on_sigbus(int code, void *addr)
 {
-    return kvm_arch_on_sigbus(code, addr);
+#ifdef KVM_HAVE_MCE_INJECTION
+    /* Action required MCE kills the process if SIGBUS is blocked.  Because
+     * that's what happens in the I/O thread, where we handle MCE via signalfd,
+     * we can only get action optional here.
+     */
+    assert(code != BUS_MCEERR_AR);
+    kvm_arch_on_sigbus_vcpu(first_cpu, code, addr);
+    return 0;
+#else
+    return 1;
+#endif
 }
 
 int kvm_create_device(KVMState *s, uint64_t type, bool test)
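
With the signature change above, kvm_irqchip_add_msi_route() now fetches the MSIMessage itself via pci_get_msi_message(dev, vector), so callers hand over the vector index and the device instead of a precomputed message. A minimal caller sketch under those assumptions (attach_vector_to_kvm() is a hypothetical helper, not from this patch; in-tree users such as vfio and virtio follow the same shape):

    #include "qemu/osdep.h"
    #include "qemu/event_notifier.h"
    #include "hw/pci/pci.h"
    #include "sysemu/kvm.h"

    /* Route one MSI/MSI-X vector of a PCI device through the in-kernel
     * irqchip and bind an eventfd to the resulting virq. */
    static int attach_vector_to_kvm(KVMState *s, PCIDevice *dev, int vector,
                                    EventNotifier *n)
    {
        int virq = kvm_irqchip_add_msi_route(s, vector, dev);

        if (virq < 0) {
            return virq;    /* no route: fall back to userspace injection */
        }
        return kvm_irqchip_add_irqfd_notifier_gsi(s, n, NULL, virq);
    }

Passing the device also matters for the new KVM_MSI_VALID_DEVID path: when kvm_msi_devid_required() reports that the irqchip (e.g. an ARM GICv3 ITS) needs a device ID, the route is stamped with pci_requester_id(dev).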
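The kvm_immediate_exit machinery above replaces the old "unblock SIG_IPI only inside KVM_RUN" trick: when KVM_CAP_IMMEDIATE_EXIT is available, SIG_IPI stays unblocked and its handler (kvm_ipi_signal()) merely stores 1 to kvm_run->immediate_exit, so the next KVM_RUN returns -EINTR before entering the guest. A sketch of a cross-thread kick under these assumptions (kick_vcpu_thread() and its explicit pthread_t argument are illustrative; QEMU proper routes this through qemu_cpu_kick()):

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"
    #include "sysemu/cpus.h"
    #include "sysemu/kvm.h"

    static void kick_vcpu_thread(CPUState *cpu, pthread_t vcpu_thread)
    {
        atomic_set(&cpu->exit_request, 1);
        /* SIG_IPI is QEMU's vCPU IPI signal; kvm_ipi_signal() runs in the
         * vCPU thread and sets cpu->kvm_run->immediate_exit = 1, so a
         * concurrent or subsequent KVM_RUN bails out with -EINTR instead
         * of blocking in the guest. */
        pthread_kill(vcpu_thread, SIG_IPI);
    }

The smp_wmb() in kvm_eat_signals() and the smp_rmb() before KVM_RUN pair up: clearing immediate_exit is ordered before the next look at cpu->exit_request, and that read is ordered before the kernel's read of immediate_exit, so a kick cannot be observed half-way and lost.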
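Several hunks above track the run_on_cpu() API change: worker functions now receive the CPUState directly plus a run_on_cpu_data payload, instead of smuggling the CPU through a void pointer as kvm_invoke_set_guest_debug() used to. A hedged sketch of the new pattern (do_read_index() and its caller are hypothetical):

    #include "qemu/osdep.h"
    #include "qom/cpu.h"

    static void do_read_index(CPUState *cpu, run_on_cpu_data data)
    {
        int *result = (int *) data.host_ptr;

        /* Runs on cpu's own thread, like the guest-debug helper above. */
        *result = cpu->cpu_index;
    }

    static int read_index(CPUState *cpu)
    {
        int result = -1;

        run_on_cpu(cpu, do_read_index, RUN_ON_CPU_HOST_PTR(&result));
        return result;
    }

RUN_ON_CPU_NULL stands in where no payload is needed (the synchronize helpers above), and RUN_ON_CPU_HOST_PTR() wraps a host pointer exactly as the kvm_update_guest_debug() hunk does.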