X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=accel%2Fkvm%2Fkvm-all.c;h=439a4efe526327b97d6215936f2a390389282ebc;hb=9e264985ff0bc86927b44b334bd504687f78659d;hp=ca00daa2f56f88a5f6f69f6a7b6d2b7dad127f77;hpb=e0d79c9435c60d7637a49083a5789471b1b70bdb;p=mirror_qemu.git diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index ca00daa2f5..439a4efe52 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -41,6 +41,9 @@ #include "hw/irq.h" #include "sysemu/sev.h" #include "sysemu/balloon.h" +#include "qapi/visitor.h" +#include "qapi/qapi-types-common.h" +#include "qapi/qapi-visit-common.h" #include "hw/boards.h" @@ -92,6 +95,10 @@ struct KVMState int max_nested_state_len; int many_ioeventfds; int intx_set_mask; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; bool sync_mmu; bool manual_dirty_log_protect; /* The man page (and posix) say ioctl numbers are signed int, but @@ -157,7 +164,7 @@ static NotifierList kvm_irqchip_change_notifiers = int kvm_get_max_memslots(void) { - KVMState *s = KVM_STATE(current_machine->accelerator); + KVMState *s = KVM_STATE(current_accel()); return s->nr_slots; } @@ -301,13 +308,23 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo /* Set the slot size to 0 before setting the slot to the desired * value. This is needed based on KVM commit 75d61fbc. */ mem.memory_size = 0; - kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + if (ret < 0) { + goto err; + } } mem.memory_size = slot->memory_size; ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); slot->old_flags = mem.flags; +err: trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, mem.memory_size, mem.userspace_addr, ret); + if (ret < 0) { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, strerror(errno)); + } return ret; } @@ -518,6 +535,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) +/* Allocate the dirty bitmap for a slot */ +static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) +{ + /* + * XXX bad kernel interface alert + * For dirty bitmap, kernel allocates array of size aligned to + * bits-per-long. But for case when the kernel is 64bits and + * the userspace is 32bits, userspace can't align to the same + * bits-per-long, since sizeof(long) is different between kernel + * and user space. This way, userspace will provide buffer which + * may be 4 bytes less than the kernel will use, resulting in + * userspace memory corruption (which is not detectable by valgrind + * too, in most cases). + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. + */ + hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), + /*HOST_LONG_BITS*/ 64) / 8; + mem->dirty_bmap = g_malloc0(bitmap_size); +} + /** * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space * @@ -550,23 +588,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, goto out; } - /* XXX bad kernel interface alert - * For dirty bitmap, kernel allocates array of size aligned to - * bits-per-long. But for case when the kernel is 64bits and - * the userspace is 32bits, userspace can't align to the same - * bits-per-long, since sizeof(long) is different between kernel - * and user space. This way, userspace will provide buffer which - * may be 4 bytes less than the kernel will use, resulting in - * userspace memory corruption (which is not detectable by valgrind - * too, in most cases). - * So for now, let's align to 64 instead of HOST_LONG_BITS here, in - * a hope that sizeof(long) won't become >8 any time soon. - */ if (!mem->dirty_bmap) { - hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), - /*HOST_LONG_BITS*/ 64) / 8; /* Allocate on the first log_sync, once and for all */ - mem->dirty_bmap = g_malloc0(bitmap_size); + kvm_memslot_init_dirty_bitmap(mem); } d.dirty_bitmap = mem->dirty_bmap; @@ -1067,6 +1091,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, mem->ram = ram; mem->flags = kvm_mem_flags(mr); + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* + * Reallocate the bmap; it means it doesn't disappear in + * middle of a migrate. + */ + kvm_memslot_init_dirty_bitmap(mem); + } err = kvm_set_user_memory_region(kml, mem, true); if (err) { fprintf(stderr, "%s: error registering slot: %s\n", __func__, @@ -1758,10 +1789,11 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); } -static void kvm_irqchip_create(MachineState *machine, KVMState *s) +static void kvm_irqchip_create(KVMState *s) { int ret; + assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO); if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) { ; } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) { @@ -1776,9 +1808,9 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) /* First probe and see if there's a arch-specific hook to create the * in-kernel irqchip for us */ - ret = kvm_arch_irqchip_create(machine, s); + ret = kvm_arch_irqchip_create(s); if (ret == 0) { - if (machine_kernel_irqchip_split(machine)) { + if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { perror("Split IRQ chip mode not supported."); exit(1); } else { @@ -1826,7 +1858,7 @@ static int kvm_max_vcpu_id(KVMState *s) bool kvm_vcpu_id_is_valid(int vcpu_id) { - KVMState *s = KVM_STATE(current_machine->accelerator); + KVMState *s = KVM_STATE(current_accel()); return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s); } @@ -2049,8 +2081,12 @@ static int kvm_init(MachineState *ms) goto err; } - if (machine_kernel_irqchip_allowed(ms)) { - kvm_irqchip_create(ms, s); + if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + + if (s->kernel_irqchip_allowed) { + kvm_irqchip_create(s); } if (kvm_eventfds_allowed) { @@ -2152,9 +2188,9 @@ void kvm_flush_coalesced_mmio_buffer(void) ent = &ring->coalesced_mmio[ring->first]; if (ent->pio == 1) { - address_space_rw(&address_space_io, ent->phys_addr, - MEMTXATTRS_UNSPECIFIED, ent->data, - ent->len, true); + address_space_write(&address_space_io, ent->phys_addr, + MEMTXATTRS_UNSPECIFIED, ent->data, + ent->len); } else { cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); } @@ -2940,6 +2976,95 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, return false; } +static void kvm_get_kvm_shadow_mem(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + int64_t value = s->kvm_shadow_mem; + + visit_type_int(v, name, &value, errp); +} + +static void kvm_set_kvm_shadow_mem(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + int64_t value; + + visit_type_int(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + + s->kvm_shadow_mem = value; +} + +static void kvm_set_kernel_irqchip(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + Error *err = NULL; + KVMState *s = KVM_STATE(obj); + OnOffSplit mode; + + visit_type_OnOffSplit(v, name, &mode, &err); + if (err) { + error_propagate(errp, err); + return; + } else { + switch (mode) { + case ON_OFF_SPLIT_ON: + s->kernel_irqchip_allowed = true; + s->kernel_irqchip_required = true; + s->kernel_irqchip_split = ON_OFF_AUTO_OFF; + break; + case ON_OFF_SPLIT_OFF: + s->kernel_irqchip_allowed = false; + s->kernel_irqchip_required = false; + s->kernel_irqchip_split = ON_OFF_AUTO_OFF; + break; + case ON_OFF_SPLIT_SPLIT: + s->kernel_irqchip_allowed = true; + s->kernel_irqchip_required = true; + s->kernel_irqchip_split = ON_OFF_AUTO_ON; + break; + default: + /* The value was checked in visit_type_OnOffSplit() above. If + * we get here, then something is wrong in QEMU. + */ + abort(); + } + } +} + +bool kvm_kernel_irqchip_allowed(void) +{ + return kvm_state->kernel_irqchip_allowed; +} + +bool kvm_kernel_irqchip_required(void) +{ + return kvm_state->kernel_irqchip_required; +} + +bool kvm_kernel_irqchip_split(void) +{ + return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; +} + +static void kvm_accel_instance_init(Object *obj) +{ + KVMState *s = KVM_STATE(obj); + + s->kvm_shadow_mem = -1; + s->kernel_irqchip_allowed = true; + s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; +} + static void kvm_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); @@ -2947,11 +3072,24 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) ac->init_machine = kvm_init; ac->has_memory = kvm_accel_has_memory; ac->allowed = &kvm_allowed; + + object_class_property_add(oc, "kernel-irqchip", "on|off|split", + NULL, kvm_set_kernel_irqchip, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure KVM in-kernel irqchip", &error_abort); + + object_class_property_add(oc, "kvm-shadow-mem", "int", + kvm_get_kvm_shadow_mem, kvm_set_kvm_shadow_mem, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, "kvm-shadow-mem", + "KVM shadow MMU size", &error_abort); } static const TypeInfo kvm_accel_type = { .name = TYPE_KVM_ACCEL, .parent = TYPE_ACCEL, + .instance_init = kvm_accel_instance_init, .class_init = kvm_accel_class_init, .instance_size = sizeof(KVMState), };