X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=hw%2Fi386%2Fpc.c;h=c33ce47578d73718bec654f8418b9f6eb2ec1213;hb=0788a56bd1ae3ea4e118beb4aeb071a181791184;hp=1690b1935f77fc358f8dff0414458702eca34283;hpb=9ae805637a9cfab2edc15f56fbc3219815c8118e;p=mirror_qemu.git diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1690b1935f..c33ce47578 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -30,6 +30,7 @@ #include "hw/char/parallel.h" #include "hw/i386/apic.h" #include "hw/i386/topology.h" +#include "hw/i386/fw_cfg.h" #include "sysemu/cpus.h" #include "hw/block/fdc.h" #include "hw/ide.h" @@ -49,11 +50,13 @@ #include "hw/pci/msi.h" #include "hw/sysbus.h" #include "sysemu/sysemu.h" +#include "sysemu/tcg.h" #include "sysemu/numa.h" #include "sysemu/kvm.h" #include "sysemu/qtest.h" #include "kvm_i386.h" #include "hw/xen/xen.h" +#include "hw/xen/start_info.h" #include "ui/qemu-spice.h" #include "exec/memory.h" #include "exec/address-spaces.h" @@ -75,6 +78,11 @@ #include "hw/usb.h" #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" +#include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-pmem-pci.h" +#include "hw/mem/memory-device.h" +#include "sysemu/replay.h" +#include "qapi/qmp/qerror.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -86,12 +94,6 @@ #define DPRINTF(fmt, ...) #endif -#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) -#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) -#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) -#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) -#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) - #define E820_NR_ENTRIES 16 struct e820_entry { @@ -110,6 +112,12 @@ static struct e820_entry *e820_table; static unsigned e820_entries; struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; +/* Physical Address of PVH entry point read from kernel ELF NOTE */ +static size_t pvh_start_addr; + +GlobalProperty pc_compat_4_0[] = {}; +const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); + GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, @@ -131,6 +139,7 @@ GlobalProperty pc_compat_3_1[] = { { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, + { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, }; const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1); @@ -399,7 +408,7 @@ uint64_t cpu_get_tsc(CPUX86State *env) /* IRQ handling */ int cpu_get_pic_interrupt(CPUX86State *env) { - X86CPU *cpu = x86_env_get_cpu(env); + X86CPU *cpu = env_archcpu(env); int intno; if (!kvm_irqchip_in_kernel()) { @@ -908,14 +917,6 @@ bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) return false; } -/* Enables contiguous-apic-ID mode, for compatibility */ -static bool compat_apic_id_mode; - -void enable_compat_apic_id_mode(void) -{ - compat_apic_id_mode = true; -} - /* Calculates initial APIC ID for a specific CPU index * * Currently we need to be able to calculate the APIC ID from the CPU index @@ -923,13 +924,17 @@ void enable_compat_apic_id_mode(void) * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of * all CPUs up to max_cpus. */ -static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, + unsigned int cpu_index) { + MachineState *ms = MACHINE(pcms); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); uint32_t correct_id; static bool warned; - correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); - if (compat_apic_id_mode) { + correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, + ms->smp.threads, cpu_index); + if (pcmc->compat_apic_id_mode) { if (cpu_index != correct_id && !warned && !qtest_enabled()) { error_report("APIC IDs set in compatibility mode, " "CPU topology won't match the configuration"); @@ -953,7 +958,7 @@ static void pc_build_smbios(PCMachineState *pcms) /* tell smbios about cpuid version and features */ smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - smbios_tables = smbios_get_table_legacy(&smbios_tables_len); + smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); if (smbios_tables) { fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, smbios_tables, smbios_tables_len); @@ -970,7 +975,7 @@ static void pc_build_smbios(PCMachineState *pcms) array_count++; } } - smbios_get_tables(mem_array, array_count, + smbios_get_tables(ms, mem_array, array_count, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); g_free(mem_array); @@ -1055,13 +1060,6 @@ static long get_file_size(FILE *f) return size; } -/* setup_data types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 - struct setup_data { uint64_t next; uint32_t type; @@ -1069,6 +1067,109 @@ struct setup_data { uint8_t data[0]; } __attribute__((packed)); + +/* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM + * direct boot ABI and is available in an ELFNOTE in the kernel binary. + * + * This function is passed to load_elf() when it is called from + * load_elfboot() which then additionally checks for an ELF Note of + * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to + * parse the PVH entry address from the ELF Note. + * + * Due to trickery in elf_opts.h, load_elf() is actually available as + * load_elf32() or load_elf64() and this routine needs to be able + * to deal with being called as 32 or 64 bit. + * + * The address of the PVH entry point is saved to the 'pvh_start_addr' + * global variable. (although the entry point is 32-bit, the kernel + * binary can be either 32-bit or 64-bit). + */ +static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +{ + size_t *elf_note_data_addr; + + /* Check if ELF Note header passed in is valid */ + if (arg1 == NULL) { + return 0; + } + + if (is64) { + struct elf64_note *nhdr64 = (struct elf64_note *)arg1; + uint64_t nhdr_size64 = sizeof(struct elf64_note); + uint64_t phdr_align = *(uint64_t *)arg2; + uint64_t nhdr_namesz = nhdr64->n_namesz; + + elf_note_data_addr = + ((void *)nhdr64) + nhdr_size64 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + } else { + struct elf32_note *nhdr32 = (struct elf32_note *)arg1; + uint32_t nhdr_size32 = sizeof(struct elf32_note); + uint32_t phdr_align = *(uint32_t *)arg2; + uint32_t nhdr_namesz = nhdr32->n_namesz; + + elf_note_data_addr = + ((void *)nhdr32) + nhdr_size32 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + } + + pvh_start_addr = *elf_note_data_addr; + + return pvh_start_addr; +} + +static bool load_elfboot(const char *kernel_filename, + int kernel_file_size, + uint8_t *header, + size_t pvh_xen_start_addr, + FWCfgState *fw_cfg) +{ + uint32_t flags = 0; + uint32_t mh_load_addr = 0; + uint32_t elf_kernel_size = 0; + uint64_t elf_entry; + uint64_t elf_low, elf_high; + int kernel_size; + + if (ldl_p(header) != 0x464c457f) { + return false; /* no elfboot */ + } + + bool elf_is64 = header[EI_CLASS] == ELFCLASS64; + flags = elf_is64 ? + ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; + + if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ + error_report("elfboot unsupported flags = %x", flags); + exit(1); + } + + uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; + kernel_size = load_elf(kernel_filename, read_pvh_start_addr, + NULL, &elf_note_type, &elf_entry, + &elf_low, &elf_high, 0, I386_ELF_MACHINE, + 0, 0); + + if (kernel_size < 0) { + error_report("Error while loading elf kernel"); + exit(1); + } + mh_load_addr = elf_low; + elf_kernel_size = elf_high - elf_low; + + if (pvh_start_addr == 0) { + error_report("Error loading uncompressed kernel without PVH ELF Note"); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); + + return true; +} + static void load_linux(PCMachineState *pcms, FWCfgState *fw_cfg) { @@ -1108,12 +1209,70 @@ static void load_linux(PCMachineState *pcms, if (ldl_p(header+0x202) == 0x53726448) { protocol = lduw_p(header+0x206); } else { - /* This looks like a multiboot kernel. If it is, let's stop - treating it like a Linux kernel. */ + /* + * This could be a multiboot kernel. If it is, let's stop treating it + * like a Linux kernel. + * Note: some multiboot images could be in the ELF format (the same of + * PVH), so we try multiboot first since we check the multiboot magic + * header before to load it. + */ if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, kernel_cmdline, kernel_size, header)) { return; } + /* + * Check if the file is an uncompressed kernel file (ELF) and load it, + * saving the PVH entry point used by the x86/HVM direct boot ABI. + * If load_elfboot() is successful, populate the fw_cfg info. + */ + if (pcmc->pvh_enabled && + load_elfboot(kernel_filename, kernel_size, + header, pvh_start_addr, fw_cfg)) { + fclose(f); + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, + header, sizeof(header)); + + /* load initrd */ + if (initrd_filename) { + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + if (!g_file_get_contents(initrd_filename, &initrd_data, + &initrd_size, &gerr)) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + + initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, + initrd_size); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "pvh.bin"; + nb_option_roms++; + + return; + } protocol = 0; } @@ -1145,7 +1304,26 @@ static void load_linux(PCMachineState *pcms, #endif /* highest address for loading the initrd */ - if (protocol >= 0x203) { + if (protocol >= 0x20c && + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { + /* + * Linux has supported initrd up to 4 GB for a very long time (2007, + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), + * though it only sets initrd_max to 2 GB to "work around bootloader + * bugs". Luckily, QEMU firmware(which does something like bootloader) + * has supported this. + * + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can + * be loaded into any address. + * + * In addition, initrd_max is uint32_t simply because QEMU doesn't + * support the 64-bit boot protocol (specifically the ext_ramdisk_image + * field). + * + * Therefore here just limit initrd_max to UINT32_MAX simply as well. + */ + initrd_max = UINT32_MAX; + } else if (protocol >= 0x203) { initrd_max = ldl_p(header+0x22c); } else { initrd_max = 0x37ffffff; @@ -1338,12 +1516,16 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) +static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) { Object *cpu = NULL; Error *local_err = NULL; + CPUX86State *env = NULL; - cpu = object_new(typename); + cpu = object_new(MACHINE(pcms)->cpu_type); + + env = &X86_CPU(cpu)->env; + env->nr_dies = pcms->smp_dies; object_property_set_uint(cpu, apic_id, "apic-id", &local_err); object_property_set_bool(cpu, true, "realized", &local_err); @@ -1352,10 +1534,90 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) error_propagate(errp, local_err); } -void pc_hot_add_cpu(const int64_t id, Error **errp) +/* + * This function is very similar to smp_parse() + * in hw/core/machine.c but includes CPU die support. + */ +void pc_smp_parse(MachineState *ms, QemuOpts *opts) +{ + PCMachineState *pcms = PC_MACHINE(ms); + + if (opts) { + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); + unsigned dies = qemu_opt_get_number(opts, "dies", 1); + unsigned cores = qemu_opt_get_number(opts, "cores", 0); + unsigned threads = qemu_opt_get_number(opts, "threads", 0); + + /* compute missing values, prefer sockets over cores over threads */ + if (cpus == 0 || sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + if (cpus == 0) { + sockets = sockets > 0 ? sockets : 1; + cpus = cores * threads * dies * sockets; + } else { + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + sockets = ms->smp.max_cpus / (cores * threads * dies); + } + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = cpus / (sockets * dies * threads); + cores = cores > 0 ? cores : 1; + } else if (threads == 0) { + threads = cpus / (cores * dies * sockets); + threads = threads > 0 ? threads : 1; + } else if (sockets * dies * cores * threads < cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " + "smp_cpus (%u)", + sockets, dies, cores, threads, cpus); + exit(1); + } + + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + + if (ms->smp.max_cpus < cpus) { + error_report("maxcpus must be equal to or greater than smp"); + exit(1); + } + + if (sockets * dies * cores * threads > ms->smp.max_cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " + "maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + exit(1); + } + + if (sockets * dies * cores * threads != ms->smp.max_cpus) { + warn_report("Invalid CPU topology deprecated: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " + "!= maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + } + + ms->smp.cpus = cpus; + ms->smp.cores = cores; + ms->smp.threads = threads; + pcms->smp_dies = dies; + } + + if (ms->smp.cpus > 1) { + Error *blocker = NULL; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); + replay_add_blocker(blocker); + } +} + +void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) { - MachineState *ms = MACHINE(qdev_get_machine()); - int64_t apic_id = x86_cpu_apic_id_from_index(id); + PCMachineState *pcms = PC_MACHINE(ms); + int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); Error *local_err = NULL; if (id < 0) { @@ -1370,7 +1632,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - pc_new_cpu(ms->cpu_type, apic_id, &local_err); + pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1383,6 +1645,9 @@ void pc_cpus_init(PCMachineState *pcms) const CPUArchIdList *possible_cpus; MachineState *ms = MACHINE(pcms); MachineClass *mc = MACHINE_GET_CLASS(pcms); + PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); + + x86_cpu_set_default_version(pcmc->default_cpu_version); /* Calculates the limit to CPU APIC ID values * @@ -1391,11 +1656,11 @@ void pc_cpus_init(PCMachineState *pcms) * * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). */ - pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; + pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, + ms->smp.max_cpus - 1) + 1; possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < smp_cpus; i++) { - pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id, - &error_fatal); + for (i = 0; i < ms->smp.cpus; i++) { + pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); } } @@ -1515,33 +1780,6 @@ void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, pci_address_space, -1); } -void pc_acpi_init(const char *default_dsdt) -{ - char *filename; - - if (acpi_tables != NULL) { - /* manually set via -acpitable, leave it alone */ - return; - } - - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt); - if (filename == NULL) { - warn_report("failed to find %s", default_dsdt); - } else { - QemuOpts *opts = qemu_opts_create(qemu_find_opts("acpi"), NULL, 0, - &error_abort); - Error *err = NULL; - - qemu_opt_set(opts, "file", filename, &error_abort); - - acpi_table_add_builtin(opts, &err); - if (err) { - warn_reportf_err(err, "failed to load %s: ", filename); - } - g_free(filename); - } -} - void xen_load_linux(PCMachineState *pcms) { int i; @@ -1557,6 +1795,7 @@ void xen_load_linux(PCMachineState *pcms) for (i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || + !strcmp(option_rom[i].name, "pvh.bin") || !strcmp(option_rom[i].name, "multiboot.bin")); rom_add_option(option_rom[i].name, option_rom[i].bootindex); } @@ -1656,7 +1895,7 @@ void pc_memory_init(PCMachineState *pcms, } /* Initialize PC system firmware */ - pc_system_firmware_init(rom_memory, !pcmc->pci_enabled); + pc_system_firmware_init(pcms, rom_memory); option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, @@ -1922,8 +2161,10 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { const PCMachineState *pcms = PC_MACHINE(hotplug_dev); const PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + const MachineState *ms = MACHINE(hotplug_dev); const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); const uint64_t legacy_align = TARGET_PAGE_SIZE; + Error *local_err = NULL; /* * When -no-acpi is used with Q35 machine type, no ACPI is built, @@ -1936,11 +2177,17 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) { + if (is_nvdimm && !ms->nvdimms_state->is_enabled) { error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); return; } + hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp); } @@ -1948,9 +2195,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, static void pc_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms), &local_err); @@ -1959,11 +2206,10 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev, } if (is_nvdimm) { - nvdimm_plug(&pcms->acpi_nvdimm_state); + nvdimm_plug(ms->nvdimms_state); } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); + hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); out: error_propagate(errp, local_err); } @@ -1971,7 +2217,6 @@ out: static void pc_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); @@ -1992,9 +2237,8 @@ static void pc_memory_unplug_request(HotplugHandler *hotplug_dev, goto out; } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, + &local_err); out: error_propagate(errp, local_err); } @@ -2003,19 +2247,15 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { PCMachineState *pcms = PC_MACHINE(hotplug_dev); - HotplugHandlerClass *hhc; Error *local_err = NULL; - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } pc_dimm_unplug(PC_DIMM(dev), MACHINE(pcms)); - object_unparent(OBJECT(dev)); - + object_property_set_bool(OBJECT(dev), false, "realized", NULL); out: error_propagate(errp, local_err); } @@ -2050,14 +2290,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { CPUArchId *found_cpu; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); if (pcms->acpi_dev) { - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } @@ -2081,7 +2319,6 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int idx = -1; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); @@ -2098,9 +2335,8 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, goto out; } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, + &local_err); if (local_err) { goto out; } @@ -2114,21 +2350,18 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { CPUArchId *found_cpu; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); found_cpu->cpu = NULL; - object_unparent(OBJECT(dev)); + object_property_set_bool(OBJECT(dev), false, "realized", NULL); /* decrement the number of CPUs */ pcms->boot_cpus--; @@ -2147,8 +2380,11 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, CPUArchId *cpu_slot; X86CPUTopoInfo topo; X86CPU *cpu = X86_CPU(dev); + CPUX86State *env = &cpu->env; MachineState *ms = MACHINE(hotplug_dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); + unsigned int smp_cores = ms->smp.cores; + unsigned int smp_threads = ms->smp.threads; if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", @@ -2156,9 +2392,15 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, return; } - /* if APIC ID is not set, set it based on socket/core/thread properties */ + env->nr_dies = pcms->smp_dies; + + /* + * If APIC ID is not set, + * set it based on socket/die/core/thread properties. + */ if (cpu->apic_id == UNASSIGNED_APIC_ID) { - int max_socket = (max_cpus - 1) / smp_threads / smp_cores; + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / pcms->smp_dies; if (cpu->socket_id < 0) { error_setg(errp, "CPU socket-id is not set"); @@ -2167,6 +2409,10 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", cpu->socket_id, max_socket); return; + } else if (cpu->die_id > pcms->smp_dies - 1) { + error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", + cpu->die_id, max_socket); + return; } if (cpu->core_id < 0) { error_setg(errp, "CPU core-id is not set"); @@ -2186,20 +2432,24 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } topo.pkg_id = cpu->socket_id; + topo.die_id = cpu->die_id; topo.core_id = cpu->core_id; topo.smt_id = cpu->thread_id; - cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo); + cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores, + smp_threads, &topo); } cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); if (!cpu_slot) { MachineState *ms = MACHINE(pcms); - x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); - error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with" - " APIC ID %" PRIu32 ", valid index range 0:%d", - topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id, - ms->possible_cpus->len - 1); + x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + smp_cores, smp_threads, &topo); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", + topo.pkg_id, topo.die_id, topo.core_id, topo.smt_id, + cpu->apic_id, ms->possible_cpus->len - 1); return; } @@ -2215,7 +2465,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() * once -smp refactoring is complete and there will be CPU private * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ - x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); + x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + smp_cores, smp_threads, &topo); if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { error_setg(errp, "property socket-id: %u doesn't match set apic-id:" " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, topo.pkg_id); @@ -2223,6 +2474,13 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } cpu->socket_id = topo.pkg_id; + if (cpu->die_id != -1 && cpu->die_id != topo.die_id) { + error_setg(errp, "property die-id: %u doesn't match set apic-id:" + " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo.die_id); + return; + } + cpu->die_id = topo.die_id; + if (cpu->core_id != -1 && cpu->core_id != topo.core_id) { error_setg(errp, "property core-id: %u doesn't match set apic-id:" " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, topo.core_id); @@ -2237,7 +2495,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } cpu->thread_id = topo.smt_id; - if (cpu->hyperv_vpindex && !kvm_hv_vpindex_settable()) { + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && + !kvm_hv_vpindex_settable()) { error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); return; } @@ -2248,6 +2507,65 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, numa_cpu_pre_plug(cpu_slot, dev, errp); } +static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + if (!hotplug_dev2) { + /* + * Without a bus hotplug handler, we cannot control the plug/unplug + * order. This should never be the case on x86, however better add + * a safety net. + */ + error_setg(errp, "virtio-pmem-pci not supported on this bus."); + return; + } + /* + * First, see if we can plug this memory device at all. If that + * succeeds, branch of to the actual hotplug handler. + */ + memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, + &local_err); + if (!local_err) { + hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + /* + * Plug the memory device first and then branch off to the actual + * hotplug handler. If that one fails, we can easily undo the memory + * device bits. + */ + memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + hotplug_handler_plug(hotplug_dev2, dev, &local_err); + if (local_err) { + memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ + error_setg(errp, "virtio pmem device unplug not supported."); +} + +static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ +} + static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2255,6 +2573,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, pc_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp); } } @@ -2265,6 +2585,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, pc_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp); } } @@ -2275,6 +2597,8 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, pc_memory_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_request_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2288,6 +2612,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, pc_memory_unplug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2298,7 +2624,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, DeviceState *dev) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + object_dynamic_cast(OBJECT(dev), TYPE_CPU) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { return HOTPLUG_HANDLER(machine); } @@ -2311,7 +2638,11 @@ pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, Error **errp) { MachineState *ms = MACHINE(obj); - int64_t value = memory_region_size(&ms->device_memory->mr); + int64_t value = 0; + + if (ms->device_memory) { + value = memory_region_size(&ms->device_memory->mr); + } visit_type_int(v, name, &value, errp); } @@ -2414,47 +2745,6 @@ static void pc_machine_set_smm(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &pcms->smm, errp); } -static bool pc_machine_get_nvdimm(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return pcms->acpi_nvdimm_state.is_enabled; -} - -static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - pcms->acpi_nvdimm_state.is_enabled = value; -} - -static char *pc_machine_get_nvdimm_persistence(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return g_strdup(pcms->acpi_nvdimm_state.persistence_string); -} - -static void pc_machine_set_nvdimm_persistence(Object *obj, const char *value, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - AcpiNVDIMMState *nvdimm_state = &pcms->acpi_nvdimm_state; - - if (strcmp(value, "cpu") == 0) - nvdimm_state->persistence = 3; - else if (strcmp(value, "mem-ctrl") == 0) - nvdimm_state->persistence = 2; - else { - error_setg(errp, "-machine nvdimm-persistence=%s: unsupported option", - value); - return; - } - - g_free(nvdimm_state->persistence_string); - nvdimm_state->persistence_string = g_strdup(value); -} - static bool pc_machine_get_smbus(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -2504,16 +2794,17 @@ static void pc_machine_initfn(Object *obj) pcms->max_ram_below_4g = 0; /* use default */ pcms->smm = ON_OFF_AUTO_AUTO; pcms->vmport = ON_OFF_AUTO_AUTO; - /* nvdimm is disabled on default. */ - pcms->acpi_nvdimm_state.is_enabled = false; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->pit_enabled = true; + pcms->smp_dies = 1; + + pc_system_flash_create(pcms); } -static void pc_machine_reset(void) +static void pc_machine_reset(MachineState *machine) { CPUState *cs; X86CPU *cpu; @@ -2545,16 +2836,20 @@ pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) { X86CPUTopoInfo topo; + PCMachineState *pcms = PC_MACHINE(ms); assert(idx < ms->possible_cpus->len); x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, - smp_cores, smp_threads, &topo); + pcms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); return topo.pkg_id % nb_numa_nodes; } static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) { + PCMachineState *pcms = PC_MACHINE(ms); int i; + unsigned int max_cpus = ms->smp.max_cpus; if (ms->possible_cpus) { /* @@ -2573,11 +2868,14 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[i].type = ms->cpu_type; ms->possible_cpus->cpus[i].vcpus_count = 1; - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, - smp_cores, smp_threads, &topo); + pcms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); ms->possible_cpus->cpus[i].props.has_socket_id = true; ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; + ms->possible_cpus->cpus[i].props.has_die_id = true; + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ms->possible_cpus->cpus[i].props.has_core_id = true; ms->possible_cpus->cpus[i].props.core_id = topo.core_id; ms->possible_cpus->cpus[i].props.has_thread_id = true; @@ -2623,6 +2921,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->acpi_data_size = 0x20000 + 0x8000; pcmc->save_tsc_khz = true; pcmc->linuxboot_dma_enabled = true; + pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = pc_get_hotplug_handler; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; @@ -2632,6 +2931,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; + mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; mc->max_cpus = 255; mc->reset = pc_machine_reset; @@ -2641,6 +2941,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) hc->unplug = pc_machine_device_unplug_cb; nc->nmi_monitor_handler = x86_nmi; mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; + mc->nvdimm_supported = true; + mc->numa_mem_supported = true; object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", pc_machine_get_device_memory_region_size, NULL, @@ -2665,13 +2967,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_VMPORT, "Enable vmport (pc & q35)", &error_abort); - object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, - pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); - - object_class_property_add_str(oc, PC_MACHINE_NVDIMM_PERSIST, - pc_machine_get_nvdimm_persistence, - pc_machine_set_nvdimm_persistence, &error_abort); - object_class_property_add_bool(oc, PC_MACHINE_SMBUS, pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);