#include "hw/char/parallel.h"
#include "hw/i386/apic.h"
#include "hw/i386/topology.h"
+#include "hw/i386/fw_cfg.h"
#include "sysemu/cpus.h"
#include "hw/block/fdc.h"
#include "hw/ide.h"
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"
+#include "sysemu/tcg.h"
#include "sysemu/numa.h"
#include "sysemu/kvm.h"
#include "sysemu/qtest.h"
#include "kvm_i386.h"
#include "hw/xen/xen.h"
+#include "hw/xen/start_info.h"
#include "ui/qemu-spice.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "hw/usb.h"
#include "hw/i386/intel_iommu.h"
#include "hw/net/ne2000-isa.h"
+#include "standard-headers/asm-x86/bootparam.h"
+#include "hw/virtio/virtio-pmem-pci.h"
+#include "hw/mem/memory-device.h"
+#include "sysemu/replay.h"
+#include "qapi/qmp/qerror.h"
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ
#define DPRINTF(fmt, ...)
#endif
-#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
-#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
-#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
-#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
-#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
-
#define E820_NR_ENTRIES 16
struct e820_entry {
static unsigned e820_entries;
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
+/* Physical Address of PVH entry point read from kernel ELF NOTE */
+static size_t pvh_start_addr;
+
+GlobalProperty pc_compat_4_0[] = {};
+const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
+
GlobalProperty pc_compat_3_1[] = {
{ "intel-iommu", "dma-drain", "off" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
{ "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
+ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
};
const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1);
/* IRQ handling */
int cpu_get_pic_interrupt(CPUX86State *env)
{
- X86CPU *cpu = x86_env_get_cpu(env);
+ X86CPU *cpu = env_archcpu(env);
int intno;
if (!kvm_irqchip_in_kernel()) {
return false;
}
-/* Enables contiguous-apic-ID mode, for compatibility */
-static bool compat_apic_id_mode;
-
-void enable_compat_apic_id_mode(void)
-{
- compat_apic_id_mode = true;
-}
-
/* Calculates initial APIC ID for a specific CPU index
*
* Currently we need to be able to calculate the APIC ID from the CPU index
* no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
* all CPUs up to max_cpus.
*/
-static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
+static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms,
+ unsigned int cpu_index)
{
+ MachineState *ms = MACHINE(pcms);
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
uint32_t correct_id;
static bool warned;
- correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index);
- if (compat_apic_id_mode) {
+ correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores,
+ ms->smp.threads, cpu_index);
+ if (pcmc->compat_apic_id_mode) {
if (cpu_index != correct_id && !warned && !qtest_enabled()) {
error_report("APIC IDs set in compatibility mode, "
"CPU topology won't match the configuration");
/* tell smbios about cpuid version and features */
smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
- smbios_tables = smbios_get_table_legacy(&smbios_tables_len);
+ smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len);
if (smbios_tables) {
fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES,
smbios_tables, smbios_tables_len);
array_count++;
}
}
- smbios_get_tables(mem_array, array_count,
+ smbios_get_tables(ms, mem_array, array_count,
&smbios_tables, &smbios_tables_len,
&smbios_anchor, &smbios_anchor_len);
g_free(mem_array);
return size;
}
-/* setup_data types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-#define SETUP_EFI 4
-
struct setup_data {
uint64_t next;
uint32_t type;
uint8_t data[0];
} __attribute__((packed));
+
+/*
+ * The entry point into the kernel for PVH boot is different from
+ * the native entry point. The PVH entry is defined by the x86/HVM
+ * direct boot ABI and is available in an ELFNOTE in the kernel binary.
+ *
+ * This function is passed to load_elf() when it is called from
+ * load_elfboot() which then additionally checks for an ELF Note of
+ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
+ * parse the PVH entry address from the ELF Note.
+ *
+ * Due to trickery in elf_opts.h, load_elf() is actually available as
+ * load_elf32() or load_elf64() and this routine needs to be able
+ * to deal with being called as 32 or 64 bit.
+ *
+ * The address of the PVH entry point is saved to the 'pvh_start_addr'
+ * global variable. (although the entry point is 32-bit, the kernel
+ * binary can be either 32-bit or 64-bit).
+ */
+static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
+{
+ size_t *elf_note_data_addr;
+
+ /* Check if ELF Note header passed in is valid */
+ if (arg1 == NULL) {
+ return 0;
+ }
+
+ if (is64) {
+ struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
+ uint64_t nhdr_size64 = sizeof(struct elf64_note);
+ uint64_t phdr_align = *(uint64_t *)arg2;
+ uint64_t nhdr_namesz = nhdr64->n_namesz;
+
+ elf_note_data_addr =
+ ((void *)nhdr64) + nhdr_size64 +
+ QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+ } else {
+ struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
+ uint32_t nhdr_size32 = sizeof(struct elf32_note);
+ uint32_t phdr_align = *(uint32_t *)arg2;
+ uint32_t nhdr_namesz = nhdr32->n_namesz;
+
+ elf_note_data_addr =
+ ((void *)nhdr32) + nhdr_size32 +
+ QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+ }
+
+ pvh_start_addr = *elf_note_data_addr;
+
+ return pvh_start_addr;
+}
+
+static bool load_elfboot(const char *kernel_filename,
+ int kernel_file_size,
+ uint8_t *header,
+ size_t pvh_xen_start_addr,
+ FWCfgState *fw_cfg)
+{
+ uint32_t flags = 0;
+ uint32_t mh_load_addr = 0;
+ uint32_t elf_kernel_size = 0;
+ uint64_t elf_entry;
+ uint64_t elf_low, elf_high;
+ int kernel_size;
+
+ if (ldl_p(header) != 0x464c457f) {
+ return false; /* no elfboot */
+ }
+
+ bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
+ flags = elf_is64 ?
+ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
+
+ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
+ error_report("elfboot unsupported flags = %x", flags);
+ exit(1);
+ }
+
+ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
+ kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
+ NULL, &elf_note_type, &elf_entry,
+ &elf_low, &elf_high, 0, I386_ELF_MACHINE,
+ 0, 0);
+
+ if (kernel_size < 0) {
+ error_report("Error while loading elf kernel");
+ exit(1);
+ }
+ mh_load_addr = elf_low;
+ elf_kernel_size = elf_high - elf_low;
+
+ if (pvh_start_addr == 0) {
+ error_report("Error loading uncompressed kernel without PVH ELF Note");
+ exit(1);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
+
+ return true;
+}
+
static void load_linux(PCMachineState *pcms,
FWCfgState *fw_cfg)
{
if (ldl_p(header+0x202) == 0x53726448) {
protocol = lduw_p(header+0x206);
} else {
- /* This looks like a multiboot kernel. If it is, let's stop
- treating it like a Linux kernel. */
+ /*
+ * This could be a multiboot kernel. If it is, let's stop treating it
+ * like a Linux kernel.
+ * Note: some multiboot images could be in the ELF format (the same of
+ * PVH), so we try multiboot first since we check the multiboot magic
+ * header before to load it.
+ */
if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
kernel_cmdline, kernel_size, header)) {
return;
}
+ /*
+ * Check if the file is an uncompressed kernel file (ELF) and load it,
+ * saving the PVH entry point used by the x86/HVM direct boot ABI.
+ * If load_elfboot() is successful, populate the fw_cfg info.
+ */
+ if (pcmc->pvh_enabled &&
+ load_elfboot(kernel_filename, kernel_size,
+ header, pvh_start_addr, fw_cfg)) {
+ fclose(f);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
+ strlen(kernel_cmdline) + 1);
+ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
+ header, sizeof(header));
+
+ /* load initrd */
+ if (initrd_filename) {
+ gsize initrd_size;
+ gchar *initrd_data;
+ GError *gerr = NULL;
+
+ if (!g_file_get_contents(initrd_filename, &initrd_data,
+ &initrd_size, &gerr)) {
+ fprintf(stderr, "qemu: error reading initrd %s: %s\n",
+ initrd_filename, gerr->message);
+ exit(1);
+ }
+
+ initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1;
+ if (initrd_size >= initrd_max) {
+ fprintf(stderr, "qemu: initrd is too large, cannot support."
+ "(max: %"PRIu32", need %"PRId64")\n",
+ initrd_max, (uint64_t)initrd_size);
+ exit(1);
+ }
+
+ initrd_addr = (initrd_max - initrd_size) & ~4095;
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
+ initrd_size);
+ }
+
+ option_rom[nb_option_roms].bootindex = 0;
+ option_rom[nb_option_roms].name = "pvh.bin";
+ nb_option_roms++;
+
+ return;
+ }
protocol = 0;
}
#endif
/* highest address for loading the initrd */
- if (protocol >= 0x203) {
+ if (protocol >= 0x20c &&
+ lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
+ /*
+ * Linux has supported initrd up to 4 GB for a very long time (2007,
+ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
+ * though it only sets initrd_max to 2 GB to "work around bootloader
+ * bugs". Luckily, QEMU firmware(which does something like bootloader)
+ * has supported this.
+ *
+ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
+ * be loaded into any address.
+ *
+ * In addition, initrd_max is uint32_t simply because QEMU doesn't
+ * support the 64-bit boot protocol (specifically the ext_ramdisk_image
+ * field).
+ *
+ * Therefore here just limit initrd_max to UINT32_MAX simply as well.
+ */
+ initrd_max = UINT32_MAX;
+ } else if (protocol >= 0x203) {
initrd_max = ldl_p(header+0x22c);
} else {
initrd_max = 0x37ffffff;
}
}
-static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp)
+static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp)
{
Object *cpu = NULL;
Error *local_err = NULL;
+ CPUX86State *env = NULL;
- cpu = object_new(typename);
+ cpu = object_new(MACHINE(pcms)->cpu_type);
+
+ env = &X86_CPU(cpu)->env;
+ env->nr_dies = pcms->smp_dies;
object_property_set_uint(cpu, apic_id, "apic-id", &local_err);
object_property_set_bool(cpu, true, "realized", &local_err);
error_propagate(errp, local_err);
}
-void pc_hot_add_cpu(const int64_t id, Error **errp)
+/*
+ * This function is very similar to smp_parse()
+ * in hw/core/machine.c but includes CPU die support.
+ */
+void pc_smp_parse(MachineState *ms, QemuOpts *opts)
+{
+ PCMachineState *pcms = PC_MACHINE(ms);
+
+ if (opts) {
+ unsigned cpus = qemu_opt_get_number(opts, "cpus", 0);
+ unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
+ unsigned dies = qemu_opt_get_number(opts, "dies", 1);
+ unsigned cores = qemu_opt_get_number(opts, "cores", 0);
+ unsigned threads = qemu_opt_get_number(opts, "threads", 0);
+
+ /* compute missing values, prefer sockets over cores over threads */
+ if (cpus == 0 || sockets == 0) {
+ cores = cores > 0 ? cores : 1;
+ threads = threads > 0 ? threads : 1;
+ if (cpus == 0) {
+ sockets = sockets > 0 ? sockets : 1;
+ cpus = cores * threads * dies * sockets;
+ } else {
+ ms->smp.max_cpus =
+ qemu_opt_get_number(opts, "maxcpus", cpus);
+ sockets = ms->smp.max_cpus / (cores * threads * dies);
+ }
+ } else if (cores == 0) {
+ threads = threads > 0 ? threads : 1;
+ cores = cpus / (sockets * dies * threads);
+ cores = cores > 0 ? cores : 1;
+ } else if (threads == 0) {
+ threads = cpus / (cores * dies * sockets);
+ threads = threads > 0 ? threads : 1;
+ } else if (sockets * dies * cores * threads < cpus) {
+ error_report("cpu topology: "
+ "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "
+ "smp_cpus (%u)",
+ sockets, dies, cores, threads, cpus);
+ exit(1);
+ }
+
+ ms->smp.max_cpus =
+ qemu_opt_get_number(opts, "maxcpus", cpus);
+
+ if (ms->smp.max_cpus < cpus) {
+ error_report("maxcpus must be equal to or greater than smp");
+ exit(1);
+ }
+
+ if (sockets * dies * cores * threads > ms->smp.max_cpus) {
+ error_report("cpu topology: "
+ "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > "
+ "maxcpus (%u)",
+ sockets, dies, cores, threads,
+ ms->smp.max_cpus);
+ exit(1);
+ }
+
+ if (sockets * dies * cores * threads != ms->smp.max_cpus) {
+ warn_report("Invalid CPU topology deprecated: "
+ "sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
+ "!= maxcpus (%u)",
+ sockets, dies, cores, threads,
+ ms->smp.max_cpus);
+ }
+
+ ms->smp.cpus = cpus;
+ ms->smp.cores = cores;
+ ms->smp.threads = threads;
+ pcms->smp_dies = dies;
+ }
+
+ if (ms->smp.cpus > 1) {
+ Error *blocker = NULL;
+ error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
+ replay_add_blocker(blocker);
+ }
+}
+
+void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp)
{
- MachineState *ms = MACHINE(qdev_get_machine());
- int64_t apic_id = x86_cpu_apic_id_from_index(id);
+ PCMachineState *pcms = PC_MACHINE(ms);
+ int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id);
Error *local_err = NULL;
if (id < 0) {
return;
}
- pc_new_cpu(ms->cpu_type, apic_id, &local_err);
+ pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
const CPUArchIdList *possible_cpus;
MachineState *ms = MACHINE(pcms);
MachineClass *mc = MACHINE_GET_CLASS(pcms);
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(mc);
+
+ x86_cpu_set_default_version(pcmc->default_cpu_version);
/* Calculates the limit to CPU APIC ID values
*
*
* This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
*/
- pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
+ pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms,
+ ms->smp.max_cpus - 1) + 1;
possible_cpus = mc->possible_cpu_arch_ids(ms);
- for (i = 0; i < smp_cpus; i++) {
- pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id,
- &error_fatal);
+ for (i = 0; i < ms->smp.cpus; i++) {
+ pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal);
}
}
pci_address_space, -1);
}
-void pc_acpi_init(const char *default_dsdt)
-{
- char *filename;
-
- if (acpi_tables != NULL) {
- /* manually set via -acpitable, leave it alone */
- return;
- }
-
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt);
- if (filename == NULL) {
- warn_report("failed to find %s", default_dsdt);
- } else {
- QemuOpts *opts = qemu_opts_create(qemu_find_opts("acpi"), NULL, 0,
- &error_abort);
- Error *err = NULL;
-
- qemu_opt_set(opts, "file", filename, &error_abort);
-
- acpi_table_add_builtin(opts, &err);
- if (err) {
- warn_reportf_err(err, "failed to load %s: ", filename);
- }
- g_free(filename);
- }
-}
-
void xen_load_linux(PCMachineState *pcms)
{
int i;
for (i = 0; i < nb_option_roms; i++) {
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
!strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
+ !strcmp(option_rom[i].name, "pvh.bin") ||
!strcmp(option_rom[i].name, "multiboot.bin"));
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
}
/* Initialize PC system firmware */
- pc_system_firmware_init(rom_memory, !pcmc->pci_enabled);
+ pc_system_firmware_init(pcms, rom_memory);
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
{
const PCMachineState *pcms = PC_MACHINE(hotplug_dev);
const PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+ const MachineState *ms = MACHINE(hotplug_dev);
const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
const uint64_t legacy_align = TARGET_PAGE_SIZE;
+ Error *local_err = NULL;
/*
* When -no-acpi is used with Q35 machine type, no ACPI is built,
return;
}
- if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) {
+ if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
return;
}
+ hotplug_handler_pre_plug(pcms->acpi_dev, dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev),
pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp);
}
static void pc_memory_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+ MachineState *ms = MACHINE(hotplug_dev);
bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms), &local_err);
}
if (is_nvdimm) {
- nvdimm_plug(&pcms->acpi_nvdimm_state);
+ nvdimm_plug(ms->nvdimms_state);
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
+ hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
out:
error_propagate(errp, local_err);
}
static void pc_memory_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
goto out;
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
+ &local_err);
out:
error_propagate(errp, local_err);
}
DeviceState *dev, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
pc_dimm_unplug(PC_DIMM(dev), MACHINE(pcms));
- object_unparent(OBJECT(dev));
-
+ object_property_set_bool(OBJECT(dev), false, "realized", NULL);
out:
error_propagate(errp, local_err);
}
DeviceState *dev, Error **errp)
{
CPUArchId *found_cpu;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
if (pcms->acpi_dev) {
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
+ hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
DeviceState *dev, Error **errp)
{
int idx = -1;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
goto out;
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
+ &local_err);
if (local_err) {
goto out;
}
DeviceState *dev, Error **errp)
{
CPUArchId *found_cpu;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
found_cpu->cpu = NULL;
- object_unparent(OBJECT(dev));
+ object_property_set_bool(OBJECT(dev), false, "realized", NULL);
/* decrement the number of CPUs */
pcms->boot_cpus--;
CPUArchId *cpu_slot;
X86CPUTopoInfo topo;
X86CPU *cpu = X86_CPU(dev);
+ CPUX86State *env = &cpu->env;
MachineState *ms = MACHINE(hotplug_dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+ unsigned int smp_cores = ms->smp.cores;
+ unsigned int smp_threads = ms->smp.threads;
if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
return;
}
- /* if APIC ID is not set, set it based on socket/core/thread properties */
+ env->nr_dies = pcms->smp_dies;
+
+ /*
+ * If APIC ID is not set,
+ * set it based on socket/die/core/thread properties.
+ */
if (cpu->apic_id == UNASSIGNED_APIC_ID) {
- int max_socket = (max_cpus - 1) / smp_threads / smp_cores;
+ int max_socket = (ms->smp.max_cpus - 1) /
+ smp_threads / smp_cores / pcms->smp_dies;
if (cpu->socket_id < 0) {
error_setg(errp, "CPU socket-id is not set");
error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u",
cpu->socket_id, max_socket);
return;
+ } else if (cpu->die_id > pcms->smp_dies - 1) {
+ error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u",
+ cpu->die_id, max_socket);
+ return;
}
if (cpu->core_id < 0) {
error_setg(errp, "CPU core-id is not set");
}
topo.pkg_id = cpu->socket_id;
+ topo.die_id = cpu->die_id;
topo.core_id = cpu->core_id;
topo.smt_id = cpu->thread_id;
- cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo);
+ cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores,
+ smp_threads, &topo);
}
cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx);
if (!cpu_slot) {
MachineState *ms = MACHINE(pcms);
- x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo);
- error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with"
- " APIC ID %" PRIu32 ", valid index range 0:%d",
- topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id,
- ms->possible_cpus->len - 1);
+ x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies,
+ smp_cores, smp_threads, &topo);
+ error_setg(errp,
+ "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with"
+ " APIC ID %" PRIu32 ", valid index range 0:%d",
+ topo.pkg_id, topo.die_id, topo.core_id, topo.smt_id,
+ cpu->apic_id, ms->possible_cpus->len - 1);
return;
}
/* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
* once -smp refactoring is complete and there will be CPU private
* CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
- x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo);
+ x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies,
+ smp_cores, smp_threads, &topo);
if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) {
error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
" 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, topo.pkg_id);
}
cpu->socket_id = topo.pkg_id;
+ if (cpu->die_id != -1 && cpu->die_id != topo.die_id) {
+ error_setg(errp, "property die-id: %u doesn't match set apic-id:"
+ " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo.die_id);
+ return;
+ }
+ cpu->die_id = topo.die_id;
+
if (cpu->core_id != -1 && cpu->core_id != topo.core_id) {
error_setg(errp, "property core-id: %u doesn't match set apic-id:"
" 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, topo.core_id);
}
cpu->thread_id = topo.smt_id;
- if (cpu->hyperv_vpindex && !kvm_hv_vpindex_settable()) {
+ if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
+ !kvm_hv_vpindex_settable()) {
error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
return;
}
numa_cpu_pre_plug(cpu_slot, dev, errp);
}
+static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
+ Error *local_err = NULL;
+
+ if (!hotplug_dev2) {
+ /*
+ * Without a bus hotplug handler, we cannot control the plug/unplug
+ * order. This should never be the case on x86, however better add
+ * a safety net.
+ */
+ error_setg(errp, "virtio-pmem-pci not supported on this bus.");
+ return;
+ }
+ /*
+ * First, see if we can plug this memory device at all. If that
+ * succeeds, branch of to the actual hotplug handler.
+ */
+ memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
+ &local_err);
+ if (!local_err) {
+ hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err);
+ }
+ error_propagate(errp, local_err);
+}
+
+static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
+ Error *local_err = NULL;
+
+ /*
+ * Plug the memory device first and then branch off to the actual
+ * hotplug handler. If that one fails, we can easily undo the memory
+ * device bits.
+ */
+ memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+ hotplug_handler_plug(hotplug_dev2, dev, &local_err);
+ if (local_err) {
+ memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+ }
+ error_propagate(errp, local_err);
+}
+
+static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ /* We don't support virtio pmem hot unplug */
+ error_setg(errp, "virtio pmem device unplug not supported.");
+}
+
+static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ /* We don't support virtio pmem hot unplug */
+}
+
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
pc_memory_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_pre_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
+ pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp);
}
}
pc_memory_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
+ pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp);
}
}
pc_memory_unplug_request(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_unplug_request_cb(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
+ pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp);
} else {
error_setg(errp, "acpi: device unplug request for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
pc_memory_unplug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
pc_cpu_unplug_cb(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
+ pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp);
} else {
error_setg(errp, "acpi: device unplug for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
DeviceState *dev)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
- object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) {
return HOTPLUG_HANDLER(machine);
}
Error **errp)
{
MachineState *ms = MACHINE(obj);
- int64_t value = memory_region_size(&ms->device_memory->mr);
+ int64_t value = 0;
+
+ if (ms->device_memory) {
+ value = memory_region_size(&ms->device_memory->mr);
+ }
visit_type_int(v, name, &value, errp);
}
visit_type_OnOffAuto(v, name, &pcms->smm, errp);
}
-static bool pc_machine_get_nvdimm(Object *obj, Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
-
- return pcms->acpi_nvdimm_state.is_enabled;
-}
-
-static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
-
- pcms->acpi_nvdimm_state.is_enabled = value;
-}
-
-static char *pc_machine_get_nvdimm_persistence(Object *obj, Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
-
- return g_strdup(pcms->acpi_nvdimm_state.persistence_string);
-}
-
-static void pc_machine_set_nvdimm_persistence(Object *obj, const char *value,
- Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
- AcpiNVDIMMState *nvdimm_state = &pcms->acpi_nvdimm_state;
-
- if (strcmp(value, "cpu") == 0)
- nvdimm_state->persistence = 3;
- else if (strcmp(value, "mem-ctrl") == 0)
- nvdimm_state->persistence = 2;
- else {
- error_setg(errp, "-machine nvdimm-persistence=%s: unsupported option",
- value);
- return;
- }
-
- g_free(nvdimm_state->persistence_string);
- nvdimm_state->persistence_string = g_strdup(value);
-}
-
static bool pc_machine_get_smbus(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->max_ram_below_4g = 0; /* use default */
pcms->smm = ON_OFF_AUTO_AUTO;
pcms->vmport = ON_OFF_AUTO_AUTO;
- /* nvdimm is disabled on default. */
- pcms->acpi_nvdimm_state.is_enabled = false;
/* acpi build is enabled by default if machine supports it */
pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
pcms->smbus_enabled = true;
pcms->sata_enabled = true;
pcms->pit_enabled = true;
+ pcms->smp_dies = 1;
+
+ pc_system_flash_create(pcms);
}
-static void pc_machine_reset(void)
+static void pc_machine_reset(MachineState *machine)
{
CPUState *cs;
X86CPU *cpu;
static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx)
{
X86CPUTopoInfo topo;
+ PCMachineState *pcms = PC_MACHINE(ms);
assert(idx < ms->possible_cpus->len);
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id,
- smp_cores, smp_threads, &topo);
+ pcms->smp_dies, ms->smp.cores,
+ ms->smp.threads, &topo);
return topo.pkg_id % nb_numa_nodes;
}
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
{
+ PCMachineState *pcms = PC_MACHINE(ms);
int i;
+ unsigned int max_cpus = ms->smp.max_cpus;
if (ms->possible_cpus) {
/*
ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
- ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
+ ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i);
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
- smp_cores, smp_threads, &topo);
+ pcms->smp_dies, ms->smp.cores,
+ ms->smp.threads, &topo);
ms->possible_cpus->cpus[i].props.has_socket_id = true;
ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
+ ms->possible_cpus->cpus[i].props.has_die_id = true;
+ ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
ms->possible_cpus->cpus[i].props.has_core_id = true;
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
ms->possible_cpus->cpus[i].props.has_thread_id = true;
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->save_tsc_khz = true;
pcmc->linuxboot_dma_enabled = true;
+ pcmc->pvh_enabled = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->hot_add_cpu = pc_hot_add_cpu;
+ mc->smp_parse = pc_smp_parse;
mc->block_default_type = IF_IDE;
mc->max_cpus = 255;
mc->reset = pc_machine_reset;
hc->unplug = pc_machine_device_unplug_cb;
nc->nmi_monitor_handler = x86_nmi;
mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
+ mc->nvdimm_supported = true;
+ mc->numa_mem_supported = true;
object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int",
pc_machine_get_device_memory_region_size, NULL,
object_class_property_set_description(oc, PC_MACHINE_VMPORT,
"Enable vmport (pc & q35)", &error_abort);
- object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
- pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
-
- object_class_property_add_str(oc, PC_MACHINE_NVDIMM_PERSIST,
- pc_machine_get_nvdimm_persistence,
- pc_machine_set_nvdimm_persistence, &error_abort);
-
object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);