]> git.proxmox.com Git - qemu.git/blobdiff - hw/i386/pc.c
Merge remote-tracking branch 'kraxel/e820.1' into staging
[qemu.git] / hw / i386 / pc.c
index e7ad93189d11f96e25828fc541dff565c873d99c..dee409d2711ce2ea9814a7ea1aecd9add16181e6 100644 (file)
@@ -55,6 +55,8 @@
 #include "hw/acpi/acpi.h"
 #include "hw/cpu/icc_bus.h"
 #include "hw/boards.h"
+#include "hw/pci/pci_host.h"
+#include "acpi-build.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -75,8 +77,6 @@
 #define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
 #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
 
-#define IO_APIC_DEFAULT_ADDRESS 0xfec00000
-
 #define E820_NR_ENTRIES                16
 
 struct e820_entry {
@@ -160,8 +160,9 @@ void cpu_smm_register(cpu_set_smm_t callback, void *arg)
 
 void cpu_smm_update(CPUX86State *env)
 {
-    if (smm_set && smm_arg && env == first_cpu)
+    if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) {
         smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
+    }
 }
 
 
@@ -185,18 +186,20 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 
 static void pic_irq_request(void *opaque, int irq, int level)
 {
-    CPUX86State *env = first_cpu;
+    CPUState *cs = first_cpu;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
 
     DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
     if (env->apic_state) {
-        while (env) {
+        CPU_FOREACH(cs) {
+            cpu = X86_CPU(cs);
+            env = &cpu->env;
             if (apic_accept_pic_intr(env->apic_state)) {
                 apic_deliver_pic_intr(env->apic_state, level);
             }
-            env = env->next_cpu;
         }
     } else {
-        CPUState *cs = CPU(x86_env_get_cpu(env));
         if (level) {
             cpu_interrupt(cs, CPU_INTERRUPT_HARD);
         } else {
@@ -886,8 +889,9 @@ void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd)
 
 DeviceState *cpu_get_current_apic(void)
 {
-    if (cpu_single_env) {
-        return cpu_single_env->apic_state;
+    if (current_cpu) {
+        X86CPU *cpu = X86_CPU(current_cpu);
+        return cpu->env.apic_state;
     } else {
         return NULL;
     }
@@ -908,20 +912,19 @@ static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
     X86CPU *cpu;
     Error *local_err = NULL;
 
-    cpu = cpu_x86_create(cpu_model, icc_bridge, errp);
-    if (!cpu) {
-        return cpu;
+    cpu = cpu_x86_create(cpu_model, icc_bridge, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return NULL;
     }
 
     object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
     object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
 
     if (local_err) {
-        if (cpu != NULL) {
-            object_unref(OBJECT(cpu));
-            cpu = NULL;
-        }
         error_propagate(errp, local_err);
+        object_unref(OBJECT(cpu));
+        cpu = NULL;
     }
     return cpu;
 }
@@ -975,7 +978,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
         cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
                          icc_bridge, &error);
         if (error) {
-            fprintf(stderr, "%s\n", error_get_pretty(error));
+            error_report("%s", error_get_pretty(error));
             error_free(error);
             exit(1);
         }
@@ -989,6 +992,107 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
     }
 }
 
+/* pci-info ROM file. Little endian format */
+typedef struct PcRomPciInfo {
+    uint64_t w32_min;
+    uint64_t w32_max;
+    uint64_t w64_min;
+    uint64_t w64_max;
+} PcRomPciInfo;
+
+static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info)
+{
+    PcRomPciInfo *info;
+    Object *pci_info;
+    bool ambiguous = false;
+
+    if (!guest_info->has_pci_info || !guest_info->fw_cfg) {
+        return;
+    }
+    pci_info = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous);
+    g_assert(!ambiguous);
+    if (!pci_info) {
+        return;
+    }
+
+    info = g_malloc(sizeof *info);
+    info->w32_min = cpu_to_le64(object_property_get_int(pci_info,
+                                PCI_HOST_PROP_PCI_HOLE_START, NULL));
+    info->w32_max = cpu_to_le64(object_property_get_int(pci_info,
+                                PCI_HOST_PROP_PCI_HOLE_END, NULL));
+    info->w64_min = cpu_to_le64(object_property_get_int(pci_info,
+                                PCI_HOST_PROP_PCI_HOLE64_START, NULL));
+    info->w64_max = cpu_to_le64(object_property_get_int(pci_info,
+                                PCI_HOST_PROP_PCI_HOLE64_END, NULL));
+    /* Pass PCI hole info to guest via a side channel.
+     * Required so guest PCI enumeration does the right thing. */
+    fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info);
+}
+
+typedef struct PcGuestInfoState {
+    PcGuestInfo info;
+    Notifier machine_done;
+} PcGuestInfoState;
+
+static
+void pc_guest_info_machine_done(Notifier *notifier, void *data)
+{
+    PcGuestInfoState *guest_info_state = container_of(notifier,
+                                                      PcGuestInfoState,
+                                                      machine_done);
+    pc_fw_cfg_guest_info(&guest_info_state->info);
+    acpi_setup(&guest_info_state->info);
+}
+
+PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
+                                ram_addr_t above_4g_mem_size)
+{
+    PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
+    PcGuestInfo *guest_info = &guest_info_state->info;
+    int i, j;
+
+    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
+    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
+    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
+    guest_info->numa_nodes = nb_numa_nodes;
+    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
+                                    sizeof *guest_info->node_mem);
+    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
+                                     sizeof *guest_info->node_cpu);
+
+    for (i = 0; i < max_cpus; i++) {
+        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
+        assert(apic_id < guest_info->apic_id_limit);
+        for (j = 0; j < nb_numa_nodes; j++) {
+            if (test_bit(i, node_cpumask[j])) {
+                guest_info->node_cpu[apic_id] = j;
+                break;
+            }
+        }
+    }
+
+    guest_info_state->machine_done.notify = pc_guest_info_machine_done;
+    qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
+    return guest_info;
+}
+
+void pc_init_pci64_hole(PcPciInfo *pci_info, uint64_t pci_hole64_start,
+                        uint64_t pci_hole64_size)
+{
+    if ((sizeof(hwaddr) == 4) || (!pci_hole64_size)) {
+        return;
+    }
+    /*
+     * BIOS does not set MTRR entries for the 64 bit window, so no need to
+     * align address to power of two.  Align address at 1G, this makes sure
+     * it can be exactly covered with a PAT entry even when using huge
+     * pages.
+     */
+    pci_info->w64.begin = ROUND_UP(pci_hole64_start, 0x1ULL << 30);
+    pci_info->w64.end = pci_info->w64.begin + pci_hole64_size;
+    assert(pci_info->w64.begin <= pci_info->w64.end);
+}
+
 void pc_acpi_init(const char *default_dsdt)
 {
     char *filename;
@@ -1012,10 +1116,10 @@ void pc_acpi_init(const char *default_dsdt)
         opts = qemu_opts_parse(qemu_find_opts("acpi"), arg, 0);
         g_assert(opts != NULL);
 
-        acpi_table_add(opts, &err);
+        acpi_table_add_builtin(opts, &err);
         if (err) {
-            fprintf(stderr, "WARNING: failed to load %s: %s\n", filename,
-                    error_get_pretty(err));
+            error_report("WARNING: failed to load %s: %s", filename,
+                         error_get_pretty(err));
             error_free(err);
         }
         g_free(arg);
@@ -1030,7 +1134,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
                            ram_addr_t below_4g_mem_size,
                            ram_addr_t above_4g_mem_size,
                            MemoryRegion *rom_memory,
-                           MemoryRegion **ram_memory)
+                           MemoryRegion **ram_memory,
+                           PcGuestInfo *guest_info)
 {
     int linux_boot, i;
     MemoryRegion *ram, *option_rom_mr;
@@ -1052,17 +1157,25 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
     memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
                              0, below_4g_mem_size);
     memory_region_add_subregion(system_memory, 0, ram_below_4g);
+    if (0) {
+        /*
+         * Ideally we should do that too, but that would ruin the e820
+         * reservations added by seabios before initializing fw_cfg.
+         */
+        e820_add_entry(0, below_4g_mem_size, E820_RAM);
+    }
     if (above_4g_mem_size > 0) {
         ram_above_4g = g_malloc(sizeof(*ram_above_4g));
         memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
                                  below_4g_mem_size, above_4g_mem_size);
         memory_region_add_subregion(system_memory, 0x100000000ULL,
                                     ram_above_4g);
+        e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM);
     }
 
 
     /* Initialize PC system firmware */
-    pc_system_firmware_init(rom_memory);
+    pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw);
 
     option_rom_mr = g_malloc(sizeof(*option_rom_mr));
     memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE);
@@ -1082,6 +1195,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
     for (i = 0; i < nb_option_roms; i++) {
         rom_add_option(option_rom[i].name, option_rom[i].bootindex);
     }
+    guest_info->fw_cfg = fw_cfg;
     return fw_cfg;
 }
 
@@ -1106,10 +1220,10 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 
 static void cpu_request_exit(void *opaque, int irq, int level)
 {
-    CPUX86State *env = cpu_single_env;
+    CPUState *cpu = current_cpu;
 
-    if (env && level) {
-        cpu_exit(CPU(x86_env_get_cpu(env)));
+    if (cpu && level) {
+        cpu_exit(cpu);
     }
 }
 
@@ -1203,8 +1317,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
         }
     }
 
-    a20_line = qemu_allocate_irqs(handle_a20_line_change,
-                                  x86_env_get_cpu(first_cpu), 2);
+    a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
     i8042 = isa_create_simple(isa_bus, "i8042");
     i8042_setup_a20_line(i8042, &a20_line[0]);
     if (!no_vmport) {
@@ -1240,7 +1353,7 @@ void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
         if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) {
             pc_init_ne2k_isa(isa_bus, nd);
         } else {
-            pci_nic_init_nofail(nd, "e1000", NULL);
+            pci_nic_init_nofail(nd, pci_bus, "e1000", NULL);
         }
     }
 }