*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "qemu/datadir.h"
+#include "qemu/units.h"
#include "hw/irq.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
-#include "exec/address-spaces.h"
#include "hw/hotplug.h"
#include "hw/boards.h"
#include "qapi/error.h"
static Property pci_props[] = {
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
+ DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1),
DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
QEMU_PCIE_EXTCAP_INIT_BITNR, true),
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
failover_pair_id),
+ DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
DEFINE_PROP_END_OF_LIST()
};
static void pcie_bus_realize(BusState *qbus, Error **errp)
{
PCIBus *bus = PCI_BUS(qbus);
+ Error *local_err = NULL;
- pci_bus_realize(qbus, errp);
+ pci_bus_realize(qbus, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
/*
* A PCI-E bus can support extended config space if it's the root
{
uint8_t type;
+ /* PCIe virtual functions do not have their own BARs */
+ assert(!pci_is_vf(d));
+
if (reg != PCI_ROM_SLOT)
return PCI_BASE_ADDRESS_0 + reg * 4;
}
}
-static void pci_do_device_reset(PCIDevice *dev)
+static void pci_reset_regions(PCIDevice *dev)
{
int r;
+ if (pci_is_vf(dev)) {
+ return;
+ }
+
+ for (r = 0; r < PCI_NUM_REGIONS; ++r) {
+ PCIIORegion *region = &dev->io_regions[r];
+ if (!region->size) {
+ continue;
+ }
+ if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(dev->config + pci_bar(dev, r), region->type);
+ } else {
+ pci_set_long(dev->config + pci_bar(dev, r), region->type);
+ }
+ }
+}
+
+static void pci_do_device_reset(PCIDevice *dev)
+{
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
- for (r = 0; r < PCI_NUM_REGIONS; ++r) {
- PCIIORegion *region = &dev->io_regions[r];
- if (!region->size) {
- continue;
- }
-
- if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(dev->config + pci_bar(dev, r), region->type);
- } else {
- pci_set_long(dev->config + pci_bar(dev, r), region->type);
- }
- }
+ pci_reset_regions(dev);
pci_update_mappings(dev);
msi_reset(dev);
return rootbus->qbus.name;
}
-static void pci_root_bus_init(PCIBus *bus, DeviceState *parent,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
- uint8_t devfn_min)
+bool pci_bus_bypass_iommu(PCIBus *bus)
+{
+ PCIBus *rootbus = bus;
+ PCIHostState *host_bridge;
+
+ if (!pci_bus_is_root(bus)) {
+ rootbus = pci_device_root_bus(bus->parent_dev);
+ }
+
+ host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent);
+
+ assert(host_bridge->bus == rootbus);
+
+ return host_bridge->bypass_iommu;
+}
+
+static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent,
+ MemoryRegion *address_space_mem,
+ MemoryRegion *address_space_io,
+ uint8_t devfn_min)
{
assert(PCI_FUNC(devfn_min) == 0);
bus->devfn_min = devfn_min;
return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS);
}
-void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
- const char *name,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
- uint8_t devfn_min, const char *typename)
+void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent,
+ const char *name,
+ MemoryRegion *address_space_mem,
+ MemoryRegion *address_space_io,
+ uint8_t devfn_min, const char *typename)
{
- qbus_create_inplace(bus, bus_size, typename, parent, name);
- pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
- devfn_min);
+ qbus_init(bus, bus_size, typename, parent, name);
+ pci_root_bus_internal_init(bus, parent, address_space_mem,
+ address_space_io, devfn_min);
}
PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
{
PCIBus *bus;
- bus = PCI_BUS(qbus_create(typename, parent, name));
- pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
- devfn_min);
+ bus = PCI_BUS(qbus_new(typename, parent, name));
+ pci_root_bus_internal_init(bus, parent, address_space_mem,
+ address_space_io, devfn_min);
return bus;
}
return PCI_BUS_GET_CLASS(s)->bus_num(s);
}
+/* Returns the min and max bus numbers of a PCI bus hierarchy */
+void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus)
+{
+ int i;
+ *min_bus = *max_bus = pci_bus_num(bus);
+
+ for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+ PCIDevice *dev = bus->devices[i];
+
+ if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) {
+ *min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]);
+ *max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]);
+ }
+ }
+}
+
int pci_bus_numa_node(PCIBus *bus)
{
return PCI_BUS_GET_CLASS(bus)->numa_node(bus);
/* just put buffer */
static int put_pci_config_device(QEMUFile *f, void *pv, size_t size,
- const VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, JSONWriter *vmdesc)
{
const uint8_t **v = pv;
assert(size == pci_config_size(container_of(pv, PCIDevice, config)));
}
static int put_pci_irq_state(QEMUFile *f, void *pv, size_t size,
- const VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, JSONWriter *vmdesc)
{
int i;
PCIDevice *s = container_of(pv, PCIDevice, irq_state);
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
+ /*
+ * With SR/IOV and ARI, a device at function 0 need not be a multifunction
+ * device, as it may just be a VF that ended up with function 0 in
+ * the legacy PCI interpretation. Avoid failing in such cases:
+ */
+ if (pci_is_vf(dev) &&
+ dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ return;
+ }
+
/*
* multifunction bit is interpreted in two ways as follows.
* - all functions must set the bit to 1.
return NULL;
} else if (!pci_bus_devfn_available(bus, devfn)) {
error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " in use by %s",
+ " in use by %s,id=%s",
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
- bus->devices[devfn]->name);
+ bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
return NULL;
} else if (dev->hotplugged &&
+ !pci_is_vf(pci_dev) &&
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" new func %s cannot be exposed to guest.",
address_space_init(&pci_dev->bus_master_as,
&pci_dev->bus_master_container_region, pci_dev->name);
- if (qdev_hotplug) {
+ if (phase_check(PHASE_MACHINE_READY)) {
pci_init_bus_master(pci_dev);
}
pci_dev->irq_state = 0;
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
+ assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
return pci_dev->io_regions[region_num].addr;
}
-static pcibus_t pci_bar_address(PCIDevice *d,
- int reg, uint8_t type, pcibus_t size)
+static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
+ uint8_t type, pcibus_t size)
+{
+ pcibus_t new_addr;
+ if (!pci_is_vf(d)) {
+ int bar = pci_bar(d, reg);
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(d->config + bar);
+ } else {
+ new_addr = pci_get_long(d->config + bar);
+ }
+ } else {
+ PCIDevice *pf = d->exp.sriov_vf.pf;
+ uint16_t sriov_cap = pf->exp.sriov_cap;
+ int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
+ uint16_t vf_offset =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+ uint16_t vf_stride =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+ uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(pf->config + bar);
+ } else {
+ new_addr = pci_get_long(pf->config + bar);
+ }
+ new_addr += vf_num * size;
+ }
+ /* The ROM slot has a specific enable bit, keep it intact */
+ if (reg != PCI_ROM_SLOT) {
+ new_addr &= ~(size - 1);
+ }
+ return new_addr;
+}
+
+pcibus_t pci_bar_address(PCIDevice *d,
+ int reg, uint8_t type, pcibus_t size)
{
pcibus_t new_addr, last_addr;
- int bar = pci_bar(d, reg);
uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
Object *machine = qdev_get_machine();
ObjectClass *oc = object_get_class(machine);
if (!(cmd & PCI_COMMAND_IO)) {
return PCI_BAR_UNMAPPED;
}
- new_addr = pci_get_long(d->config + bar) & ~(size - 1);
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
last_addr = new_addr + size - 1;
/* Check if 32 bit BAR wraps around explicitly.
* TODO: make priorities correct and remove this work around.
if (!(cmd & PCI_COMMAND_MEMORY)) {
return PCI_BAR_UNMAPPED;
}
- if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- new_addr = pci_get_quad(d->config + bar);
- } else {
- new_addr = pci_get_long(d->config + bar);
- }
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
/* the ROM slot has a specific enable bit */
if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) {
return PCI_BAR_UNMAPPED;
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
+ if (!d->has_power) {
+ new_addr = PCI_BAR_UNMAPPED;
+ }
/* This bar isn't changed */
if (new_addr == r->addr)
/* now do the real mapping */
if (r->addr != PCI_BAR_UNMAPPED) {
- trace_pci_update_mappings_del(d, pci_dev_bus_num(d),
+ trace_pci_update_mappings_del(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
}
r->addr = new_addr;
if (r->addr != PCI_BAR_UNMAPPED) {
- trace_pci_update_mappings_add(d, pci_dev_bus_num(d),
+ trace_pci_update_mappings_add(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
if (range_covers_byte(addr, l, PCI_COMMAND)) {
pci_update_irq_disabled(d, was_irq_disabled);
memory_region_set_enabled(&d->bus_master_enable_region,
- pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->has_power);
}
msi_write_config(d, addr, val_in, l);
msix_write_config(d, addr, val_in, l);
+ pcie_sriov_config_write(d, addr, val_in, l);
}
/***********************************************************/
PCIDevice *pci_dev = opaque;
int change;
+ assert(0 <= irq_num && irq_num < PCI_NUM_PINS);
+ assert(level == 0 || level == 1);
change = level - pci_irq_state(pci_dev, irq_num);
if (!change)
return;
pci_change_irq_level(pci_dev, irq_num, change);
}
-static inline int pci_intx(PCIDevice *pci_dev)
-{
- return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
-}
-
qemu_irq pci_allocate_irq(PCIDevice *pci_dev)
{
int intx = pci_intx(pci_dev);
+ assert(0 <= intx && intx < PCI_NUM_PINS);
return qemu_allocate_irq(pci_irq_handler, pci_dev, intx);
}
{ 0x0902, "Mouse", "mouse"},
{ 0x0A00, "Dock station", "dock", 0x00ff},
{ 0x0B00, "i386 cpu", "cpu", 0x00ff},
- { 0x0c00, "Fireware contorller", "fireware"},
+ { 0x0c00, "Firewire controller", "firewire"},
{ 0x0c01, "Access bus controller", "access-bus"},
{ 0x0c02, "SSA controller", "ssa"},
{ 0x0c03, "USB controller", "usb"},
{ 0, NULL}
};
-static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
- void (*fn)(PCIBus *b,
- PCIDevice *d,
- void *opaque),
- void *opaque)
+void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+ pci_bus_dev_fn fn,
+ void *opaque)
{
PCIDevice *d;
int devfn;
}
void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
- void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
- void *opaque)
+ pci_bus_dev_fn fn, void *opaque)
{
bus = pci_find_bus_nr(bus, bus_num);
}
}
-static void pci_for_each_device_under_bus(PCIBus *bus,
- void (*fn)(PCIBus *b, PCIDevice *d,
- void *opaque),
- void *opaque)
+void pci_for_each_device_under_bus(PCIBus *bus,
+ pci_bus_dev_fn fn, void *opaque)
{
PCIDevice *d;
int devfn;
}
void pci_for_each_device(PCIBus *bus, int bus_num,
- void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
- void *opaque)
+ pci_bus_dev_fn fn, void *opaque)
{
bus = pci_find_bus_nr(bus, bus_num);
static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev)
{
- PciMemoryRegionList *head = NULL, *cur_item = NULL;
+ PciMemoryRegionList *head = NULL, **tail = &head;
int i;
for (i = 0; i < PCI_NUM_REGIONS; i++) {
const PCIIORegion *r = &dev->io_regions[i];
- PciMemoryRegionList *region;
+ PciMemoryRegion *region;
if (!r->size) {
continue;
}
region = g_malloc0(sizeof(*region));
- region->value = g_malloc0(sizeof(*region->value));
if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
- region->value->type = g_strdup("io");
+ region->type = g_strdup("io");
} else {
- region->value->type = g_strdup("memory");
- region->value->has_prefetch = true;
- region->value->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH);
- region->value->has_mem_type_64 = true;
- region->value->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
+ region->type = g_strdup("memory");
+ region->has_prefetch = true;
+ region->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH);
+ region->has_mem_type_64 = true;
+ region->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
}
- region->value->bar = i;
- region->value->address = r->addr;
- region->value->size = r->size;
+ region->bar = i;
+ region->address = r->addr;
+ region->size = r->size;
- /* XXX: waiting for the qapi to support GSList */
- if (!cur_item) {
- head = cur_item = region;
- } else {
- cur_item->next = region;
- cur_item = region;
- }
+ QAPI_LIST_APPEND(tail, region);
}
return head;
static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num)
{
- PciDeviceInfoList *info, *head = NULL, *cur_item = NULL;
+ PciDeviceInfoList *head = NULL, **tail = &head;
PCIDevice *dev;
int devfn;
for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
dev = bus->devices[devfn];
if (dev) {
- info = g_malloc0(sizeof(*info));
- info->value = qmp_query_pci_device(dev, bus, bus_num);
-
- /* XXX: waiting for the qapi to support GSList */
- if (!cur_item) {
- head = cur_item = info;
- } else {
- cur_item->next = info;
- cur_item = info;
- }
+ QAPI_LIST_APPEND(tail, qmp_query_pci_device(dev, bus, bus_num));
}
}
PciInfoList *qmp_query_pci(Error **errp)
{
- PciInfoList *info, *head = NULL, *cur_item = NULL;
+ PciInfoList *head = NULL, **tail = &head;
PCIHostState *host_bridge;
QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
- info = g_malloc0(sizeof(*info));
- info->value = qmp_query_pci_bus(host_bridge->bus,
- pci_bus_num(host_bridge->bus));
-
- /* XXX: waiting for the qapi to support GSList */
- if (!cur_item) {
- head = cur_item = info;
- } else {
- cur_item->next = info;
- cur_item = info;
- }
+ QAPI_LIST_APPEND(tail,
+ qmp_query_pci_bus(host_bridge->bus,
+ pci_bus_num(host_bridge->bus)));
}
return head;
return NULL;
}
-void pci_for_each_bus_depth_first(PCIBus *bus,
- void *(*begin)(PCIBus *bus, void *parent_state),
- void (*end)(PCIBus *bus, void *state),
- void *parent_state)
+void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
+ pci_bus_fn end, void *parent_state)
{
PCIBus *sec;
void *state;
bool is_default_rom;
uint16_t class_id;
+ if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
+ error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
+ return;
+ }
+
/* initialize cap_present for pci_is_express() and pci_config_size(),
* Note that hybrid PCIs are not set automatically and need to manage
* QEMU_PCI_CAP_EXPRESS manually */
pci_qdev_unrealize(DEVICE(pci_dev));
return;
}
- if (!(pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
- && (PCI_FUNC(pci_dev->devfn) == 0)) {
- qdev->allow_unplug_during_migration = true;
- } else {
+ if ((pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
+ || (PCI_FUNC(pci_dev->devfn) != 0)) {
error_setg(errp, "failover: primary device must be in its own "
"PCI slot");
pci_qdev_unrealize(DEVICE(pci_dev));
pci_qdev_unrealize(DEVICE(pci_dev));
return;
}
+
+ pci_set_power(pci_dev, true);
}
PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
/* Patch the PCI vendor and device ids in a PCI rom image if necessary.
This is needed for an option rom which is used for more than one device. */
-static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size)
+static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
{
uint16_t vendor_id;
uint16_t device_id;
static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
Error **errp)
{
- int size;
+ int64_t size;
char *path;
void *ptr;
char name[32];
error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
g_free(path);
return;
+ } else if (size > 2 * GiB) {
+ error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)",
+ pdev->romfile);
+ g_free(path);
+ return;
+ }
+ if (pdev->romsize != -1) {
+ if (size > pdev->romsize) {
+ error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u",
+ pdev->romfile, (uint32_t)size, pdev->romsize);
+ g_free(path);
+ return;
+ }
+ } else {
+ pdev->romsize = pow2ceil(size);
}
- size = pow2ceil(size);
vmsd = qdev_get_vmsd(DEVICE(pdev));
snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev)));
}
pdev->has_rom = true;
- memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, size, &error_fatal);
+ memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal);
ptr = memory_region_get_ram_ptr(&pdev->rom);
if (load_image_size(path, ptr, size) < 0) {
error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
iommu_bus = parent_bus;
}
- if (iommu_bus && iommu_bus->iommu_fn) {
+ if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
}
return &address_space_memory;
return msg;
}
+void pci_set_power(PCIDevice *d, bool state)
+{
+ if (d->has_power == state) {
+ return;
+ }
+
+ d->has_power = state;
+ pci_update_mappings(d);
+ memory_region_set_enabled(&d->bus_master_enable_region,
+ (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->has_power);
+ if (!d->has_power) {
+ pci_device_reset(d);
+ }
+}
+
static const TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,