vfio-pci currently requires a host= parameter, which comes in the
form of a PCI address in [domain:]<bus:slot.function> notation. We
expect to find a matching entry in sysfs for that under
/sys/bus/pci/devices/. vfio-platform takes a similar approach, but
defines the host= parameter to be a string, which can be matched
directly under /sys/bus/platform/devices/. On the PCI side, we have
some interest in using vfio to expose vGPU devices. These are not
actual discrete PCI devices, so they don't have a compatible host PCI
bus address or a device link where QEMU wants to look for it. There's
also really no requirement that vfio can only be used to expose
physical devices, a new vfio bus and iommu driver could expose a
completely emulated device. To fit within the vfio framework, it
would need a kernel struct device and associated IOMMU group, but
those are easy constraints to manage.
To support such devices, which would include vGPUs, that honor the
VFIO PCI programming API, but are not necessarily backed by a unique
PCI address, add support for specifying any device in sysfs. The
vfio API already has support for probing the device type to ensure
compatibility with either vfio-pci or vfio-platform.
With this, a vfio-pci device could either be specified as:
-device vfio-pci,host=02:00.0
or
-device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0
or even
-device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0
When vGPU support comes along, this might look something more like:
-device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0
NB - This is only a made up example path
The same change is made for vfio-platform, specifying sysfsdev has
precedence over the old host option.
Tested-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified romfile\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
+ vdev->vbasedev.name);
pwrite(fd, &size, 4, offset) != 4 ||
pread(fd, &size, 4, offset) != 4 ||
pwrite(fd, &orig, 4, offset) != 4) {
pwrite(fd, &size, 4, offset) != 4 ||
pread(fd, &size, 4, offset) != 4 ||
pwrite(fd, &orig, 4, offset) != 4) {
- error_report("%s(%04x:%02x:%02x.%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified non zero value for "
- "rombar\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
+ vdev->vbasedev.name);
- error_printf("Warning : Rom loading for device at "
- "%04x:%02x:%02x.%x has been disabled due to "
- "system instability issues. "
- "Specify rombar=1 or romfile to force\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
+ vdev->vbasedev.name);
return;
}
}
trace_vfio_pci_size_rom(vdev->vbasedev.name, size);
return;
}
}
trace_vfio_pci_size_rom(vdev->vbasedev.name, size);
- snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ snprintf(name, sizeof(name), "vfio[%s].rom", vdev->vbasedev.name);
memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),
&vfio_rom_ops, vdev, name, size);
memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),
&vfio_rom_ops, vdev, name, size);
ret = pread(vdev->vbasedev.fd, &phys_val, len,
vdev->config_offset + addr);
if (ret != len) {
ret = pread(vdev->vbasedev.fd, &phys_val, len,
vdev->config_offset + addr);
if (ret != len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, len);
+ error_report("%s(%s, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, len);
return -errno;
}
phys_val = le32_to_cpu(phys_val);
return -errno;
}
phys_val = le32_to_cpu(phys_val);
/* Write everything to VFIO, let it filter out what we can't write */
if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
!= len) {
/* Write everything to VFIO, let it filter out what we can't write */
if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
!= len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, val, len);
+ error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, val, len);
}
/* MSI/MSI-X Enabling/Disabling */
}
/* MSI/MSI-X Enabling/Disabling */
- snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function, nr);
+ snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr);
/* Determine what type of BAR this is for registration */
ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
/* Determine what type of BAR this is for registration */
ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
- error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
- "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function,
+ error_report("vfio: %s Error adding PCI capability "
+ "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name,
cap_id, size, pos, ret);
return ret;
}
cap_id, size, pos, ret);
return ret;
}
vfio_intx_enable(vdev);
}
vfio_intx_enable(vdev);
}
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
- PCIHostDeviceAddress *host2)
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
- return (host1->domain == host2->domain && host1->bus == host2->bus &&
- host1->slot == host2->slot && host1->function == host2->function);
+ char tmp[13];
+
+ sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
+ addr->bus, addr->slot, addr->function);
+
+ return (strcmp(tmp, name) == 0);
}
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
}
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
if (ret && errno != ENOSPC) {
ret = -errno;
if (!vdev->has_pm_reset) {
if (ret && errno != ENOSPC) {
ret = -errno;
if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
- "no available reset mechanism.", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function);
+ error_report("vfio: Cannot reset device %s, "
+ "no available reset mechanism.", vdev->vbasedev.name);
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
if (single) {
ret = -EINVAL;
goto out_single;
if (single) {
ret = -EINVAL;
goto out_single;
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
vfio_pci_post_reset(tmp);
break;
}
vfio_pci_post_reset(tmp);
break;
}
* guest to contain the error.
*/
* guest to contain the error.
*/
- error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
- "Please collect any data possible and then kill the guest",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char *tmp, group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
int ret;
ssize_t len;
struct stat st;
int groupid;
int ret;
- /* Check that the host device exists */
- snprintf(path, sizeof(path),
- "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
- if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
+ if (!vdev->vbasedev.sysfsdev) {
+ vdev->vbasedev.sysfsdev =
+ g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
+ vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+ }
+
+ if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
+ error_report("vfio: error: no such host device: %s",
+ vdev->vbasedev.sysfsdev);
+ vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
- strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+ tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
- len = readlink(path, iommu_group_path, sizeof(path));
- if (len <= 0 || len >= sizeof(path)) {
+ if (len <= 0 || len >= sizeof(group_path)) {
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
- iommu_group_path[len] = 0;
- group_name = basename(iommu_group_path);
+ group_name = basename(group_path);
if (sscanf(group_name, "%d", &groupid) != 1) {
if (sscanf(group_name, "%d", &groupid) != 1) {
- error_report("vfio: error reading %s: %m", path);
+ error_report("vfio: error reading %s: %m", group_path);
- snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
-
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
- error_report("vfio: error: device %s is already attached", path);
+ error_report("vfio: error: device %s is already attached",
+ vdev->vbasedev.name);
vfio_put_group(group);
return -EBUSY;
}
}
vfio_put_group(group);
return -EBUSY;
}
}
- ret = vfio_get_device(group, path, &vdev->vbasedev);
+ ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev);
- error_report("vfio: failed to get device %s", path);
+ error_report("vfio: failed to get device %s", vdev->vbasedev.name);
vfio_put_group(group);
return ret;
}
vfio_put_group(group);
return ret;
}
static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
+ DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
{
VFIOGroup *group;
VFIODevice *vbasedev_iter;
{
VFIOGroup *group;
VFIODevice *vbasedev_iter;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char *tmp, group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
int ret;
ssize_t len;
struct stat st;
int groupid;
int ret;
- /* name must be set prior to the call */
- if (!vbasedev->name || strchr(vbasedev->name, '/')) {
- return -EINVAL;
- }
+ /* @sysfsdev takes precedence over @host */
+ if (vbasedev->sysfsdev) {
+ g_free(vbasedev->name);
+ vbasedev->name = g_strdup(basename(vbasedev->sysfsdev));
+ } else {
+ if (!vbasedev->name || strchr(vbasedev->name, '/')) {
+ return -EINVAL;
+ }
- /* Check that the host device exists */
- g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
- vbasedev->name);
+ vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
+ vbasedev->name);
+ }
- if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
+ error_report("vfio: error: no such host device: %s",
+ vbasedev->sysfsdev);
- g_strlcat(path, "iommu_group", sizeof(path));
- len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
- if (len < 0 || len >= sizeof(iommu_group_path)) {
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
+
+ if (len < 0 || len >= sizeof(group_path)) {
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
- iommu_group_path[len] = 0;
- group_name = basename(iommu_group_path);
+ group_name = basename(group_path);
if (sscanf(group_name, "%d", &groupid) != 1) {
if (sscanf(group_name, "%d", &groupid) != 1) {
- error_report("vfio: error reading %s: %m", path);
+ error_report("vfio: error reading %s: %m", group_path);
- g_snprintf(path, sizeof(path), "%s", vbasedev->name);
-
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
- error_report("vfio: error: device %s is already attached", path);
+ error_report("vfio: error: device %s is already attached",
+ vbasedev->name);
vfio_put_group(group);
return -EBUSY;
}
}
vfio_put_group(group);
return -EBUSY;
}
}
- ret = vfio_get_device(group, path, vbasedev);
+ ret = vfio_get_device(group, vbasedev->name, vbasedev);
- error_report("vfio: failed to get device %s", path);
+ error_report("vfio: failed to get device %s", vbasedev->name);
vfio_put_group(group);
return ret;
}
ret = vfio_populate_device(vbasedev);
if (ret) {
vfio_put_group(group);
return ret;
}
ret = vfio_populate_device(vbasedev);
if (ret) {
- error_report("vfio: failed to populate device %s", path);
+ error_report("vfio: failed to populate device %s", vbasedev->name);
vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
vbasedev->ops = &vfio_platform_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
vbasedev->ops = &vfio_platform_ops;
- trace_vfio_platform_realize(vbasedev->name, vdev->compat);
+ trace_vfio_platform_realize(vbasedev->sysfsdev ?
+ vbasedev->sysfsdev : vbasedev->name,
+ vdev->compat);
ret = vfio_base_device_init(vbasedev);
if (ret) {
ret = vfio_base_device_init(vbasedev);
if (ret) {
static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
+ DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
mmap_timeout, 1100),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
mmap_timeout, 1100),
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
char *name;
int fd;
int type;
char *name;
int fd;
int type;