#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
-#include "qemu-common.h"
+#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "sysemu/sysemu.h"
+#include "qemu/reserved-region.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "standard-headers/linux/virtio_ids.h"
#include "hw/virtio/virtio-bus.h"
-#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"
typedef struct VirtIOIOMMUDomain {
uint32_t id;
+ bool bypass;
GTree *mappings;
QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
+static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
+{
+ uint32_t sid;
+ bool bypassed;
+ VirtIOIOMMU *s = sdev->viommu;
+ VirtIOIOMMUEndpoint *ep;
+
+ sid = virtio_iommu_get_bdf(sdev);
+
+ qemu_rec_mutex_lock(&s->mutex);
+    /* The bypass state may be queried before reset has created the endpoints tree */
+ if (!s->endpoints) {
+ bypassed = s->config.bypass;
+ goto unlock;
+ }
+
+ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+ if (!ep || !ep->domain) {
+ bypassed = s->config.bypass;
+ } else {
+ bypassed = ep->domain->bypass;
+ }
+
+unlock:
+ qemu_rec_mutex_unlock(&s->mutex);
+ return bypassed;
+}
+
+/* Return whether the device is using IOMMU translation. */
+static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
+{
+ bool use_remapping;
+
+ assert(sdev);
+
+ use_remapping = !virtio_iommu_device_bypassed(sdev);
+
+ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
+ PCI_SLOT(sdev->devfn),
+ PCI_FUNC(sdev->devfn),
+ use_remapping);
+
+    /* Turn one region off before turning the other on */
+ if (use_remapping) {
+ memory_region_set_enabled(&sdev->bypass_mr, false);
+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
+ } else {
+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
+ memory_region_set_enabled(&sdev->bypass_mr, true);
+ }
+
+ return use_remapping;
+}
+
+static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
+{
+ GHashTableIter iter;
+ IOMMUPciBus *iommu_pci_bus;
+ int i;
+
+ g_hash_table_iter_init(&iter, s->as_by_busptr);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
+ for (i = 0; i < PCI_DEVFN_MAX; i++) {
+ if (!iommu_pci_bus->pbdev[i]) {
+ continue;
+ }
+ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
+ }
+ }
+}
+
/**
 * The bus number is used for lookup when SID-based operations occur.
* In that case we lazily populate the IOMMUPciBus array from the bus hash
}
}
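+/*
+ * IOMMU notifiers expect entry.addr_mask to describe a naturally aligned,
+ * power-of-two sized region, which the VIRTIO-IOMMU spec does not mandate
+ * for a range. Split [virt_start, virt_end] into such chunks.
+ */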
+static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
+ IOMMUTLBEvent *event,
+ hwaddr virt_start, hwaddr virt_end)
+{
+ uint64_t delta = virt_end - virt_start;
+
+ event->entry.iova = virt_start;
+ event->entry.addr_mask = delta;
+
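+    /*
+     * If the range covers the whole 64-bit space (delta == UINT64_MAX),
+     * the loop below never runs (virt_end + 1 wraps back to virt_start),
+     * so emit a single notification for the full range here.
+     */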
+ if (delta == UINT64_MAX) {
+ memory_region_notify_iommu(mr, 0, *event);
+ }
+
+ while (virt_start != virt_end + 1) {
+ uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
+
+ event->entry.addr_mask = mask;
+ event->entry.iova = virt_start;
+ memory_region_notify_iommu(mr, 0, *event);
+ virt_start += mask + 1;
+ if (event->entry.perm != IOMMU_NONE) {
+ event->entry.translated_addr += mask + 1;
+ }
+ }
+}
+
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end, hwaddr paddr,
uint32_t flags)
{
- IOMMUTLBEntry entry;
+ IOMMUTLBEvent event;
IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
flags & VIRTIO_IOMMU_MAP_F_WRITE);
trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
paddr, perm);
- entry.target_as = &address_space_memory;
- entry.addr_mask = virt_end - virt_start;
- entry.iova = virt_start;
- entry.perm = perm;
- entry.translated_addr = paddr;
-    memory_region_notify_iommu(mr, 0, entry);
+    event.type = IOMMU_NOTIFIER_MAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = perm;
+    event.entry.translated_addr = paddr;
+    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
hwaddr virt_end)
{
- IOMMUTLBEntry entry;
+ IOMMUTLBEvent event;
if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
return;
trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);
- entry.target_as = &address_space_memory;
- entry.addr_mask = virt_end - virt_start;
- entry.iova = virt_start;
- entry.perm = IOMMU_NONE;
- entry.translated_addr = 0;
-    memory_region_notify_iommu(mr, 0, entry);
+    event.type = IOMMU_NOTIFIER_UNMAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = IOMMU_NONE;
+    event.entry.translated_addr = 0;
+    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
VirtIOIOMMUDomain *domain = ep->domain;
+ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
if (!ep->domain) {
return;
ep->iommu_mr);
QLIST_REMOVE(ep, next);
ep->domain = NULL;
+ virtio_iommu_switch_address_space(sdev);
}
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
}
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
- uint32_t domain_id)
+ uint32_t domain_id,
+ bool bypass)
{
VirtIOIOMMUDomain *domain;
domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
if (domain) {
+ if (domain->bypass != bypass) {
+ return NULL;
+ }
return domain;
}
domain = g_malloc0(sizeof(*domain));
domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
NULL, (GDestroyNotify)g_free,
(GDestroyNotify)g_free);
+ domain->bypass = bypass;
g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
QLIST_INIT(&domain->endpoint_list);
trace_virtio_iommu_get_domain(domain_id);
g_free(domain);
}
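+/*
+ * Seed the per-device reserved region list from the regions defined
+ * through device properties.
+ */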
+static void add_prop_resv_regions(IOMMUDevice *sdev)
+{
+ VirtIOIOMMU *s = sdev->viommu;
+ int i;
+
+ for (i = 0; i < s->nr_prop_resv_regions; i++) {
+ ReservedRegion *reg = g_new0(ReservedRegion, 1);
+
+ *reg = s->prop_resv_regions[i];
+ sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+ }
+}
+
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
int devfn)
{
char *name = g_strdup_printf("%s-%d-%d",
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
mr_index++, devfn);
- sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice));
+ sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);
sdev->viommu = s;
sdev->bus = bus;
trace_virtio_iommu_init_iommu_mr(name);
+ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
+ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
+ add_prop_resv_regions(sdev);
+
+    /*
+     * Build the IOMMU-disabled container with aliases to the shared MRs.
+     * Aliasing to a shared memory region lets the memory API detect
+     * identical FlatViews, so devices in bypass mode (either because the
+     * virtio-iommu driver is not configured or because of "iommu=pt")
+     * can share the same FlatView. This greatly reduces the total number
+     * of FlatViews in the system, and hence the VM runs faster.
+     */
+ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
+ "system", get_system_memory(), 0,
+ memory_region_size(get_system_memory()));
+
memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
OBJECT(s), name,
UINT64_MAX);
- address_space_init(&sdev->as,
- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
+
+    /*
+     * Hook both containers under the root container; we switch between
+     * the iommu and bypass MRs by enabling/disabling the corresponding
+     * sub-containers.
+     */
+ memory_region_add_subregion_overlap(&sdev->root, 0,
+ MEMORY_REGION(&sdev->iommu_mr),
+ 0);
+ memory_region_add_subregion_overlap(&sdev->root, 0,
+ &sdev->bypass_mr, 0);
+
+ virtio_iommu_switch_address_space(sdev);
g_free(name);
}
return &sdev->as;
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint32_t ep_id = le32_to_cpu(req->endpoint);
+ uint32_t flags = le32_to_cpu(req->flags);
VirtIOIOMMUDomain *domain;
VirtIOIOMMUEndpoint *ep;
+ IOMMUDevice *sdev;
trace_virtio_iommu_attach(domain_id, ep_id);
+ if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
ep = virtio_iommu_get_endpoint(s, ep_id);
if (!ep) {
return VIRTIO_IOMMU_S_NOENT;
}
}
- domain = virtio_iommu_get_domain(s, domain_id);
+ domain = virtio_iommu_get_domain(s, domain_id,
+ flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
+ if (!domain) {
+ /* Incompatible bypass flag */
+ return VIRTIO_IOMMU_S_INVAL;
+ }
QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
ep->domain = domain;
+ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
+ virtio_iommu_switch_address_space(sdev);
/* Replay domain mappings on the associated memory region */
g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
return VIRTIO_IOMMU_S_NOENT;
}
+ if (domain->bypass) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
interval = g_malloc0(sizeof(*interval));
interval->low = virt_start;
if (!domain) {
return VIRTIO_IOMMU_S_NOENT;
}
+
+ if (domain->bypass) {
+ return VIRTIO_IOMMU_S_INVAL;
+ }
+
interval.low = virt_start;
interval.high = virt_end;
{
struct virtio_iommu_probe_resv_mem prop = {};
size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
- int i;
+ IOMMUDevice *sdev;
+ GList *l;
- total = size * s->nb_reserved_regions;
+ sdev = container_of(virtio_iommu_mr(s, ep), IOMMUDevice, iommu_mr);
+ if (!sdev) {
+ return -EINVAL;
+ }
+ total = size * g_list_length(sdev->resv_regions);
if (total > free) {
return -ENOSPC;
}
- for (i = 0; i < s->nb_reserved_regions; i++) {
- unsigned subtype = s->reserved_regions[i].type;
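+    /* Emit one RESV_MEM property per reserved region of this device */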
+ for (l = sdev->resv_regions; l; l = l->next) {
+ ReservedRegion *reg = l->data;
+ unsigned subtype = reg->type;
+        Range *range = &reg->range;
assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
prop.head.length = cpu_to_le16(length);
prop.subtype = subtype;
- prop.start = cpu_to_le64(s->reserved_regions[i].low);
- prop.end = cpu_to_le64(s->reserved_regions[i].high);
+ prop.start = cpu_to_le64(range_lob(range));
+ prop.end = cpu_to_le64(range_upb(range));
memcpy(buf, &prop, size);
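+/*
+ * Copy only the request payload from the guest: the trailing
+ * virtio_iommu_req_tail is filled in by the device.
+ */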
static int virtio_iommu_iov_to_req(struct iovec *iov,
unsigned int iov_cnt,
- void *req, size_t req_sz)
+ void *req, size_t payload_sz)
{
-    size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);
-    sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
+    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
if (unlikely(sz != payload_sz)) {
return VIRTIO_IOMMU_S_INVAL;
}
unsigned int iov_cnt) \
{ \
struct virtio_iommu_req_ ## __req req; \
- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
+ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \
+ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
\
return ret ? ret : virtio_iommu_ ## __req(s, &req); \
}
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
struct virtio_iommu_req_head head;
struct virtio_iommu_req_tail tail = {};
- size_t output_size = sizeof(tail), sz;
VirtQueueElement *elem;
unsigned int iov_cnt;
struct iovec *iov;
void *buf = NULL;
+ size_t sz;
for (;;) {
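+        /* Reset per request: a probe reply enlarges output_size below */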
+ size_t output_size = sizeof(tail);
+
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
return;
tail.status = VIRTIO_IOMMU_S_DEVERR;
goto out;
}
- qemu_mutex_lock(&s->mutex);
+ qemu_rec_mutex_lock(&s->mutex);
switch (head.type) {
case VIRTIO_IOMMU_T_ATTACH:
tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
output_size = s->config.probe_size + sizeof(tail);
buf = g_malloc0(output_size);
- ptail = (struct virtio_iommu_req_tail *)
- (buf + s->config.probe_size);
+ ptail = buf + s->config.probe_size;
ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
break;
}
default:
tail.status = VIRTIO_IOMMU_S_UNSUPP;
}
- qemu_mutex_unlock(&s->mutex);
+ qemu_rec_mutex_unlock(&s->mutex);
out:
sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
virtio_notify(vdev, vq);
g_free(elem);
g_free(buf);
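+        /* Avoid a double free if the next request allocates no buffer */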
+ buf = NULL;
}
}
VirtIOIOMMUEndpoint *ep;
uint32_t sid, flags;
bool bypass_allowed;
+ int granule;
bool found;
- int i;
+ GList *l;
interval.low = addr;
interval.high = addr + 1;
+ granule = ctz64(s->config.page_size_mask);
IOMMUTLBEntry entry = {
.target_as = &address_space_memory,
.iova = addr,
.translated_addr = addr,
- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
+ .addr_mask = BIT_ULL(granule) - 1,
.perm = IOMMU_NONE,
};
- bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
- VIRTIO_IOMMU_F_BYPASS);
+ bypass_allowed = s->config.bypass;
sid = virtio_iommu_get_bdf(sdev);
trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
- qemu_mutex_lock(&s->mutex);
+ qemu_rec_mutex_lock(&s->mutex);
ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+
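+    /*
+     * With global bypass on, a device only reaches the IOMMU MR (and hence
+     * this callback) when attached to a non-bypass domain; otherwise
+     * virtio_iommu_switch_address_space() selected the bypass MR.
+     */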
+    if (bypass_allowed) {
+        assert(ep && ep->domain && !ep->domain->bypass);
+    }
+
if (!ep) {
if (!bypass_allowed) {
error_report_once("%s sid=%d is not known!!", __func__, sid);
goto unlock;
}
- for (i = 0; i < s->nb_reserved_regions; i++) {
- ReservedRegion *reg = &s->reserved_regions[i];
+ for (l = sdev->resv_regions; l; l = l->next) {
+ ReservedRegion *reg = l->data;
- if (addr >= reg->low && addr <= reg->high) {
+        if (range_contains(&reg->range, addr)) {
switch (reg->type) {
case VIRTIO_IOMMU_RESV_MEM_T_MSI:
entry.perm = flag;
entry.perm = flag;
}
goto unlock;
+ } else if (ep->domain->bypass) {
+ entry.perm = flag;
+ goto unlock;
}
found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
unlock:
- qemu_mutex_unlock(&s->mutex);
+ qemu_rec_mutex_unlock(&s->mutex);
return entry;
}
static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
- struct virtio_iommu_config *config = &dev->config;
-
- trace_virtio_iommu_get_config(config->page_size_mask,
- config->input_range.start,
- config->input_range.end,
- config->domain_range.end,
- config->probe_size);
- memcpy(config_data, &dev->config, sizeof(struct virtio_iommu_config));
+ struct virtio_iommu_config *dev_config = &dev->config;
+ struct virtio_iommu_config *out_config = (void *)config_data;
+
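+    /* Config space fields are little-endian, as mandated by virtio 1.0 */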
+ out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
+ out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
+ out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
+ out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
+ out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
+ out_config->probe_size = cpu_to_le32(dev_config->probe_size);
+ out_config->bypass = dev_config->bypass;
+
+ trace_virtio_iommu_get_config(dev_config->page_size_mask,
+ dev_config->input_range.start,
+ dev_config->input_range.end,
+ dev_config->domain_range.start,
+ dev_config->domain_range.end,
+ dev_config->probe_size,
+ dev_config->bypass);
}
static void virtio_iommu_set_config(VirtIODevice *vdev,
- const uint8_t *config_data)
+ const uint8_t *config_data)
{
- struct virtio_iommu_config config;
-
- memcpy(&config, config_data, sizeof(struct virtio_iommu_config));
- trace_virtio_iommu_set_config(config.page_size_mask,
- config.input_range.start,
- config.input_range.end,
- config.domain_range.end,
- config.probe_size);
+ VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
+ struct virtio_iommu_config *dev_config = &dev->config;
+ const struct virtio_iommu_config *in_config = (void *)config_data;
+
+ if (in_config->bypass != dev_config->bypass) {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
+ virtio_error(vdev, "cannot set config.bypass");
+ return;
+ } else if (in_config->bypass != 0 && in_config->bypass != 1) {
+ virtio_error(vdev, "invalid config.bypass value '%u'",
+ in_config->bypass);
+ return;
+ }
+ dev_config->bypass = in_config->bypass;
+ virtio_iommu_switch_address_space_all(dev);
+ }
+
+ trace_virtio_iommu_set_config(in_config->bypass);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
sid = virtio_iommu_get_bdf(sdev);
- qemu_mutex_lock(&s->mutex);
+ qemu_rec_mutex_lock(&s->mutex);
if (!s->endpoints) {
goto unlock;
g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);
unlock:
- qemu_mutex_unlock(&s->mutex);
+ qemu_rec_mutex_unlock(&s->mutex);
}
static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
IOMMUNotifierFlag new,
Error **errp)
{
+ if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
+ error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
+ return -EINVAL;
+ }
+
if (old == IOMMU_NOTIFIER_NONE) {
trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
} else if (new == IOMMU_NOTIFIER_NONE) {
new_mask);
if ((cur_mask & new_mask) == 0) {
- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
+ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64
+ " incompatible with currently supported mask 0x%"PRIx64,
+ mr->parent_obj.name, new_mask, cur_mask);
return -1;
}
/*
- * After the machine is finalized, we can't change the mask anymore. If by
+ * Once the granule is frozen we can't change the mask anymore. If by
* chance the hotplugged device supports the same granule, we can still
- * accept it. Having a different masks is possible but the guest will use
- * sub-optimal block sizes, so warn about it.
+ * accept it.
*/
- if (qdev_hotplug) {
- int new_granule = ctz64(new_mask);
+ if (s->granule_frozen) {
int cur_granule = ctz64(cur_mask);
- if (new_granule != cur_granule) {
- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
- " is incompatible with mask 0x%"PRIx64, cur_mask,
- new_mask);
+ if (!(BIT_ULL(cur_granule) & new_mask)) {
+ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx",
+ mr->parent_obj.name, BIT_ULL(cur_granule));
return -1;
- } else if (new_mask != cur_mask) {
- warn_report("virtio-iommu page mask 0x%"PRIx64
- " does not match 0x%"PRIx64, cur_mask, new_mask);
}
return 0;
}
return 0;
}
+static void virtio_iommu_system_reset(void *opaque)
+{
+ VirtIOIOMMU *s = opaque;
+
+ trace_virtio_iommu_system_reset();
+
+ /*
+ * config.bypass is sticky across device reset, but should be restored on
+ * system reset
+ */
+ s->config.bypass = s->boot_bypass;
+ virtio_iommu_switch_address_space_all(s);
+}
+
+static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
+{
+ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
+ int granule;
+
+ if (likely(s->config.bypass)) {
+        /*
+         * Transiently enable the IOMMU MR to collect the page_size_mask
+         * requirements that memory_region_iommu_set_page_size_mask()
+         * receives from the VFIO region_add() callback.
+         */
+ s->config.bypass = false;
+ virtio_iommu_switch_address_space_all(s);
+ /* restore default */
+ s->config.bypass = true;
+ virtio_iommu_switch_address_space_all(s);
+ }
+ s->granule_frozen = true;
+ granule = ctz64(s->config.page_size_mask);
+ trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
+}
+
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
- virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
- sizeof(struct virtio_iommu_config));
+ virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));
memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
virtio_iommu_handle_command);
s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
- s->config.page_size_mask = TARGET_PAGE_MASK;
- s->config.input_range.end = -1UL;
- s->config.domain_range.end = 32;
+    /*
+     * config.bypass is needed to get the initial address space early,
+     * e.g. during VFIO realize.
+     */
+ s->config.bypass = s->boot_bypass;
+ s->config.page_size_mask = qemu_target_page_mask();
+ s->config.input_range.end = UINT64_MAX;
+ s->config.domain_range.end = UINT32_MAX;
s->config.probe_size = VIOMMU_PROBE_SIZE;
virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
- virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
+ virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);
- qemu_mutex_init(&s->mutex);
+ qemu_rec_mutex_init(&s->mutex);
s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
} else {
error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
}
+
+ s->machine_done.notify = virtio_iommu_freeze_granule;
+ qemu_add_machine_init_done_notifier(&s->machine_done);
+
+ qemu_register_reset(virtio_iommu_system_reset, s);
}
static void virtio_iommu_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+ qemu_unregister_reset(virtio_iommu_system_reset, s);
+ qemu_remove_machine_init_done_notifier(&s->machine_done);
+
g_hash_table_destroy(s->as_by_busptr);
if (s->domains) {
g_tree_destroy(s->domains);
g_tree_destroy(s->endpoints);
}
+ qemu_rec_mutex_destroy(&s->mutex);
+
virtio_delete_queue(s->req_vq);
virtio_delete_queue(s->event_vq);
virtio_cleanup(vdev);
static const VMStateDescription vmstate_domain = {
.name = "domain",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.pre_load = domain_preload,
.fields = (VMStateField[]) {
VMSTATE_UINT32(id, VirtIOIOMMUDomain),
VirtIOIOMMUInterval, VirtIOIOMMUMapping),
VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
vmstate_endpoint, VirtIOIOMMUEndpoint, next),
+ VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
VMSTATE_END_OF_LIST()
}
};
VirtIOIOMMU *s = opaque;
g_tree_foreach(s->domains, reconstruct_endpoints, s);
+
+    /*
+     * Memory regions are dynamically turned on/off depending on
+     * 'config.bypass' and the type of the attached domain, if any.
+     * After migration, make sure the memory regions are set up
+     * correctly again.
+     */
+ virtio_iommu_switch_address_space_all(s);
return 0;
}
static const VMStateDescription vmstate_virtio_iommu_device = {
.name = "virtio-iommu-device",
- .minimum_version_id = 1,
- .version_id = 1,
+ .minimum_version_id = 2,
+ .version_id = 2,
.post_load = iommu_post_load,
.fields = (VMStateField[]) {
- VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1,
+ VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
&vmstate_domain, VirtIOIOMMUDomain),
+ VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
VMSTATE_END_OF_LIST()
},
};
static const VMStateDescription vmstate_virtio_iommu = {
.name = "virtio-iommu",
- .minimum_version_id = 1,
+ .minimum_version_id = 2,
.priority = MIG_PRI_IOMMU,
- .version_id = 1,
+ .version_id = 2,
.fields = (VMStateField[]) {
VMSTATE_VIRTIO_DEVICE,
VMSTATE_END_OF_LIST()
};
static Property virtio_iommu_properties[] = {
- DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+ DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
+ TYPE_PCI_BUS, PCIBus *),
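+    /* Start in bypass so devices can DMA before the guest driver runs */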
+ DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
DEFINE_PROP_END_OF_LIST(),
};