#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/cpu-common.h"
+#include "exec/memory.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "standard-headers/linux/pci_regs.h"
*/
static char *sysfs_find_group_file(const char *device, Error **errp)
{
+ g_autoptr(GError) gerr = NULL;
char *sysfs_link;
char *sysfs_group;
char *p;
char *path = NULL;
sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
- sysfs_group = g_malloc0(PATH_MAX);
- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
+ sysfs_group = g_file_read_link(sysfs_link, &gerr);
+ if (gerr) {
+ error_setg(errp, "Failed to find iommu group sysfs path: %s",
+ gerr->message);
goto out;
}
p = strrchr(sysfs_group, '/');
Error **errp)
{
void *p;
+ assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size()));
assert_bar_index_valid(s, index);
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
prot, MAP_SHARED,
s->config_region_info.offset,
s->config_region_info.size);
assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
- do {
- ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
- } while (ret == -1 && errno == EINTR);
+ ret = RETRY_ON_EINTR(
+ pread(s->device, buf, size, s->config_region_info.offset + ofs)
+ );
return ret == size ? 0 : -errno;
}
s->config_region_info.offset,
s->config_region_info.size);
assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
- do {
- ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
- } while (ret == -1 && errno == EINTR);
+ ret = RETRY_ON_EINTR(
+ pwrite(s->device, buf, size, s->config_region_info.offset + ofs)
+ );
return ret == size ? 0 : -errno;
}
if (!cap->next) {
return;
}
- cap = (struct vfio_info_cap_header *)(buf + cap->next);
+ cap = buf + cap->next;
}
cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
s->nb_iova_ranges = cap_iova_range->nr_iovas;
if (s->nb_iova_ranges > 1) {
s->usable_iova_ranges =
- g_realloc(s->usable_iova_ranges,
- s->nb_iova_ranges * sizeof(struct IOVARange));
+ g_renew(struct IOVARange, s->usable_iova_ranges,
+ s->nb_iova_ranges);
}
for (i = 0; i < s->nb_iova_ranges; i++) {
return ret;
}
-static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
- void *host, size_t size)
+static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
+ size_t size, size_t max_size)
{
QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
- trace_qemu_vfio_ram_block_added(s, host, size);
- qemu_vfio_dma_map(s, host, size, false, NULL);
+ Error *local_err = NULL;
+ int ret;
+
+ trace_qemu_vfio_ram_block_added(s, host, max_size);
+ ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err);
+ if (ret) {
+ error_reportf_err(local_err,
+ "qemu_vfio_dma_map(%p, %zu) failed: ",
+ host, max_size);
+ }
}
-static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
- void *host, size_t size)
+static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host,
+ size_t size, size_t max_size)
{
QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
if (host) {
- trace_qemu_vfio_ram_block_removed(s, host, size);
+ trace_qemu_vfio_ram_block_removed(s, host, max_size);
qemu_vfio_dma_unmap(s, host);
}
}
-static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
-{
- void *host_addr = qemu_ram_get_host_addr(rb);
- ram_addr_t length = qemu_ram_get_used_length(rb);
- int ret;
- QEMUVFIOState *s = opaque;
-
- if (!host_addr) {
- return 0;
- }
- ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
- if (ret) {
- fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRId64 "\n",
- host_addr, (uint64_t)length);
- }
- return 0;
-}
-
static void qemu_vfio_open_common(QEMUVFIOState *s)
{
qemu_mutex_init(&s->lock);
s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
- ram_block_notifier_add(&s->ram_notifier);
s->low_water_mark = QEMU_VFIO_IOVA_MIN;
s->high_water_mark = QEMU_VFIO_IOVA_MAX;
- qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
+ ram_block_notifier_add(&s->ram_notifier);
}
/**
int r;
QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
+ /*
+ * VFIO may pin all memory inside mappings, resulting it in pinning
+ * all memory inside RAM blocks unconditionally.
+ */
+ r = ram_block_discard_disable(true);
+ if (r) {
+ error_setg_errno(errp, -r, "Cannot set discarding of RAM broken");
+ g_free(s);
+ return NULL;
+ }
+
r = qemu_vfio_init_pci(s, device, errp);
if (r) {
+ ram_block_discard_disable(false);
g_free(s);
return NULL;
}
IOVAMapping m = {.host = host, .size = size, .iova = iova};
IOVAMapping *insert;
- assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
- assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
- assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
+ assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
+ assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size()));
+ assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size()));
trace_qemu_vfio_new_mapping(s, host, size, index, iova);
assert(index >= 0);
/* Do the DMA mapping with VFIO. */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
- uint64_t iova)
+ uint64_t iova, Error **errp)
{
struct vfio_iommu_type1_dma_map dma_map = {
.argsz = sizeof(dma_map),
trace_qemu_vfio_do_mapping(s, host, iova, size);
if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
- error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
+ error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
return -errno;
}
return 0;
index = mapping - s->mappings;
assert(mapping->size > 0);
- assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
+ assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size()));
assert(index >= 0 && index < s->nr_mappings);
if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
if (QEMU_VFIO_DEBUG) {
for (i = 0; i < s->nr_mappings - 1; ++i) {
if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
- fprintf(stderr, "item %d not sorted!\n", i);
+ error_report("item %d not sorted!", i);
qemu_vfio_dump_mappings(s);
return false;
}
if (!(s->mappings[i].host + s->mappings[i].size <=
s->mappings[i + 1].host)) {
- fprintf(stderr, "item %d overlap with next!\n", i);
+ error_report("item %d overlap with next!", i);
qemu_vfio_dump_mappings(s);
return false;
}
return true;
}
-static int
-qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+static bool qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size,
+ uint64_t *iova, Error **errp)
{
int i;
s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
*iova = s->low_water_mark;
s->low_water_mark += size;
- return 0;
+ return true;
}
}
- return -ENOMEM;
+ error_setg(errp, "fixed iova range not found");
+
+ return false;
}
-static int
-qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+static bool qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size,
+ uint64_t *iova, Error **errp)
{
int i;
s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
*iova = s->high_water_mark - size;
s->high_water_mark = *iova;
- return 0;
+ return true;
}
}
- return -ENOMEM;
+ error_setg(errp, "temporary iova range not found");
+
+ return false;
+}
+
+/**
+ * qemu_vfio_water_mark_reached:
+ *
+ * Returns %true if high watermark has been reached, %false otherwise.
+ */
+static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, size_t size,
+ Error **errp)
+{
+ if (s->high_water_mark - s->low_water_mark + 1 < size) {
+ error_setg(errp, "iova exhausted (water mark reached)");
+ return true;
+ }
+ return false;
}
/* Map [host, host + size) area into a contiguous IOVA address space, and store
* mapping status within this area is not allowed).
*/
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
- bool temporary, uint64_t *iova)
+ bool temporary, uint64_t *iova, Error **errp)
{
- int ret = 0;
int index;
IOVAMapping *mapping;
uint64_t iova0;
- assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
- assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+ assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size()));
+ assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
- qemu_mutex_lock(&s->lock);
+ QEMU_LOCK_GUARD(&s->lock);
mapping = qemu_vfio_find_mapping(s, host, &index);
if (mapping) {
iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
} else {
- if (s->high_water_mark - s->low_water_mark + 1 < size) {
- ret = -ENOMEM;
- goto out;
+ int ret;
+
+ if (qemu_vfio_water_mark_reached(s, size, errp)) {
+ return -ENOMEM;
}
if (!temporary) {
- if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
- ret = -ENOMEM;
- goto out;
+ if (!qemu_vfio_find_fixed_iova(s, size, &iova0, errp)) {
+ return -ENOMEM;
}
mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
- if (!mapping) {
- ret = -ENOMEM;
- goto out;
- }
assert(qemu_vfio_verify_mappings(s));
- ret = qemu_vfio_do_mapping(s, host, size, iova0);
- if (ret) {
+ ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
+ if (ret < 0) {
qemu_vfio_undo_mapping(s, mapping, NULL);
- goto out;
+ return ret;
}
qemu_vfio_dump_mappings(s);
} else {
- if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
- ret = -ENOMEM;
- goto out;
+ if (!qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
+ return -ENOMEM;
}
- ret = qemu_vfio_do_mapping(s, host, size, iova0);
- if (ret) {
- goto out;
+ ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
+ if (ret < 0) {
+ return ret;
}
}
}
if (iova) {
*iova = iova0;
}
-out:
- qemu_mutex_unlock(&s->lock);
- return ret;
+ return 0;
}
/* Reset the high watermark and free all "temporary" mappings. */
}
trace_qemu_vfio_dma_unmap(s, host);
- qemu_mutex_lock(&s->lock);
+ QEMU_LOCK_GUARD(&s->lock);
m = qemu_vfio_find_mapping(s, host, &index);
if (!m) {
- goto out;
+ return;
}
qemu_vfio_undo_mapping(s, m, NULL);
-out:
- qemu_mutex_unlock(&s->lock);
}
static void qemu_vfio_reset(QEMUVFIOState *s)
if (!s) {
return;
}
+
+ ram_block_notifier_remove(&s->ram_notifier);
+
for (i = 0; i < s->nr_mappings; ++i) {
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
- ram_block_notifier_remove(&s->ram_notifier);
+
g_free(s->usable_iova_ranges);
s->nb_iova_ranges = 0;
qemu_vfio_reset(s);
close(s->device);
close(s->group);
close(s->container);
+ ram_block_discard_disable(false);
}