git.proxmox.com Git - qemu.git/blobdiff - hw/vhost.c
misc: move include files to include/qemu/
[qemu.git] / hw / vhost.c
index 0ca3507f4439dcf9481de619a7804bb813f1d29e..4e1cb474187f86987b54fd225309db813a482055 100644 (file)
@@ -8,15 +8,20 @@
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
  */
 
 #include <sys/ioctl.h>
 #include "vhost.h"
 #include "hw/hw.h"
-#include "range.h"
+#include "qemu/range.h"
 #include <linux/vhost.h>
+#include "exec/address-spaces.h"
 
 static void vhost_dev_sync_region(struct vhost_dev *dev,
+                                  MemoryRegionSection *section,
                                   uint64_t mfirst, uint64_t mlast,
                                   uint64_t rfirst, uint64_t rlast)
 {
@@ -26,11 +31,12 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
     vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
     uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
 
-    assert(end / VHOST_LOG_CHUNK < dev->log_size);
-    assert(start / VHOST_LOG_CHUNK < dev->log_size);
     if (end < start) {
         return;
     }
+    assert(end / VHOST_LOG_CHUNK < dev->log_size);
+    assert(start / VHOST_LOG_CHUNK < dev->log_size);
+
     for (;from < to; ++from) {
         vhost_log_chunk_t log;
         int bit;
@@ -47,38 +53,52 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
         log = __sync_fetch_and_and(from, 0);
         while ((bit = sizeof(log) > sizeof(int) ?
                 ffsll(log) : ffs(log))) {
+            ram_addr_t ram_addr;
             bit -= 1;
-            cpu_physical_memory_set_dirty(addr + bit * VHOST_LOG_PAGE);
+            ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
+            memory_region_set_dirty(section->mr, ram_addr, VHOST_LOG_PAGE);
             log &= ~(0x1ull << bit);
         }
         addr += VHOST_LOG_CHUNK;
     }
 }
 
-static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
-                                          target_phys_addr_t start_addr,
-                                          target_phys_addr_t end_addr)
+static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+                                   MemoryRegionSection *section,
+                                   hwaddr start_addr,
+                                   hwaddr end_addr)
 {
-    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
     int i;
+
     if (!dev->log_enabled || !dev->started) {
         return 0;
     }
     for (i = 0; i < dev->mem->nregions; ++i) {
         struct vhost_memory_region *reg = dev->mem->regions + i;
-        vhost_dev_sync_region(dev, start_addr, end_addr,
+        vhost_dev_sync_region(dev, section, start_addr, end_addr,
                               reg->guest_phys_addr,
                               range_get_last(reg->guest_phys_addr,
                                              reg->memory_size));
     }
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_virtqueue *vq = dev->vqs + i;
-        vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
+        vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
                               range_get_last(vq->used_phys, vq->used_size));
     }
     return 0;
 }
 
+/* log_sync callback: sync one section; the end bound passed is inclusive. */
+static void vhost_log_sync(MemoryListener *listener,
+                          MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    hwaddr start_addr = section->offset_within_address_space;
+    vhost_sync_dirty_bitmap(dev, section, start_addr,
+                            range_get_last(start_addr, section->size));
+}
+
 /* Assign/unassign. Keep an unsorted array of non-overlapping
  * memory regions in dev->mem. */
 static void vhost_dev_unassign_memory(struct vhost_dev *dev,
@@ -118,7 +138,6 @@ static void vhost_dev_unassign_memory(struct vhost_dev *dev,
         if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
             --dev->mem->nregions;
             --to;
-            assert(to >= 0);
             ++overlap_middle;
             continue;
         }
@@ -249,19 +268,22 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
 {
     vhost_log_chunk_t *log;
     uint64_t log_base;
-    int r;
+    int r, i;
     if (size) {
-        log = qemu_mallocz(size * sizeof *log);
+        log = g_malloc0(size * sizeof *log);
     } else {
         log = NULL;
     }
     log_base = (uint64_t)(unsigned long)log;
     r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
     assert(r >= 0);
-    vhost_client_sync_dirty_bitmap(&dev->client, 0,
-                                   (target_phys_addr_t)~0x0ull);
+    for (i = 0; i < dev->n_mem_sections; ++i) {
+        /* Sync only the range covered by the old log */
+        vhost_sync_dirty_bitmap(dev, &dev->mem_sections[i], 0,
+                                dev->log_size * VHOST_LOG_CHUNK - 1);
+    }
     if (dev->log) {
-        qemu_free(dev->log);
+        g_free(dev->log);
     }
     dev->log = log;
     dev->log_size = size;
@@ -274,7 +296,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
     int i;
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_virtqueue *vq = dev->vqs + i;
-        target_phys_addr_t l;
+        hwaddr l;
         void *p;
 
         if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
@@ -295,26 +317,86 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
     return 0;
 }
 
-static void vhost_client_set_memory(CPUPhysMemoryClient *client,
-                                    target_phys_addr_t start_addr,
-                                    ram_addr_t size,
-                                    ram_addr_t phys_offset)
+static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
+                                                     uint64_t start_addr,
+                                                     uint64_t size)
 {
-    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
-    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+    int i, n = dev->mem->nregions;
+    for (i = 0; i < n; ++i) {
+        struct vhost_memory_region *reg = dev->mem->regions + i;
+        if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+                           start_addr, size)) {
+            return reg;
+        }
+    }
+    return NULL;
+}
+
+static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
+                                 uint64_t start_addr,
+                                 uint64_t size,
+                                 uint64_t uaddr)
+{
+    struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
+    uint64_t reglast; /* last byte of the overlapping table region */
+    uint64_t memlast; /* last byte of the requested range */
+    /* Returns true when the table must change to map the range at uaddr. */
+    if (!reg) {
+        return true; /* no table region overlaps the range */
+    }
+
+    reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+    memlast = range_get_last(start_addr, size);
+
+    /* Need to extend region? */
+    if (start_addr < reg->guest_phys_addr || memlast > reglast) {
+        return true;
+    }
+    /* userspace_addr changed? */
+    return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
+}
+
+static void vhost_set_memory(MemoryListener *listener,
+                             MemoryRegionSection *section,
+                             bool add)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = section->size;
+    bool log_dirty = memory_region_is_logging(section->mr);
     int s = offsetof(struct vhost_memory, regions) +
         (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
     uint64_t log_size;
     int r;
-    dev->mem = qemu_realloc(dev->mem, s);
+    void *ram;
+
+    dev->mem = g_realloc(dev->mem, s);
+
+    if (log_dirty) {
+        add = false;
+    }
 
     assert(size);
 
+    /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
+    ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
+    if (add) {
+        if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
+            /* Region exists with same address. Nothing to do. */
+            return;
+        }
+    } else {
+        if (!vhost_dev_find_reg(dev, start_addr, size)) {
+            /* Removing region that we don't access. Nothing to do. */
+            return;
+        }
+    }
+
     vhost_dev_unassign_memory(dev, start_addr, size);
-    if (flags == IO_MEM_RAM) {
+    if (add) {
         /* Add given mapping, merging adjacent regions if any */
-        vhost_dev_assign_memory(dev, start_addr, size,
-                                (uintptr_t)qemu_get_ram_ptr(phys_offset));
+        vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
     } else {
         /* Remove old mapping for this memory, if any. */
         vhost_dev_unassign_memory(dev, start_addr, size);
@@ -350,6 +432,64 @@ static void vhost_client_set_memory(CPUPhysMemoryClient *client,
     }
 }
 
+static bool vhost_section(MemoryRegionSection *section)
+{ /* True iff memory_region_is_ram() accepts the section's region. */
+    return memory_region_is_ram(section->mr);
+}
+
+static void vhost_begin(MemoryListener *listener)
+{ /* No-op; installed as .begin in vhost_dev_init(). */
+}
+
+static void vhost_commit(MemoryListener *listener)
+{ /* No-op; installed as .commit in vhost_dev_init(). */
+}
+
+static void vhost_region_add(MemoryListener *listener,
+                             MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    /* region_add hook: sections rejected by vhost_section() are ignored. */
+    if (!vhost_section(section)) {
+        return;
+    }
+    /* Grow the saved-section array by one and append a by-value copy. */
+    ++dev->n_mem_sections;
+    dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
+                                dev->n_mem_sections);
+    dev->mem_sections[dev->n_mem_sections - 1] = *section;
+    vhost_set_memory(listener, section, true);
+}
+
+static void vhost_region_del(MemoryListener *listener,
+                             MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    int i;
+    /* region_del hook: sections rejected by vhost_section() are ignored. */
+    if (!vhost_section(section)) {
+        return;
+    }
+    /* Unmap first, then drop the first saved entry with the same start. */
+    vhost_set_memory(listener, section, false);
+    for (i = 0; i < dev->n_mem_sections; ++i) {
+        if (dev->mem_sections[i].offset_within_address_space
+            == section->offset_within_address_space) {
+            --dev->n_mem_sections; /* only the count shrinks, not the array */
+            memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
+                    (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
+            break;
+        }
+    }
+}
+
+static void vhost_region_nop(MemoryListener *listener,
+                             MemoryRegionSection *section)
+{ /* No-op; installed as .region_nop in vhost_dev_init(). */
+}
+
 static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                     struct vhost_virtqueue *vq,
                                     unsigned idx, bool enable_log)
@@ -407,10 +547,10 @@ err_features:
     return r;
 }
 
-static int vhost_client_migration_log(CPUPhysMemoryClient *client,
-                                      int enable)
+static int vhost_migration_log(MemoryListener *listener, int enable)
 {
-    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
     int r;
     if (!!enable == dev->log_enabled) {
         return 0;
@@ -425,7 +565,7 @@ static int vhost_client_migration_log(CPUPhysMemoryClient *client,
             return r;
         }
         if (dev->log) {
-            qemu_free(dev->log);
+            g_free(dev->log);
         }
         dev->log = NULL;
         dev->log_size = 0;
@@ -440,12 +580,44 @@ static int vhost_client_migration_log(CPUPhysMemoryClient *client,
     return 0;
 }
 
+static void vhost_log_global_start(MemoryListener *listener)
+{
+    int r;
+    /* Enable logging via vhost_migration_log(); any failure is fatal. */
+    r = vhost_migration_log(listener, true);
+    if (r < 0) {
+        abort();
+    }
+}
+
+static void vhost_log_global_stop(MemoryListener *listener)
+{
+    int r;
+    /* Disable logging via vhost_migration_log(); any failure is fatal. */
+    r = vhost_migration_log(listener, false);
+    if (r < 0) {
+        abort();
+    }
+}
+
+static void vhost_log_start(MemoryListener *listener,
+                            MemoryRegionSection *section)
+{
+    /* FIXME: implement (currently a no-op .log_start hook) */
+}
+
+static void vhost_log_stop(MemoryListener *listener,
+                           MemoryRegionSection *section)
+{
+    /* FIXME: implement (currently a no-op .log_stop hook) */
+}
+
 static int vhost_virtqueue_init(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
 {
-    target_phys_addr_t s, l, a;
+    hwaddr s, l, a;
     int r;
     struct vhost_vring_file file = {
         .index = idx,
@@ -455,11 +627,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
     };
     struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
 
-    if (!vdev->binding->set_host_notifier) {
-        fprintf(stderr, "binding does not support host notifiers\n");
-        return -ENOSYS;
-    }
-
     vq->num = state.num = virtio_queue_get_num(vdev, idx);
     r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
     if (r) {
@@ -507,12 +674,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
         r = -errno;
         goto fail_alloc;
     }
-    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
-    if (r < 0) {
-        fprintf(stderr, "Error binding host notifier: %d\n", -r);
-        goto fail_host_notifier;
-    }
-
     file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
     r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
     if (r) {
@@ -531,8 +692,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
 
 fail_call:
 fail_kick:
-    vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-fail_host_notifier:
 fail_alloc:
     cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                               0, 0);
@@ -558,12 +717,6 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
         .index = idx,
     };
     int r;
-    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-    if (r < 0) {
-        fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
-        fflush(stderr);
-    }
-    assert (r >= 0);
     r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
     if (r < 0) {
         fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
@@ -581,14 +734,27 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
                               0, virtio_queue_get_desc_size(vdev, idx));
 }
 
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
+static void vhost_eventfd_add(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data, EventNotifier *e)
+{ /* No-op; installed as .eventfd_add in vhost_dev_init(). */
+}
+
+static void vhost_eventfd_del(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data, EventNotifier *e)
+{ /* No-op; installed as .eventfd_del in vhost_dev_init(). */
+}
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
+                   bool force)
 {
     uint64_t features;
     int r;
     if (devfd >= 0) {
         hdev->control = devfd;
     } else {
-        hdev->control = open("/dev/vhost-net", O_RDWR);
+        hdev->control = open(devpath, O_RDWR);
         if (hdev->control < 0) {
             return -errno;
         }
@@ -604,17 +770,29 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
     }
     hdev->features = features;
 
-    hdev->client.set_memory = vhost_client_set_memory;
-    hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
-    hdev->client.migration_log = vhost_client_migration_log;
-    hdev->client.log_start = NULL;
-    hdev->client.log_stop = NULL;
-    hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
+    hdev->memory_listener = (MemoryListener) {
+        .begin = vhost_begin,
+        .commit = vhost_commit,
+        .region_add = vhost_region_add,
+        .region_del = vhost_region_del,
+        .region_nop = vhost_region_nop,
+        .log_start = vhost_log_start,
+        .log_stop = vhost_log_stop,
+        .log_sync = vhost_log_sync,
+        .log_global_start = vhost_log_global_start,
+        .log_global_stop = vhost_log_global_stop,
+        .eventfd_add = vhost_eventfd_add,
+        .eventfd_del = vhost_eventfd_del,
+        .priority = 10
+    };
+    hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
+    hdev->n_mem_sections = 0;
+    hdev->mem_sections = NULL;
     hdev->log = NULL;
     hdev->log_size = 0;
     hdev->log_enabled = false;
     hdev->started = false;
-    cpu_register_phys_memory_client(&hdev->client);
+    memory_listener_register(&hdev->memory_listener, &address_space_memory);
     hdev->force = force;
     return 0;
 fail:
@@ -625,8 +803,9 @@ fail:
 
 void vhost_dev_cleanup(struct vhost_dev *hdev)
 {
-    cpu_unregister_phys_memory_client(&hdev->client);
-    qemu_free(hdev->mem);
+    memory_listener_unregister(&hdev->memory_listener);
+    g_free(hdev->mem);
+    g_free(hdev->mem_sections);
     close(hdev->control);
 }
 
@@ -637,6 +816,60 @@ bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
         hdev->force;
 }
 
+/* Stop processing guest IO notifications in qemu; start processing them in
+ * vhost in kernel.  Returns 0 on success, negative on failure; host notifiers
+ * bound before a failure are unbound again. */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+    int i, r, e; /* e: cleanup status, kept apart so r retains the error */
+    if (!vdev->binding->set_host_notifier) {
+        fprintf(stderr, "binding does not support host notifiers\n");
+        r = -ENOSYS;
+        goto fail;
+    }
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+        if (r < 0) {
+            fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+            goto fail_vq;
+        }
+    }
+
+    return 0;
+fail_vq:
+    while (--i >= 0) {
+        e = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (e < 0) {
+            fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -e);
+            fflush(stderr);
+        }
+        assert (e >= 0);
+    }
+fail:
+    return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+    int i, r;
+    /* Unbind each queue's host notifier; a failure is printed, then asserts. */
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (r < 0) {
+            fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+            fflush(stderr);
+        }
+        assert (r >= 0);
+    }
+}
+
+/* Host notifiers must be enabled at this point. */
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i, r;
@@ -674,7 +907,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
     if (hdev->log_enabled) {
         hdev->log_size = vhost_get_log_size(hdev);
         hdev->log = hdev->log_size ?
-            qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
+            g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
         r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
                   (uint64_t)(unsigned long)hdev->log);
         if (r < 0) {
@@ -702,6 +935,7 @@ fail:
     return r;
 }
 
+/* Host notifiers must be enabled at this point. */
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i, r;
@@ -712,8 +946,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
                                 hdev->vqs + i,
                                 i);
     }
-    vhost_client_sync_dirty_bitmap(&hdev->client, 0,
-                                   (target_phys_addr_t)~0x0ull);
+    for (i = 0; i < hdev->n_mem_sections; ++i) {
+        vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
+                                0, (hwaddr)~0x0ull);
+    }
     r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
     if (r < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
@@ -722,6 +958,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
     assert (r >= 0);
 
     hdev->started = false;
-    qemu_free(hdev->log);
+    g_free(hdev->log);
+    hdev->log = NULL;
     hdev->log_size = 0;
 }