Merge remote branch 'mst/for_anthony' into staging

author Anthony Liguori <aliguori@us.ibm.com>
Mon, 17 Jan 2011 15:49:38 +0000 (09:49 -0600)
committer Anthony Liguori <aliguori@us.ibm.com>
Mon, 17 Jan 2011 15:49:38 +0000 (09:49 -0600)
diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt

index f252c8e3bcd1afdd26b10d1087b164cfd6ba00c5..f2f9b757a568a94d5656e3964b7f4ae51a00a51e 100644 (file)
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt
@@ -97,10 +97,13 @@ The -device argument differs in detail for each kind of drive:
  
  * if=virtio
  
-  -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V
+  -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V,ioeventfd=IOEVENTFD
  
    This lets you control PCI device class and MSI-X vectors.
  
+  IOEVENTFD controls whether or not ioeventfd is used for virtqueue notify.  It
+  can be set to on (default) or off.
+
    As for all PCI devices, you can add bus=PCI-BUS,addr=DEVFN to
    control the PCI device address.
  
@@ -240,6 +243,9 @@ For PCI devices, you can add bus=PCI-BUS,addr=DEVFN to control the PCI
  device address, as usual.  The old -net nic provides parameter addr
  for that, it is silently ignored when the NIC is not a PCI device.
  
+For virtio-net-pci, you can control whether or not ioeventfd is used for
+virtqueue notify by setting ioeventfd= to on or off (default).
+
  -net nic accepts vectors=V for all models, but it's silently ignored
  except for virtio-net-pci (model=virtio).  With -device, only devices
  that support it accept it.
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c

index 173d78148ddcbf683f2a6386e073be25cfe62bce..273097d480e91d774e661c73cfad8343b4bd1d0e 100644 (file)
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -428,6 +428,8 @@ static PCIDeviceInfo piix4_pm_info = {
      .qdev.desc          = "PM",
      .qdev.size          = sizeof(PIIX4PMState),
      .qdev.vmsd          = &vmstate_acpi,
+    .qdev.no_user       = 1,
+    .no_hotplug         = 1,
      .init               = piix4_pm_initfn,
      .config_write       = pm_write_config,
      .qdev.props         = (Property[]) {
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c

index 75d1cc6f57f7abde45d4ca5485777c32130f1c07..5f45b5dee73dd2965a86a5f8083b927ffc4c3e71 100644 (file)
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -3140,6 +3140,7 @@ static PCIDeviceInfo cirrus_vga_info = {
      .qdev.desc    = "Cirrus CLGD 54xx VGA",
      .qdev.size    = sizeof(PCICirrusVGAState),
      .qdev.vmsd    = &vmstate_pci_cirrus_vga,
+    .no_hotplug   = 1,
      .init         = pci_cirrus_vga_initfn,
      .romfile      = VGABIOS_CIRRUS_FILENAME,
      .config_write = pci_cirrus_write_config,
diff --git a/hw/ide/piix.c b/hw/ide/piix.c

index 1cad9066a0aad3028afcf7196cad734be321f8b0..d4289af9c45e8987b667ab72fefe7e2a25fe3e1a 100644 (file)
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -194,11 +194,13 @@ static PCIDeviceInfo piix_ide_info[] = {
          .qdev.name    = "piix3-ide",
          .qdev.size    = sizeof(PCIIDEState),
          .qdev.no_user = 1,
+        .no_hotplug   = 1,
          .init         = pci_piix3_ide_initfn,
      },{
          .qdev.name    = "piix4-ide",
          .qdev.size    = sizeof(PCIIDEState),
          .qdev.no_user = 1,
+        .no_hotplug   = 1,
          .init         = pci_piix4_ide_initfn,
      },{
          /* end of list */
diff --git a/hw/pci.c b/hw/pci.c

index d0b51b80bdb1ae936986e46b08136bfc5729773a..8d0e3df2e5c3573dc03b1452badeb70c6efbab05 100644 (file)
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1624,6 +1624,11 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base)
                                       info->is_bridge);
      if (pci_dev == NULL)
          return -1;
+    if (qdev->hotplugged && info->no_hotplug) {
+        qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+        do_pci_unregister_device(pci_dev);
+        return -1;
+    }
      rc = info->init(pci_dev);
      if (rc != 0) {
          do_pci_unregister_device(pci_dev);
@@ -1656,7 +1661,12 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base)
  static int pci_unplug_device(DeviceState *qdev)
  {
      PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
+    PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev);
  
+    if (info->no_hotplug) {
+        qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+        return -1;
+    }
      return dev->bus->hotplug(dev->bus->hotplug_qdev, dev,
                               PCI_HOTPLUG_DISABLED);
  }
diff --git a/hw/pci.h b/hw/pci.h

index 052960e3eac00ee4ffc9562d7cb4922dbfacd8fe..bc8d5bb3c732e86205d66719815a132fc2469f41 100644 (file)
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -436,6 +436,9 @@ typedef struct {
      /* pcie stuff */
      int is_express;   /* is this device pci express? */
  
+    /* device isn't hot-pluggable */
+    int no_hotplug;
+
      /* rom bar */
      const char *romfile;
  } PCIDeviceInfo;
diff --git a/hw/piix4.c b/hw/piix4.c

index 5489386d689207c2f42e773788f83bf939aafb12..72073cd0a094da3e292df2fbd0f5992911d18b03 100644 (file)
--- a/hw/piix4.c
+++ b/hw/piix4.c
@@ -113,6 +113,7 @@ static PCIDeviceInfo piix4_info[] = {
          .qdev.desc    = "ISA bridge",
          .qdev.size    = sizeof(PCIDevice),
          .qdev.no_user = 1,
+        .no_hotplug   = 1,
          .init         = piix4_initfn,
      },{
          /* end of list */
diff --git a/hw/piix_pci.c b/hw/piix_pci.c

index 38f9d9eea4e46f30c533d9cf65e78f8d8ec4e0ee..358da58a800651e1cdcc3258fda33424f5caca9b 100644 (file)
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -348,6 +348,7 @@ static PCIDeviceInfo i440fx_info[] = {
          .qdev.size    = sizeof(PCII440FXState),
          .qdev.vmsd    = &vmstate_i440fx,
          .qdev.no_user = 1,
+        .no_hotplug   = 1,
          .init         = i440fx_initfn,
          .config_write = i440fx_write_config,
      },{
@@ -356,6 +357,7 @@ static PCIDeviceInfo i440fx_info[] = {
          .qdev.size    = sizeof(PIIX3State),
          .qdev.vmsd    = &vmstate_piix3,
          .qdev.no_user = 1,
+        .no_hotplug   = 1,
          .init         = piix3_initfn,
      },{
          /* end of list */
diff --git a/hw/qdev.c b/hw/qdev.c

index 31eb464f23d505cea24ca58965442ab2252a1112..5b8d3742ec71bf757d6099aae3012988857c32de 100644 (file)
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -32,6 +32,8 @@
  #include "blockdev.h"
  
  static int qdev_hotplug = 0;
+static bool qdev_hot_added = false;
+static bool qdev_hot_removed = false;
  
  /* This is a nasty hack to allow passing a NULL bus to qdev_create.  */
  static BusState *main_system_bus;
@@ -93,6 +95,7 @@ static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info)
      if (qdev_hotplug) {
          assert(bus->allow_hotplug);
          dev->hotplugged = 1;
+        qdev_hot_added = true;
      }
      dev->instance_id_alias = -1;
      dev->state = DEV_STATE_CREATED;
@@ -294,6 +297,8 @@ int qdev_unplug(DeviceState *dev)
      }
      assert(dev->info->unplug != NULL);
  
+    qdev_hot_removed = true;
+
      return dev->info->unplug(dev);
  }
  
@@ -395,6 +400,11 @@ void qdev_machine_creation_done(void)
      qdev_hotplug = 1;
  }
  
+bool qdev_machine_modified(void) /* any hot-add or unplug request so far? */
+{
+    return qdev_hot_added || qdev_hot_removed; /* file-scope flags above */
+}
+
  /* Get a character (serial) device interface.  */
  CharDriverState *qdev_init_chardev(DeviceState *dev)
  {
diff --git a/hw/qdev.h b/hw/qdev.h

index 2be775f9e8e5915da4a306034b4eeed6e6b8e4d7..e520aaa7863cd17523f39d24ff33785402fafefb 100644 (file)
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -132,6 +132,7 @@ int qdev_unplug(DeviceState *dev);
  void qdev_free(DeviceState *dev);
  int qdev_simple_unplug_cb(DeviceState *dev);
  void qdev_machine_creation_done(void);
+bool qdev_machine_modified(void);
  
  qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
  void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
diff --git a/hw/qxl.c b/hw/qxl.c

index 207aa63f90c81ce2dd849c6fceb9e01e449167b2..bd71e5810fe87867e16076c45ab54653a70c1829 100644 (file)
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1546,6 +1546,7 @@ static PCIDeviceInfo qxl_info_primary = {
      .qdev.size    = sizeof(PCIQXLDevice),
      .qdev.reset   = qxl_reset_handler,
      .qdev.vmsd    = &qxl_vmstate,
+    .no_hotplug   = 1,
      .init         = qxl_init_primary,
      .config_write = qxl_write_config,
      .romfile      = "vgabios-qxl.bin",
diff --git a/hw/rtl8139.c b/hw/rtl8139.c

index a8aed89074de31c9a3cc01a75ca6657ffdd137ce..a22530cf89f09826d1b4d1aa010357647b473d8e 100644 (file)
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -495,6 +495,8 @@ typedef struct RTL8139State {
      QEMUTimer *timer;
      int64_t TimerExpire;
  
+    /* Support migration to/from old versions */
+    int rtl8139_mmio_io_addr_dummy;
  } RTL8139State;
  
  static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
@@ -3162,6 +3164,21 @@ static int rtl8139_post_load(void *opaque, int version_id)
      return 0;
  }
  
+static bool rtl8139_hotplug_ready_needed(void *opaque) /* opaque is unused */
+{
+    return qdev_machine_modified(); /* used as the subsection's .needed hook */
+}
+
+static const VMStateDescription vmstate_rtl8139_hotplug_ready ={ /* subsection of vmstate_rtl8139 */
+    .name = "rtl8139/hotplug_ready",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
+        VMSTATE_END_OF_LIST() /* no fields: the list is only its terminator */
+    }
+};
+
  static void rtl8139_pre_save(void *opaque)
  {
      RTL8139State* s = opaque;
@@ -3171,6 +3188,7 @@ static void rtl8139_pre_save(void *opaque)
      rtl8139_set_next_tctr_time(s, current_time);
      s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY,
                         get_ticks_per_sec());
+    s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr;
  }
  
  static const VMStateDescription vmstate_rtl8139 = {
@@ -3223,7 +3241,7 @@ static const VMStateDescription vmstate_rtl8139 = {
  
          VMSTATE_UNUSED(4),
          VMSTATE_MACADDR(conf.macaddr, RTL8139State),
-        VMSTATE_INT32(rtl8139_mmio_io_addr, RTL8139State),
+        VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State),
  
          VMSTATE_UINT32(currTxDesc, RTL8139State),
          VMSTATE_UINT32(currCPlusRxDesc, RTL8139State),
@@ -3252,6 +3270,14 @@ static const VMStateDescription vmstate_rtl8139 = {
  
          VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
          VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection []) {
+        {
+            .vmsd = &vmstate_rtl8139_hotplug_ready,
+            .needed = rtl8139_hotplug_ready_needed,
+        }, {
+            /* empty */
+        }
      }
  };
  
diff --git a/hw/vga-pci.c b/hw/vga-pci.c

index 791ca22763eec9896b476ee2bfb05b4d230019c3..ce9ec4577719f522f5a2ec26cb2ec7c4ec9b2894 100644 (file)
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -110,6 +110,7 @@ static PCIDeviceInfo vga_info = {
      .qdev.name    = "VGA",
      .qdev.size    = sizeof(PCIVGAState),
      .qdev.vmsd    = &vmstate_vga_pci,
+    .no_hotplug   = 1,
      .init         = pci_vga_initfn,
      .config_write = pci_vga_write_config,
      .romfile      = "vgabios-stdvga.bin",
diff --git a/hw/virtio-net.c b/hw/virtio-net.c

index ec1bf8dda79e758ec1690d68e7a4289a665ea476..ccb3e632a4280a3c5d85f556557127099b58d1c9 100644 (file)
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -54,8 +54,6 @@ typedef struct VirtIONet
      uint8_t nouni;
      uint8_t nobcast;
      uint8_t vhost_started;
-    bool vm_running;
-    VMChangeStateEntry *vmstate;
      struct {
          int in_use;
          int first_multi;
@@ -102,7 +100,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  static bool virtio_net_started(VirtIONet *n, uint8_t status)
  {
      return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
-        (n->status & VIRTIO_NET_S_LINK_UP) && n->vm_running;
+        (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
  }
  
  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
@@ -453,7 +451,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
  static int virtio_net_can_receive(VLANClientState *nc)
  {
      VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
          return 0;
      }
  
@@ -708,7 +706,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
          return num_packets;
      }
  
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
  
      if (n->async_tx.elem.out_num) {
          virtio_queue_set_notification(n->tx_vq, 0);
@@ -769,7 +767,7 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
      VirtIONet *n = to_virtio_net(vdev);
  
      /* This happens when device was stopped but VCPU wasn't. */
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
          n->tx_waiting = 1;
          return;
      }
@@ -796,7 +794,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
      }
      n->tx_waiting = 1;
      /* This happens when device was stopped but VCPU wasn't. */
-    if (!n->vm_running) {
+    if (!n->vdev.vm_running) {
          return;
      }
      virtio_queue_set_notification(vq, 0);
@@ -806,7 +804,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
  static void virtio_net_tx_timer(void *opaque)
  {
      VirtIONet *n = opaque;
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
  
      n->tx_waiting = 0;
  
@@ -823,7 +821,7 @@ static void virtio_net_tx_bh(void *opaque)
      VirtIONet *n = opaque;
      int32_t ret;
  
-    assert(n->vm_running);
+    assert(n->vdev.vm_running);
  
      n->tx_waiting = 0;
  
@@ -988,16 +986,6 @@ static NetClientInfo net_virtio_info = {
      .link_status_changed = virtio_net_set_link_status,
  };
  
-static void virtio_net_vmstate_change(void *opaque, int running, int reason)
-{
-    VirtIONet *n = opaque;
-    n->vm_running = running;
-    /* This is called when vm is started/stopped,
-     * it will start/stop vhost backend if appropriate
-     * e.g. after migration. */
-    virtio_net_set_status(&n->vdev, n->vdev.status);
-}
-
  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                                virtio_net_conf *net)
  {
@@ -1052,7 +1040,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
      n->qdev = dev;
      register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                      virtio_net_save, virtio_net_load, n);
-    n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n);
  
      add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
  
@@ -1062,7 +1049,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
  void virtio_net_exit(VirtIODevice *vdev)
  {
      VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
-    qemu_del_vm_change_state_handler(n->vmstate);
  
      /* This will stop vhost backend if appropriate. */
      virtio_net_set_status(vdev, 0);
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c

index 6186142b2b1342a6607396b04163083dcc74ab6c..d07ff976be241428af66a13f4379cda14e665e47 100644 (file)
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -80,9 +80,13 @@
   * 12 is historical, and due to x86 page size. */
  #define VIRTIO_PCI_QUEUE_ADDR_SHIFT    12
  
-/* We can catch some guest bugs inside here so we continue supporting older
-   guests. */
-#define VIRTIO_PCI_BUG_BUS_MASTER      (1 << 0)
+/* Flags track per-device state like workarounds for quirks in older guests. */
+#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
+
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
  
  /* QEMU doesn't strictly need write barriers since everything runs in
   * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
@@ -95,7 +99,7 @@
  typedef struct {
      PCIDevice pci_dev;
      VirtIODevice *vdev;
-    uint32_t bugs;
+    uint32_t flags;
      uint32_t addr;
      uint32_t class_code;
      uint32_t nvectors;
@@ -108,6 +112,8 @@ typedef struct {
      /* Max. number of ports we can have for a the virtio-serial device */
      uint32_t max_virtserial_ports;
      virtio_net_conf net;
+    bool ioeventfd_disabled;
+    bool ioeventfd_started;
  } VirtIOPCIProxy;
  
  /* virtio device */
@@ -159,7 +165,7 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
         in ready state. Then we have a buggy guest OS. */
      if ((proxy->vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
          !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
-        proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+        proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
      }
      return 0;
  }
@@ -180,12 +186,139 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
      return 0;
  }
  
+static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
+                                                 int n, bool assign)
+{ /* (De)assign the ioeventfd of queue n; a negative return means failure. */
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int r;
+    if (assign) { /* init the notifier, then map it to the queue-notify port */
+        r = event_notifier_init(notifier, 1);
+        if (r < 0) {
+            error_report("%s: unable to init event notifier: %d",
+                         __func__, r);
+            return r;
+        }
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            error_report("%s: unable to map ioeventfd: %d",
+                         __func__, r);
+            event_notifier_cleanup(notifier); /* undo the init; r is returned below */
+        }
+    } else { /* unmap first, then drain a pending kick, then clean up */
+        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                                       n, assign);
+        if (r < 0) {
+            error_report("%s: unable to unmap ioeventfd: %d",
+                         __func__, r);
+            return r; /* the notifier is not cleaned up on this path */
+        }
+
+        /* Handle the race condition where the guest kicked and we deassigned
+         * before we got around to handling the kick.
+         */
+        if (event_notifier_test_and_clear(notifier)) {
+            virtio_queue_notify_vq(vq);
+        }
+
+        event_notifier_cleanup(notifier);
+    }
+    return r; /* result of the last kvm_set_ioeventfd_pio_word() call */
+}
+
+static void virtio_pci_host_notifier_read(void *opaque) /* fd handler; opaque is a VirtQueue */
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *n = virtio_queue_get_host_notifier(vq);
+    if (event_notifier_test_and_clear(n)) { /* presumably true when a kick was pending */
+        virtio_queue_notify_vq(vq);
+    }
+}
+
+static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
+                                                    int n, bool assign)
+{ /* Install or remove the fd handler for queue n's host notifier. */
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    if (assign) { /* handler is virtio_pci_host_notifier_read, opaque is vq */
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            virtio_pci_host_notifier_read, NULL, vq);
+    } else { /* all-NULL arguments: no handler is left registered */
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            NULL, NULL, NULL);
+    }
+}
+
+static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) /* all-or-nothing start */
+{
+    int n, r;
+
+    if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) ||
+        proxy->ioeventfd_disabled ||
+        proxy->ioeventfd_started) {
+        return; /* flag off, disabled via set_host_notifier, or already started */
+    }
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue; /* queue n has vring.num == 0: nothing to set up */
+        }
+
+        r = virtio_pci_set_host_notifier_internal(proxy, n, true);
+        if (r < 0) {
+            goto assign_error; /* queue n itself needs no rollback */
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
+    }
+    proxy->ioeventfd_started = true;
+    return;
+
+assign_error:
+    while (--n >= 0) { /* roll back the queues set up before the failing one */
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue;
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        assert(r >= 0); /* a failed deassign is treated as fatal */
+    }
+    proxy->ioeventfd_started = false;
+    error_report("%s: failed. Fallback to a userspace (slower).", __func__);
+}
+
+static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) /* no-op unless started */
+{
+    int r;
+    int n;
+
+    if (!proxy->ioeventfd_started) {
+        return;
+    }
+
+    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+        if (!virtio_queue_get_num(proxy->vdev, n)) {
+            continue; /* queue n has vring.num == 0: skip it */
+        }
+
+        virtio_pci_set_host_notifier_fd_handler(proxy, n, false); /* handler first */
+        r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+        assert(r >= 0); /* a failed deassign is treated as fatal */
+    }
+    proxy->ioeventfd_started = false;
+}
+
  static void virtio_pci_reset(DeviceState *d)
  {
      VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_reset(proxy->vdev);
      msix_reset(&proxy->pci_dev);
-    proxy->bugs = 0;
+    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
  }
  
  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -210,6 +343,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
      case VIRTIO_PCI_QUEUE_PFN:
          pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
          if (pa == 0) {
+            virtio_pci_stop_ioeventfd(proxy);
              virtio_reset(proxy->vdev);
              msix_unuse_all_vectors(&proxy->pci_dev);
          }
@@ -224,7 +358,16 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
          virtio_queue_notify(vdev, val);
          break;
      case VIRTIO_PCI_STATUS:
+        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            virtio_pci_stop_ioeventfd(proxy);
+        }
+
          virtio_set_status(vdev, val & 0xFF);
+
+        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
+            virtio_pci_start_ioeventfd(proxy);
+        }
+
          if (vdev->status == 0) {
              virtio_reset(proxy->vdev);
              msix_unuse_all_vectors(&proxy->pci_dev);
@@ -235,7 +378,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
             some safety checks. */
          if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
              !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
-            proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+            proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
          }
          break;
      case VIRTIO_MSI_CONFIG_VECTOR:
@@ -403,7 +546,8 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
  
      if (PCI_COMMAND == address) {
          if (!(val & PCI_COMMAND_MASTER)) {
-            if (!(proxy->bugs & VIRTIO_PCI_BUG_BUS_MASTER)) {
+            if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
+                virtio_pci_stop_ioeventfd(proxy);
                  virtio_set_status(proxy->vdev,
                                    proxy->vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
              }
@@ -481,30 +625,30 @@ assign_error:
  static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
  {
      VirtIOPCIProxy *proxy = opaque;
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    int r;
+
+    /* Stop using ioeventfd for virtqueue kick if the device starts using host
+     * notifiers.  This makes it easy to avoid stepping on each others' toes.
+     */
+    proxy->ioeventfd_disabled = assign;
      if (assign) {
-        r = event_notifier_init(notifier, 1);
-        if (r < 0) {
-            return r;
-        }
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            event_notifier_cleanup(notifier);
-        }
+        virtio_pci_stop_ioeventfd(proxy);
+    }
+    /* We don't need to start here: it's not needed because backend
+     * currently only stops on status change away from ok,
+     * reset, vmstop and such. If we do add code to start here,
+     * need to check vmstate, device state etc. */
+    return virtio_pci_set_host_notifier_internal(proxy, n, assign);
+}
+
+static void virtio_pci_vmstate_change(void *opaque, bool running)
+{
+    VirtIOPCIProxy *proxy = opaque;
+
+    if (running) {
+        virtio_pci_start_ioeventfd(proxy);
      } else {
-        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
-                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
-                                       n, assign);
-        if (r < 0) {
-            return r;
-        }
-        event_notifier_cleanup(notifier);
+        virtio_pci_stop_ioeventfd(proxy);
      }
-    return r;
  }
  
  static const VirtIOBindings virtio_pci_bindings = {
@@ -516,6 +660,7 @@ static const VirtIOBindings virtio_pci_bindings = {
      .get_features = virtio_pci_get_features,
      .set_host_notifier = virtio_pci_set_host_notifier,
      .set_guest_notifiers = virtio_pci_set_guest_notifiers,
+    .vmstate_change = virtio_pci_vmstate_change,
  };
  
  static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -560,6 +705,10 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
      pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO,
                             virtio_map);
  
+    if (!kvm_has_many_ioeventfds()) {
+        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+    }
+
      virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
      proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
      proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
@@ -598,6 +747,7 @@ static int virtio_blk_exit_pci(PCIDevice *pci_dev)
  {
      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
  
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_blk_exit(proxy->vdev);
      blockdev_mark_auto_del(proxy->block.bs);
      return virtio_exit_pci(pci_dev);
@@ -659,6 +809,7 @@ static int virtio_net_exit_pci(PCIDevice *pci_dev)
  {
      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
  
+    virtio_pci_stop_ioeventfd(proxy);
      virtio_net_exit(proxy->vdev);
      return virtio_exit_pci(pci_dev);
  }
@@ -706,6 +857,8 @@ static PCIDeviceInfo virtio_info[] = {
          .qdev.props = (Property[]) {
              DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
              DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
              DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
              DEFINE_PROP_END_OF_LIST(),
@@ -718,6 +871,8 @@ static PCIDeviceInfo virtio_info[] = {
          .exit       = virtio_net_exit_pci,
          .romfile    = "pxe-virtio.bin",
          .qdev.props = (Property[]) {
+            DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                            VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
diff --git a/hw/virtio.c b/hw/virtio.c

index 07dbf868fd2a81cba02466f467a30000d1ff83a8..31bd9e32dc5abd890fea27ecb23c9616df027311 100644 (file)
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -575,11 +575,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
      return vdev->vq[n].vring.num;
  }
  
+void virtio_queue_notify_vq(VirtQueue *vq) /* calls handle_output if vring.desc is set */
+{
+    if (vq->vring.desc) {
+        VirtIODevice *vdev = vq->vdev;
+        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); /* vq - vdev->vq: queue index */
+        vq->handle_output(vdev, vq);
+    }
+}
+
  void virtio_queue_notify(VirtIODevice *vdev, int n)
  {
-    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
-        trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
-        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+    if (n < VIRTIO_PCI_QUEUE_MAX) {
+        virtio_queue_notify_vq(&vdev->vq[n]);
      }
  }
  
@@ -743,11 +751,31 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
  
  void virtio_cleanup(VirtIODevice *vdev)
  {
+    qemu_del_vm_change_state_handler(vdev->vmstate);
      if (vdev->config)
          qemu_free(vdev->config);
      qemu_free(vdev->vq);
  }
  
+static void virtio_vmstate_change(void *opaque, int running, int reason) /* reason is unused */
+{
+    VirtIODevice *vdev = opaque;
+    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+    vdev->vm_running = running;
+
+    if (backend_run) { /* starting: re-apply status before the binding hook */
+        virtio_set_status(vdev, vdev->status);
+    }
+
+    if (vdev->binding->vmstate_change) { /* hook is optional */
+        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+    }
+
+    if (!backend_run) { /* otherwise: re-apply status after the binding hook */
+        virtio_set_status(vdev, vdev->status);
+    }
+}
+
  VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                   size_t config_size, size_t struct_size)
  {
@@ -774,6 +802,8 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
      else
          vdev->config = NULL;
  
+    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
+
      return vdev;
  }
  
diff --git a/hw/virtio.h b/hw/virtio.h

index 02fa312d3eb2a9f607085c7e5d5764cb3b8c8240..d8546d5b30a2f0c69cf43e8e84bba06f315fd1e1 100644 (file)
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -95,6 +95,7 @@ typedef struct {
      unsigned (*get_features)(void * opaque);
      int (*set_guest_notifiers)(void * opaque, bool assigned);
      int (*set_host_notifier)(void * opaque, int n, bool assigned);
+    void (*vmstate_change)(void * opaque, bool running);
  } VirtIOBindings;
  
  #define VIRTIO_PCI_QUEUE_MAX 64
@@ -123,6 +124,8 @@ struct VirtIODevice
      const VirtIOBindings *binding;
      void *binding_opaque;
      uint16_t device_id;
+    bool vm_running;
+    VMChangeStateEntry *vmstate;
  };
  
  static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
@@ -219,5 +222,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
  VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
  EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_notify_vq(VirtQueue *vq);
  void virtio_irq(VirtQueue *vq);
  #endif
diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c

index d9dd52fc6066795d97b96c4d409cd97ab35efa4b..6c590533081b4469ddfc95aefb2d4fe0e18783cd 100644 (file)
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -1318,6 +1318,7 @@ static PCIDeviceInfo vmsvga_info = {
      .qdev.name    = "vmware-svga",
      .qdev.size    = sizeof(struct pci_vmsvga_state_s),
      .qdev.vmsd    = &vmstate_vmware_vga,
+    .no_hotplug   = 1,
      .init         = pci_vmsvga_initfn,
      .romfile      = "vgabios-vmware.bin",
  };
diff --git a/kvm-all.c b/kvm-all.c

index cae24bb87c1c1654eb73f437875ed9083803f59f..255b6fad9c76b6d10b828a39837a10a774bc2102 100644 (file)
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -28,6 +28,11 @@
  #include "kvm.h"
  #include "bswap.h"
  
+/* This check must be after config-host.h is included */
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
  /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */
  #define PAGE_SIZE TARGET_PAGE_SIZE
  
@@ -72,6 +77,7 @@ struct KVMState
      int irqchip_in_kernel;
      int pit_in_kernel;
      int xsave, xcrs;
+    int many_ioeventfds;
  };
  
  static KVMState *kvm_state;
@@ -441,6 +447,39 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
      return ret;
  }
  
 +static int kvm_check_many_ioeventfds(void)
 +{
 +    /* Older kernels have a 6 device limit on the KVM io bus.  Find out so we
 +     * can avoid creating too many ioeventfds.  Returns 1 when all 7 probe
 +     * ioeventfds could be set up, 0 otherwise (always 0 without eventfd). */
 +#ifdef CONFIG_EVENTFD
 +    int ioeventfds[7]; /* one more than the 6 device limit described above */
 +    int i, ret = 0;
 +    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
 +        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
 +        if (ioeventfds[i] < 0) { /* no fd obtained: nothing to close */
 +            break;
 +        }
 +        ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
 +        if (ret < 0) {
 +            close(ioeventfds[i]); /* slot i is left out of the undo loop */
 +            break;
 +        }
 +    }
 +
 +    /* Decide whether many devices are supported or not */
 +    ret = i == ARRAY_SIZE(ioeventfds); /* true only if no iteration broke */
 +
 +    while (i-- > 0) { /* undo the i successful probes, last one first */
 +        kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
 +        close(ioeventfds[i]);
 +    }
 +    return ret;
 +#else /* !CONFIG_EVENTFD: no probe is attempted */
 +    return 0;
 +#endif
+}
+
  static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                              ram_addr_t size,
                              ram_addr_t phys_offset)
@@ -717,6 +756,8 @@ int kvm_init(int smp_cpus)
      kvm_state = s;
      cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
  
+    s->many_ioeventfds = kvm_check_many_ioeventfds();
+
      return 0;
  
  err:
@@ -1046,6 +1087,14 @@ int kvm_has_xcrs(void)
      return kvm_state->xcrs;
  }
  
 +int kvm_has_many_ioeventfds(void) /* reports the value cached by kvm_init() */
 +{
 +    if (!kvm_enabled()) { /* KVM not in use: report no support */
 +        return 0;
 +    }
 +    return kvm_state->many_ioeventfds; /* kvm_check_many_ioeventfds() result */
 +}
+
  void kvm_setup_guest_memory(void *start, size_t size)
  {
      if (!kvm_has_sync_mmu()) {
diff --git a/kvm-stub.c b/kvm-stub.c

index 5384a4b9a4187ad11426069bf7021b1fdc498d9c..33d4476fa3649766fc30c2b561b8faae2e83c4ee 100644 (file)
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void)
      return 0;
  }
  
 +int kvm_has_many_ioeventfds(void) /* kvm-stub.c counterpart of the query */
 +{
 +    return 0; /* the stub always reports no many-ioeventfd support */
 +}
+
  void kvm_setup_guest_memory(void *start, size_t size)
  {
  }
diff --git a/kvm.h b/kvm.h

index 60a9b425c828863940b60a49a3191f58ebb5fe75..ce08d42756696c258b89a86ec5bd33e6a36f8b6d 100644 (file)
--- a/kvm.h
+++ b/kvm.h
@@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void);
  int kvm_has_debugregs(void);
  int kvm_has_xsave(void);
  int kvm_has_xcrs(void);
+int kvm_has_many_ioeventfds(void);
  
  #ifdef NEED_CPU_H
  int kvm_init_vcpu(CPUState *env);
diff --git a/qerror.c b/qerror.c

index ac2cdafa6524ad329e84f5642b413f4cd7175d18..9d0cdeb45cccea8f903d93f46d03dd7847ad427c 100644 (file)
--- a/qerror.c
+++ b/qerror.c
@@ -100,6 +100,10 @@ static const QErrorStringTable qerror_table[] = {
          .error_fmt = QERR_DEVICE_NO_BUS,
          .desc      = "Device '%(device)' has no child bus",
      },
+    {
+        .error_fmt = QERR_DEVICE_NO_HOTPLUG,
+        .desc      = "Device '%(device)' does not support hotplugging",
+    },
      {
          .error_fmt = QERR_DUPLICATE_ID,
          .desc      = "Duplicate ID '%(id)' for %(object)",
diff --git a/qerror.h b/qerror.h

index 943a24b4e5b2142d00c409ea958c2faf9cd047c8..b0f69dabe520ade0eee541a309f2dcfc66435a6d 100644 (file)
--- a/qerror.h
+++ b/qerror.h
@@ -90,6 +90,9 @@ QError *qobject_to_qerror(const QObject *obj);
  #define QERR_DEVICE_NO_BUS \
      "{ 'class': 'DeviceNoBus', 'data': { 'device': %s } }"
  
 +#define QERR_DEVICE_NO_HOTPLUG \
 +    "{ 'class': 'DeviceNoHotplug', 'data': { 'device': %s } }" /* %s supplies the 'device' value used by the qerror.c description */
+
  #define QERR_DUPLICATE_ID \
      "{ 'class': 'DuplicateId', 'data': { 'id': %s, 'object': %s } }"
author	Anthony Liguori <aliguori@us.ibm.com>
	Mon, 17 Jan 2011 15:49:38 +0000 (09:49 -0600)
committer	Anthony Liguori <aliguori@us.ibm.com>
	Mon, 17 Jan 2011 15:49:38 +0000 (09:49 -0600)
docs/qdev-device-use.txt		patch \| blob \| blame \| history
hw/acpi_piix4.c		patch \| blob \| blame \| history
hw/cirrus_vga.c		patch \| blob \| blame \| history
hw/ide/piix.c		patch \| blob \| blame \| history
hw/pci.c		patch \| blob \| blame \| history
hw/pci.h		patch \| blob \| blame \| history
hw/piix4.c		patch \| blob \| blame \| history
hw/piix_pci.c		patch \| blob \| blame \| history
hw/qdev.c		patch \| blob \| blame \| history
hw/qdev.h		patch \| blob \| blame \| history
hw/qxl.c		patch \| blob \| blame \| history
hw/rtl8139.c		patch \| blob \| blame \| history
hw/vga-pci.c		patch \| blob \| blame \| history
hw/virtio-net.c		patch \| blob \| blame \| history
hw/virtio-pci.c		patch \| blob \| blame \| history
hw/virtio.c		patch \| blob \| blame \| history
hw/virtio.h		patch \| blob \| blame \| history
hw/vmware_vga.c		patch \| blob \| blame \| history
kvm-all.c		patch \| blob \| blame \| history
kvm-stub.c		patch \| blob \| blame \| history
kvm.h		patch \| blob \| blame \| history
qerror.c		patch \| blob \| blame \| history
qerror.h		patch \| blob \| blame \| history