F: hw/net/
F: include/hw/net/
F: tests/virtio-net-test.c
+F: docs/virtio-net-failover.rst
T: git https://github.com/jasowang/qemu.git net
Parallel NOR Flash devices
--- /dev/null
+=======================================
+QEMU virtio-net standby (net_failover)
+=======================================
+
+This document explains the setup and usage of the virtio-net standby feature,
+which is used to create a net_failover pair of devices.
+
+The general idea is that we have a pair of devices, a (vfio-)pci and a
+virtio-net device. Before migration the vfio device is unplugged and data flows
+through the virtio-net device, on the target side another vfio-pci device is
+plugged in to take over the data-path. In the guest the net_failover kernel
+module will pair net devices with the same MAC address.
+
+The two devices are called primary and standby device. The fast hardware based
+networking device is called the primary device and the virtio-net device is the
+standby device.
+
+Restrictions
+------------
+
+Currently only PCIe devices are allowed as primary devices, this restriction
+can be lifted in the future with enhanced QEMU support. Also, only networking
+devices are allowed as primary device. The user needs to ensure that primary
+and standby devices are not plugged into the same PCIe slot.
+
+Usecase
+-------
+
+ Virtio-net standby allows easy migration while using a passed-through fast
+ networking device by falling back to a virtio-net device for the duration of
+ the migration. It is like a simple version of a bond, the difference is that it
+ requires no configuration in the guest. When a guest is live-migrated to
+ another host QEMU will unplug the primary device via the PCIe based hotplug
+ handler and traffic will go through the virtio-net device. On the target
+ system the primary device will be automatically plugged back and the
+ net_failover module registers it again as the primary device.
+
+Usage
+-----
+
+ The primary device can be hotplugged or be part of the startup configuration
+
+ -device virtio-net-pci,netdev=hostnet1,id=net1,mac=52:54:00:6f:55:cc, \
+ bus=root2,failover=on
+
+ With the parameter failover=on the VIRTIO_NET_F_STANDBY feature will be enabled.
+
+ -device vfio-pci,host=5e:00.2,id=hostdev0,bus=root1,failover_pair_id=net1
+
+ failover_pair_id references the id of the virtio-net standby device. This
+ is only for pairing the devices within QEMU. The guest kernel module
+ net_failover will match devices with identical MAC addresses.
+
+Hotplug
+-------
+
+ Both primary and standby device can be hotplugged via the QEMU monitor. Note
+ that if the virtio-net device is plugged first a warning will be issued that it
+ couldn't find the primary device.
+
+Migration
+---------
+
+ A new migration state wait-unplug was added for this feature. If failover primary
+ devices are present in the configuration, migration will go into this state.
+ It will wait until the device unplug is completed in the guest and then move into
+ active state. On the target system the primary devices will be automatically hotplugged
+ when the feature bit was negotiated for the virtio-net standby device.
QTAILQ_REMOVE(&device_listeners, listener, link);
}
+/*
+ * Ask all registered DeviceListeners whether the device described by
+ * @opts should be hidden, i.e. not be created (yet).  Returns true as
+ * soon as one listener claims the device and wants it hidden.
+ */
+bool qdev_should_hide_device(QemuOpts *opts)
+{
+    int rc = -1;
+    DeviceListener *listener;
+
+    QTAILQ_FOREACH(listener, &device_listeners, link) {
+        if (listener->should_be_hidden) {
+            /*
+             * should_be_hidden() will return
+             * 1 if device matches opts and it should be hidden
+             * 0 if device matches opts and should not be hidden
+             * -1 if device doesn't match opts
+             */
+            rc = listener->should_be_hidden(listener, opts);
+        }
+
+        if (rc > 0) {
+            break;
+        }
+    }
+
+    return rc > 0;
+}
+
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version)
{
dev->instance_id_alias = -1;
dev->realized = false;
+ dev->allow_unplug_during_migration = false;
object_property_add_bool(obj, "realized",
device_get_realized, device_set_realized, NULL);
*/
#include "qemu/osdep.h"
+#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"
+#include "qemu/config-file.h"
+#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
+#include "qapi/qapi-types-migration.h"
+#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
+#include "monitor/qdev.h"
+#include "hw/pci/pci.h"
#define VIRTIO_NET_VM_VERSION 11
return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
+/*
+ * Plug in the primary (passthrough) device paired with this virtio-net
+ * standby device.  The primary's command-line options are looked up by
+ * the id recorded in n->primary_device_id and the device is created via
+ * qdev_device_add().  Sets *errp if the options cannot be found.
+ */
+static void failover_add_primary(VirtIONet *n, Error **errp)
+{
+    Error *err = NULL;
+
+    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
+                                            n->primary_device_id);
+    if (n->primary_device_opts) {
+        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
+        if (err) {
+            qemu_opts_del(n->primary_device_opts);
+        }
+        if (n->primary_dev) {
+            n->primary_bus = n->primary_dev->parent_bus;
+            /*
+             * NOTE(review): err cannot be set here if primary_dev was
+             * successfully created above — this unplug branch looks like
+             * a refactoring leftover; confirm whether it is reachable.
+             */
+            if (err) {
+                qdev_unplug(n->primary_dev, &err);
+                qdev_set_id(n->primary_dev, "");
+
+            }
+        }
+    } else {
+        error_setg(errp, "Primary device not found");
+        error_append_hint(errp, "Virtio-net failover will not work. Make "
+                          "sure primary device has parameter"
+                          " failover_pair_id=<virtio-net-id>\n");
+}
+    if (err) {
+        error_propagate(errp, err);
+    }
+}
+
+/*
+ * qemu_opts_foreach() callback: return 1 (stop iteration) if @opts
+ * describes the primary device paired with this standby device, i.e.
+ * its failover_pair_id matches our netclient name.  Side effect:
+ * records the primary's id in n->primary_device_id.
+ */
+static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
+{
+    VirtIONet *n = opaque;
+    int ret = 0;
+
+    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
+
+    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
+        n->primary_device_id = g_strdup(opts->id);
+        ret = 1;
+    }
+
+    return ret;
+}
+
+/*
+ * Scan the -device options for the primary device paired with this
+ * standby device and look up its already-realized DeviceState, if any.
+ * Returns NULL (with *errp possibly set) when no primary is configured
+ * or its id cannot be determined.
+ */
+static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
+{
+    DeviceState *dev = NULL;
+    Error *err = NULL;
+
+    if (qemu_opts_foreach(qemu_find_opts("device"),
+                          is_my_primary, n, &err)) {
+        if (err) {
+            error_propagate(errp, err);
+            return NULL;
+        }
+        if (n->primary_device_id) {
+            dev = qdev_find_recursive(sysbus_get_default(),
+                                      n->primary_device_id);
+        } else {
+            error_setg(errp, "Primary device id not found");
+            return NULL;
+        }
+    }
+    return dev;
+}
+
+
+
+/*
+ * Locate the primary device for this standby device and cache its id
+ * and QemuOpts in the VirtIONet state.  Returns the primary's
+ * DeviceState or NULL (with *errp possibly set).
+ *
+ * NOTE(review): the @dev parameter is unused in this function.
+ */
+static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
+                                                    DeviceState *dev,
+                                                    Error **errp)
+{
+    DeviceState *prim_dev = NULL;
+    Error *err = NULL;
+
+    prim_dev = virtio_net_find_primary(n, &err);
+    if (prim_dev) {
+        n->primary_device_id = g_strdup(prim_dev->id);
+        n->primary_device_opts = prim_dev->opts;
+    } else {
+        if (err) {
+            error_propagate(errp, err);
+        }
+    }
+
+    return prim_dev;
+}
+
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
VirtIONet *n = VIRTIO_NET(vdev);
+ Error *err = NULL;
int i;
if (n->mtu_bypass_backend &&
} else {
memset(n->vlans, 0xff, MAX_VLAN >> 3);
}
+
+ if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
+ qapi_event_send_failover_negotiated(n->netclient_name);
+ atomic_set(&n->primary_should_be_hidden, false);
+ failover_add_primary(n, &err);
+ if (err) {
+ n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
+ if (err) {
+ goto out_err;
+ }
+ failover_add_primary(n, &err);
+ if (err) {
+ goto out_err;
+ }
+ }
+ }
+ return;
+
+out_err:
+ if (err) {
+ warn_report_err(err);
+ }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
size_t size)
{
- ssize_t r;
+ RCU_READ_LOCK_GUARD();
- rcu_read_lock();
- r = virtio_net_receive_rcu(nc, buf, size);
- rcu_read_unlock();
- return r;
+ return virtio_net_receive_rcu(nc, buf, size);
}
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
n->netclient_type = g_strdup(type);
}
+/*
+ * Request a guest-visible unplug of the primary device while keeping
+ * its QEMU-side resources (partially_hotplugged) so it can be re-plugged
+ * if the migration fails.  Returns true if the unplug request was
+ * issued, false if there is no hotplug handler or the request failed.
+ */
+static bool failover_unplug_primary(VirtIONet *n)
+{
+    HotplugHandler *hotplug_ctrl;
+    PCIDevice *pci_dev;
+    Error *err = NULL;
+
+    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
+    if (hotplug_ctrl) {
+        pci_dev = PCI_DEVICE(n->primary_dev);
+        pci_dev->partially_hotplugged = true;
+        hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
+        if (err) {
+            error_report_err(err);
+            return false;
+        }
+    } else {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Re-plug a primary device that was partially hot-unplugged for
+ * migration (e.g. after the migration failed).
+ *
+ * Returns true on success (or when no re-plug was needed), false with
+ * *errp set on failure.
+ */
+static bool failover_replug_primary(VirtIONet *n, Error **errp)
+{
+    HotplugHandler *hotplug_ctrl;
+    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
+
+    if (!pdev->partially_hotplugged) {
+        /* Device was never unplugged, nothing to do. */
+        return true;
+    }
+    if (!n->primary_device_opts) {
+        n->primary_device_opts = qemu_opts_from_qdict(
+                                                qemu_find_opts("device"),
+                                                n->primary_device_dict, errp);
+    }
+    if (n->primary_device_opts) {
+        if (n->primary_dev) {
+            n->primary_bus = n->primary_dev->parent_bus;
+        }
+        qdev_set_parent_bus(n->primary_dev, n->primary_bus);
+        n->primary_should_be_hidden = false;
+        qemu_opt_set_bool(n->primary_device_opts,
+                          "partially_hotplugged", true, errp);
+        hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
+        if (hotplug_ctrl) {
+            hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, errp);
+            hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
+        }
+        if (!n->primary_dev) {
+            error_setg(errp, "virtio_net: couldn't find primary device");
+        }
+    }
+    /*
+     * Success is the *absence* of an error.  The original code returned
+     * `*errp != NULL`, i.e. true on failure, which inverted the meaning
+     * for callers that treat a true return as "re-plug succeeded".
+     */
+    return *errp == NULL;
+}
+
+/*
+ * React to migration state changes on behalf of the failover pair:
+ * on migration setup, request unplug of the (visible) primary device;
+ * on migration failure, plug the primary back in.
+ */
+static void virtio_net_handle_migration_primary(VirtIONet *n,
+                                                MigrationState *s)
+{
+    bool should_be_hidden;
+    Error *err = NULL;
+
+    should_be_hidden = atomic_read(&n->primary_should_be_hidden);
+
+    if (!n->primary_dev) {
+        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
+        if (!n->primary_dev) {
+            return;
+        }
+    }
+
+    if (migration_in_setup(s) && !should_be_hidden &&
+        n->primary_dev) {
+        if (failover_unplug_primary(n)) {
+            /* Keep the primary's state out of the migration stream. */
+            vmstate_unregister(n->primary_dev, qdev_get_vmsd(n->primary_dev),
+                               n->primary_dev);
+            qapi_event_send_unplug_primary(n->primary_device_id);
+            atomic_set(&n->primary_should_be_hidden, true);
+        } else {
+            warn_report("couldn't unplug primary device");
+        }
+    } else if (migration_has_failed(s)) {
+        /* We already unplugged the device, let's plug it back. */
+        if (!failover_replug_primary(n, &err)) {
+            if (err) {
+                error_report_err(err);
+            }
+        }
+    }
+}
+
+/* Migration state change notifier: dispatch to the failover handler. */
+static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *s = data;
+    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
+    virtio_net_handle_migration_primary(n, s);
+}
+
+/*
+ * DeviceListener should_be_hidden() hook for the standby device.
+ * Decides whether the device described by @device_opts (a candidate
+ * primary) must be hidden until VIRTIO_NET_F_STANDBY is negotiated.
+ *
+ * Returns 1 (hide), 0 (matched but don't hide) or -1 (not our primary),
+ * matching the contract documented in qdev_should_hide_device().
+ */
+static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
+                                               QemuOpts *device_opts)
+{
+    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
+    bool match_found;
+    bool hide;
+
+    n->primary_device_dict = qemu_opts_to_qdict(device_opts,
+                                                n->primary_device_dict);
+    if (n->primary_device_dict) {
+        g_free(n->standby_id);
+        n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
+                                                   "failover_pair_id"));
+    }
+    if (device_opts && g_strcmp0(n->standby_id, n->netclient_name) == 0) {
+        match_found = true;
+    } else {
+        match_found = false;
+        hide = false;
+        /*
+         * NOTE(review): standby_id is freed but not set to NULL — a later
+         * g_free() on it would be a double free; confirm.  Likewise the
+         * QDict is dropped without qobject_unref(), which presumably
+         * leaks it — confirm ownership.
+         */
+        g_free(n->standby_id);
+        n->primary_device_dict = NULL;
+        goto out;
+    }
+
+    n->primary_device_opts = device_opts;
+
+    /* primary_should_be_hidden is set during feature negotiation */
+    hide = atomic_read(&n->primary_should_be_hidden);
+
+    if (n->primary_device_dict) {
+        g_free(n->primary_device_id);
+        n->primary_device_id = g_strdup(qdict_get_try_str(
+                                        n->primary_device_dict, "id"));
+        if (!n->primary_device_id) {
+            warn_report("primary_device_id not set");
+        }
+    }
+
+out:
+    if (match_found && hide) {
+        return 1;
+    } else if (match_found && !hide) {
+        return 0;
+    } else {
+        return -1;
+    }
+}
+
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
+ if (n->failover) {
+ n->primary_listener.should_be_hidden =
+ virtio_net_primary_should_be_hidden;
+ atomic_set(&n->primary_should_be_hidden, true);
+ device_listener_register(&n->primary_listener);
+ n->migration_state.notify = virtio_net_migration_state_notifier;
+ add_migration_state_change_notifier(&n->migration_state);
+ n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
+ }
+
virtio_net_set_config_size(n, n->host_features);
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
g_free(n->mac_table.macs);
g_free(n->vlans);
+ if (n->failover) {
+ g_free(n->primary_device_id);
+ g_free(n->standby_id);
+ qobject_unref(n->primary_device_dict);
+ n->primary_device_dict = NULL;
+ }
+
max_queues = n->multiqueue ? n->max_queues : 1;
for (i = 0; i < max_queues; i++) {
virtio_net_del_queue(n, i);
return 0;
}
+/*
+ * vmsd dev_unplug_pending helper: true while the guest has not yet
+ * completed the unplug of this standby's primary device.
+ */
+static bool primary_unplug_pending(void *opaque)
+{
+    DeviceState *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIONet *n = VIRTIO_NET(vdev);
+
+    return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
+}
+
+/*
+ * Generic vmsd dev_unplug_pending trampoline into the virtio device
+ * class hook.
+ *
+ * NOTE(review): assumes vdc->primary_unplug_pending is non-NULL; this
+ * vmsd hook is only installed for virtio-net, which sets it — confirm.
+ */
+static bool dev_unplug_pending(void *opaque)
+{
+    DeviceState *dev = opaque;
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
+
+    return vdc->primary_unplug_pending(dev);
+}
+
static const VMStateDescription vmstate_virtio_net = {
.name = "virtio-net",
.minimum_version_id = VIRTIO_NET_VM_VERSION,
VMSTATE_END_OF_LIST()
},
.pre_save = virtio_net_pre_save,
+ .dev_unplug_pending = dev_unplug_pending,
};
static Property virtio_net_properties[] = {
true),
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
+ DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
DEFINE_PROP_END_OF_LIST(),
};
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
vdc->post_load = virtio_net_post_load_virtio;
vdc->vmsd = &vmstate_virtio_net_device;
+ vdc->primary_unplug_pending = primary_unplug_pending;
}
static const TypeInfo virtio_net_info = {
QEMU_PCIE_LNKSTA_DLLLA_BITNR, true),
DEFINE_PROP_BIT("x-pcie-extcap-init", PCIDevice, cap_present,
QEMU_PCIE_EXTCAP_INIT_BITNR, true),
+ DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
+ failover_pair_id),
DEFINE_PROP_END_OF_LIST()
};
ObjectClass *klass = OBJECT_CLASS(pc);
Error *local_err = NULL;
bool is_default_rom;
+ uint16_t class_id;
/* initialize cap_present for pci_is_express() and pci_config_size(),
* Note that hybrid PCIs are not set automatically and need to manage
}
}
+ if (pci_dev->failover_pair_id) {
+ if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
+ error_setg(errp, "failover primary device must be on "
+ "PCIExpress bus");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
+ if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
+ error_setg(errp, "failover primary device is not an "
+ "Ethernet device");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ if (!(pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
+ && (PCI_FUNC(pci_dev->devfn) == 0)) {
+ qdev->allow_unplug_during_migration = true;
+ } else {
+ error_setg(errp, "failover: primary device must be in its own "
+ "PCI slot");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ qdev->allow_unplug_during_migration = true;
+ }
+
/* rom loading */
is_default_rom = false;
if (pci_dev->romfile == NULL && pc->romfile != NULL) {
{
HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
+ if (dev->partially_hotplugged) {
+ dev->qdev.pending_deleted_event = false;
+ return;
+ }
hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
object_unparent(OBJECT(dev));
}
return;
}
+ dev->pending_deleted_event = true;
+
/* In case user cancel the operation of multi-function hot-add,
* remove the function that is unexposed to guest individually,
* without interaction with guest.
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/blocker.h"
#define TYPE_VFIO_PCI "vfio-pci"
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
return;
}
+ if (!pdev->failover_pair_id) {
+ error_setg(&vdev->migration_blocker,
+ "VFIO device doesn't support migration");
+ ret = migrate_add_blocker(vdev->migration_blocker, &err);
+ if (err) {
+ error_propagate(errp, err);
+ error_free(vdev->migration_blocker);
+ return;
+ }
+ }
+
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
vfio_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
}
static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
/*
* XXX Leaking igd_opregion is not an oversight, we can't remove the
* fw_cfg entry therefore leaking this allocation seems like the safest
DEFINE_PROP_END_OF_LIST(),
};
-static const VMStateDescription vfio_pci_vmstate = {
- .name = "vfio-pci",
- .unmigratable = 1,
-};
-
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->reset = vfio_pci_reset;
dc->props = vfio_pci_dev_properties;
- dc->vmsd = &vfio_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
bool no_vfio_ioeventfd;
bool enable_ramfb;
VFIODisplay *dpy;
+ Error *migration_blocker;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
uint64_t uaddr, len;
int ret = -EFAULT;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
trace_vhost_iotlb_miss(dev, 1);
trace_vhost_iotlb_miss(dev, 2);
out:
- rcu_read_unlock();
-
return ret;
}
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
- rcu_read_unlock();
}
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
VRingPackedDescEvent e;
VRingMemoryRegionCaches *caches;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
caches = vring_get_region_caches(vq);
vring_packed_event_read(vq->vdev, &caches->used, &e);
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
- rcu_read_unlock();
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
return 0;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
- rcu_read_unlock();
return empty;
}
static int virtio_queue_packed_empty(VirtQueue *vq)
{
- bool empty;
-
- rcu_read_lock();
- empty = virtio_queue_packed_empty_rcu(vq);
- rcu_read_unlock();
- return empty;
+ RCU_READ_LOCK_GUARD();
+ return virtio_queue_packed_empty_rcu(vq);
}
int virtio_queue_empty(VirtQueue *vq)
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
virtqueue_fill(vq, elem, len, 0);
virtqueue_flush(vq, 1);
- rcu_read_unlock();
}
/* Called within rcu_read_lock(). */
int64_t len = 0;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
if (out_bytes) {
*out_bytes = out_total;
}
- rcu_read_unlock();
return;
err:
VRingPackedDesc desc;
bool wrap_counter;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
idx = vq->last_avail_idx;
wrap_counter = vq->last_avail_wrap_counter;
total_bufs = in_total = out_total = 0;
if (out_bytes) {
*out_bytes = out_total;
}
- rcu_read_unlock();
return;
err:
VRingDesc desc;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (virtio_queue_empty_rcu(vq)) {
goto done;
}
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
- rcu_read_unlock();
return elem;
uint16_t id;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (virtio_queue_packed_empty_rcu(vq)) {
goto done;
}
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
- rcu_read_unlock();
return elem;
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
- bool should_notify;
- rcu_read_lock();
- should_notify = virtio_should_notify(vdev, vq);
- rcu_read_unlock();
-
- if (!should_notify) {
- return;
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (!virtio_should_notify(vdev, vq)) {
+ return;
+ }
}
trace_virtio_notify_irqfd(vdev, vq);
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
- bool should_notify;
- rcu_read_lock();
- should_notify = virtio_should_notify(vdev, vq);
- rcu_read_unlock();
-
- if (!should_notify) {
- return;
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (!virtio_should_notify(vdev, vq)) {
+ return;
+ }
}
trace_virtio_notify(vdev, vq);
vdev->start_on_kick = true;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
uint16_t nheads;
}
}
}
- rcu_read_unlock();
if (vdc->post_load) {
ret = vdc->post_load(vdev);
static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
int n)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
}
- rcu_read_unlock();
}
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
}
- rcu_read_unlock();
}
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
struct PCIDevice {
DeviceState qdev;
+ bool partially_hotplugged;
/* PCI config space */
uint8_t *config;
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
MSIVectorPollNotifier msix_vector_poll_notifier;
+
+ /* ID of standby device in net_failover pair */
+ char *failover_pair_id;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,
* respective parent types.
* </para>
* </note>
+ *
+ * # Hiding a device #
+ * To hide a device, a DeviceListener function should_be_hidden() needs to
+ * be registered.
+ * It can be used to defer adding a device and therefore hide it from the
+ * guest. The handler registering to this DeviceListener can save the QemuOpts
+ * passed to it for re-use later, and must return whether it wants the device
+ * to be/remain hidden or not. When the handler decides the device shall not
+ * be hidden, it will be added in qdev_device_add() and realized like any
+ * other device. Otherwise qdev_device_add() will return early without adding
+ * the device. The guest will not see a "hidden" device until it is marked
+ * as no longer hidden and qdev_device_add() is called again.
+ *
*/
typedef struct DeviceClass {
/*< private >*/
bool pending_deleted_event;
QemuOpts *opts;
int hotplugged;
+ bool allow_unplug_during_migration;
BusState *parent_bus;
QLIST_HEAD(, NamedGPIOList) gpios;
QLIST_HEAD(, BusState) child_bus;
struct DeviceListener {
void (*realize)(DeviceListener *listener, DeviceState *dev);
void (*unrealize)(DeviceListener *listener, DeviceState *dev);
+ /*
+ * This callback is called upon init of the DeviceState and allows to
+ * inform qdev that a device should be hidden, depending on the device
+ * opts, for example, to hide a standby device.
+ */
+ int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts);
QTAILQ_ENTRY(DeviceListener) link;
};
void device_listener_register(DeviceListener *listener);
void device_listener_unregister(DeviceListener *listener);
+/**
+ * @qdev_should_hide_device:
+ * @opts: QemuOpts as passed on cmdline.
+ *
+ * Check if a device should be hidden instead of being added now.
+ * Called from qdev_device_add(); returns true if a registered
+ * DeviceListener wants the device to remain hidden for the moment.
+ */
+bool qdev_should_hide_device(QemuOpts *opts);
+
#endif
#include "standard-headers/linux/virtio_net.h"
#include "hw/virtio/virtio.h"
#include "net/announce.h"
+#include "qemu/option_int.h"
#define TYPE_VIRTIO_NET "virtio-net-device"
#define VIRTIO_NET(obj) \
int32_t speed;
char *duplex_str;
uint8_t duplex;
+ char *primary_id_str;
} virtio_net_conf;
/* Coalesced packets type & status */
AnnounceTimer announce_timer;
bool needs_vnet_hdr_swap;
bool mtu_bypass_backend;
+ QemuOpts *primary_device_opts;
+ QDict *primary_device_dict;
+ DeviceState *primary_dev;
+ BusState *primary_bus;
+ char *primary_device_id;
+ char *standby_id;
+ bool primary_should_be_hidden;
+ bool failover;
+ DeviceListener primary_listener;
+ Notifier migration_state;
};
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
*/
int (*post_load)(VirtIODevice *vdev);
const VMStateDescription *vmsd;
+ bool (*primary_unplug_pending)(void *opaque);
} VirtioDeviceClass;
void virtio_instance_init_common(Object *proxy_obj, void *data,
int (*pre_save)(void *opaque);
int (*post_save)(void *opaque);
bool (*needed)(void *opaque);
+ bool (*dev_unplug_pending)(void *opaque);
+
const VMStateField *fields;
const VMStateDescription **subsections;
};
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "net/announce.h"
+#include "qemu/queue.h"
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
+ case MIGRATION_STATUS_WAIT_UNPLUG:
return true;
default:
case MIGRATION_STATUS_CANCELLED:
info->has_status = true;
break;
+ case MIGRATION_STATUS_WAIT_UNPLUG:
+ info->has_status = true;
+ break;
}
info->status = s->state;
}
case MIGRATION_STATUS_COLO:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
+ case MIGRATION_STATUS_WAIT_UNPLUG:
return false;
case MIGRATION_STATUS__MAX:
g_assert_not_reached();
qemu_savevm_state_setup(s->to_dst_file);
+ if (qemu_savevm_nr_failover_devices()) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+ MIGRATION_STATUS_WAIT_UNPLUG);
+
+ while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
+ qemu_savevm_state_guest_unplug_pending()) {
+ qemu_sem_timedwait(&s->wait_unplug_sem, 250);
+ }
+
+ migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+ MIGRATION_STATUS_ACTIVE);
+ }
+
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
qemu_mutex_destroy(&ms->qemu_file_lock);
g_free(params->tls_hostname);
g_free(params->tls_creds);
+ qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
qemu_sem_destroy(&ms->pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_sem);
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
qemu_sem_init(&ms->rp_state.rp_sem, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
+ qemu_sem_init(&ms->wait_unplug_sem, 0);
qemu_mutex_init(&ms->qemu_file_lock);
}
/* Flag set once the migration thread called bdrv_inactivate_all */
bool block_inactive;
+ /* Migration is waiting for guest to unplug device */
+ QemuSemaphore wait_unplug_sem;
+
/* Migration is paused due to pause-before-switchover */
QemuSemaphore pause_sem;
}
}
+/*
+ * Count registered save-state handlers that participate in failover,
+ * i.e. whose vmsd provides a dev_unplug_pending hook.  A non-zero
+ * count makes the migration thread enter the wait-unplug state.
+ */
+int qemu_savevm_nr_failover_devices(void)
+{
+    SaveStateEntry *se;
+    int n = 0;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (se->vmsd && se->vmsd->dev_unplug_pending) {
+            n++;
+        }
+    }
+
+    return n;
+}
+
+/*
+ * Return true while at least one failover device still has a guest
+ * unplug in flight.  Polled by the migration thread during the
+ * wait-unplug phase.
+ */
+bool qemu_savevm_state_guest_unplug_pending(void)
+{
+    SaveStateEntry *se;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->vmsd || !se->vmsd->dev_unplug_pending) {
+            continue;
+        }
+        /* One pending device is enough — no need to count them all. */
+        if (se->vmsd->dev_unplug_pending(se->opaque)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
void qemu_savevm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
bool qemu_savevm_state_blocked(Error **errp);
void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_nr_failover_devices(void);
+bool qemu_savevm_state_guest_unplug_pending(void);
int qemu_savevm_state_resume_prepare(MigrationState *s);
void qemu_savevm_state_header(QEMUFile *f);
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
# @device: During device serialisation when pause-before-switchover is enabled
# (since 2.11)
#
+# @wait-unplug: wait for device unplug request by guest OS to be completed.
+# (since 4.2)
+#
# Since: 2.3
#
##
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'postcopy-paused',
'postcopy-recover', 'completed', 'failed', 'colo',
- 'pre-switchover', 'device' ] }
+ 'pre-switchover', 'device', 'wait-unplug' ] }
##
# @MigrationInfo:
# Since: 3.0
##
{ 'command': 'migrate-pause', 'allow-oob': true }
+
+##
+# @UNPLUG_PRIMARY:
+#
+# Emitted from source side of a migration when migration state is
+# WAIT_UNPLUG. Device was unplugged by guest operating system.
+# Device resources in QEMU are kept on standby to be able to re-plug it in case
+# of migration failure.
+#
+# @device-id: QEMU device id of the unplugged device
+#
+# Since: 4.2
+#
+# Example:
+# {"event": "UNPLUG_PRIMARY", "data": {"device-id": "hostdev0"} }
+#
+##
+{ 'event': 'UNPLUG_PRIMARY',
+ 'data': { 'device-id': 'str' } }
##
{ 'command': 'announce-self', 'boxed': true,
'data' : 'AnnounceParameters'}
+
+##
+# @FAILOVER_NEGOTIATED:
+#
+# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature negotiation.
+# Failover primary devices which were hidden (not hotplugged when requested)
+# before will now be hotplugged by the virtio-net standby device.
+#
+# @device-id: QEMU device id of the standby (virtio-net) device
+#
+# Since: 4.2
+#
+# Example:
+#
+# <- { "event": "FAILOVER_NEGOTIATED",
+#      "data": { "device-id": "net1" } }
+#
+##
+{ 'event': 'FAILOVER_NEGOTIATED',
+ 'data': {'device-id': 'str'} }
#include "qemu/help_option.h"
#include "qemu/option.h"
#include "qemu/qemu-print.h"
+#include "qemu/option_int.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "migration/misc.h"
+#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
}
}
+/*
+ * qemu_opt_foreach() callback: returns 1 (stop iteration) when the
+ * option being inspected is failover_pair_id and the registered
+ * listeners want the device hidden; 0 otherwise.
+ */
+static int is_failover_device(void *opaque, const char *name, const char *value,
+                              Error **errp)
+{
+    if (strcmp(name, "failover_pair_id") == 0) {
+        QemuOpts *opts = (QemuOpts *)opaque;
+
+        if (qdev_should_hide_device(opts)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * True if @opts describes a failover primary device that should not be
+ * created yet (it will be plugged later, after feature negotiation).
+ */
+static bool should_hide_device(QemuOpts *opts)
+{
+    if (qemu_opt_foreach(opts, is_failover_device, opts, NULL) == 0) {
+        return false;
+    }
+    return true;
+}
+
DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
{
DeviceClass *dc;
const char *driver, *path;
- DeviceState *dev;
+ DeviceState *dev = NULL;
BusState *bus = NULL;
Error *err = NULL;
+ bool hide;
driver = qemu_opt_get(opts, "driver");
if (!driver) {
return NULL;
}
}
- if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) {
+ hide = should_hide_device(opts);
+
+ if ((hide || qdev_hotplug) && bus && !qbus_is_hotpluggable(bus)) {
error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name);
return NULL;
}
+ if (hide) {
+ return NULL;
+ }
+
if (!migration_is_idle()) {
error_setg(errp, "device_add not allowed while migrating");
return NULL;
err_del_dev:
error_propagate(errp, err);
- object_unparent(OBJECT(dev));
- object_unref(OBJECT(dev));
+ if (dev) {
+ object_unparent(OBJECT(dev));
+ object_unref(OBJECT(dev));
+ }
return NULL;
}
return;
}
- if (!migration_is_idle()) {
+ if (!migration_is_idle() && !dev->allow_unplug_during_migration) {
error_setg(errp, "device_del not allowed while migrating");
return;
}
break;
}
- if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)) {
+ if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
+ || (strcmp(st, "wait-unplug") == 0)) {
qobject_unref(rsp);
g_usleep(5000);
continue;
DeviceState *dev;
dev = qdev_device_add(opts, errp);
- if (!dev) {
+ if (!dev && *errp) {
+ error_report_err(*errp);
return -1;
+ } else if (dev) {
+ object_unref(OBJECT(dev));
}
- object_unref(OBJECT(dev));
return 0;
}