bsd-user: Implement getrlimit(2) and setrlimit(2)

[mirror_qemu.git] / net / vhost-vdpa.c
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c

index 41de76376f24dfd947f52a71a5cb9e3965f07b76..4e94c50bc7023a3f9b4d57bd2b2051e4da8524c2 100644 (file)
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,15 @@
  #include <err.h>
  #include "standard-headers/linux/virtio_net.h"
  #include "monitor/monitor.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
  #include "hw/virtio/vhost.h"
  
  /* Todo:need to add the multiqueue support here */
  typedef struct VhostVDPAState {
      NetClientState nc;
      struct vhost_vdpa vhost_vdpa;
+    Notifier migration_state;
      VHostNetState *vhost_net;
  
      /* Control commands shadow buffers */
@@ -40,42 +43,55 @@ typedef struct VhostVDPAState {
  
      /* The device always have SVQ enabled */
      bool always_svq;
+
+    /* The device can isolate CVQ in its own ASID */
+    bool cvq_isolated;
+
      bool started;
  } VhostVDPAState;
  
+/*
+ * The array is sorted alphabetically in ascending order,
+ * with the exception of VHOST_INVALID_FEATURE_BIT,
+ * which should always be the last entry.
+ */
  const int vdpa_feature_bits[] = {
-    VIRTIO_F_NOTIFY_ON_EMPTY,
-    VIRTIO_RING_F_INDIRECT_DESC,
-    VIRTIO_RING_F_EVENT_IDX,
      VIRTIO_F_ANY_LAYOUT,
+    VIRTIO_F_IOMMU_PLATFORM,
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_F_RING_PACKED,
+    VIRTIO_F_RING_RESET,
      VIRTIO_F_VERSION_1,
      VIRTIO_NET_F_CSUM,
-    VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
+    VIRTIO_NET_F_CTRL_MAC_ADDR,
+    VIRTIO_NET_F_CTRL_RX,
+    VIRTIO_NET_F_CTRL_RX_EXTRA,
+    VIRTIO_NET_F_CTRL_VLAN,
+    VIRTIO_NET_F_CTRL_VQ,
      VIRTIO_NET_F_GSO,
+    VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_GUEST_ECN,
      VIRTIO_NET_F_GUEST_TSO4,
      VIRTIO_NET_F_GUEST_TSO6,
-    VIRTIO_NET_F_GUEST_ECN,
      VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
+    VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_HOST_ECN,
      VIRTIO_NET_F_HOST_TSO4,
      VIRTIO_NET_F_HOST_TSO6,
-    VIRTIO_NET_F_HOST_ECN,
      VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_HOST_USO,
+    VIRTIO_NET_F_MQ,
      VIRTIO_NET_F_MRG_RXBUF,
      VIRTIO_NET_F_MTU,
-    VIRTIO_NET_F_CTRL_RX,
-    VIRTIO_NET_F_CTRL_RX_EXTRA,
-    VIRTIO_NET_F_CTRL_VLAN,
-    VIRTIO_NET_F_CTRL_MAC_ADDR,
      VIRTIO_NET_F_RSS,
-    VIRTIO_NET_F_MQ,
-    VIRTIO_NET_F_CTRL_VQ,
-    VIRTIO_F_IOMMU_PLATFORM,
-    VIRTIO_F_RING_PACKED,
-    VIRTIO_F_RING_RESET,
-    VIRTIO_NET_F_RSS,
-    VIRTIO_NET_F_HASH_REPORT,
-    VIRTIO_NET_F_GUEST_ANNOUNCE,
      VIRTIO_NET_F_STATUS,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_RING_F_INDIRECT_DESC,
+
+    /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
      VHOST_INVALID_FEATURE_BIT
  };
  
@@ -83,6 +99,7 @@ const int vdpa_feature_bits[] = {
  static const uint64_t vdpa_svq_device_features =
      BIT_ULL(VIRTIO_NET_F_CSUM) |
      BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
      BIT_ULL(VIRTIO_NET_F_MTU) |
      BIT_ULL(VIRTIO_NET_F_MAC) |
      BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
@@ -96,11 +113,16 @@ static const uint64_t vdpa_svq_device_features =
      BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
      BIT_ULL(VIRTIO_NET_F_STATUS) |
      BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |
      BIT_ULL(VIRTIO_NET_F_MQ) |
      BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
      BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+    /* VHOST_F_LOG_ALL is exposed by SVQ */
+    BIT_ULL(VHOST_F_LOG_ALL) |
      BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
-    BIT_ULL(VIRTIO_NET_F_STANDBY);
+    BIT_ULL(VIRTIO_NET_F_STANDBY) |
+    BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
  
  #define VHOST_VDPA_NET_CVQ_ASID 1
  
@@ -111,6 +133,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
      return s->vhost_net;
  }
  
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+    /*
+     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+     * In buffer is always 1 byte, so it should fit here
+     */
+    return sizeof(struct virtio_net_ctrl_hdr) +
+           2 * sizeof(struct virtio_net_ctrl_mac) +
+           MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
  static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
  {
      uint64_t invalid_dev_features =
@@ -179,13 +217,17 @@ err_init:
  static void vhost_vdpa_cleanup(NetClientState *nc)
  {
      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
-    struct vhost_dev *dev = &s->vhost_net->dev;
  
-    qemu_vfree(s->cvq_cmd_out_buffer);
-    qemu_vfree(s->status);
-    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
-        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+    /*
+     * If a peer NIC is attached, do not cleanup anything.
+     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
+     * when the guest is shutting down.
+     */
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+        return;
      }
+    munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
+    munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
      if (s->vhost_net) {
          vhost_net_cleanup(s->vhost_net);
          g_free(s->vhost_net);
@@ -235,17 +277,134 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
      return size;
  }
  
+/** From any vdpa net client, get the netclient of the first queue pair */
+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
+{
+    NICState *nic = qemu_get_nic(s->nc.peer);
+    NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
+
+    return DO_UPCAST(VhostVDPAState, nc, nc0);
+}
+
+static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+    VirtIONet *n;
+    VirtIODevice *vdev;
+    int data_queue_pairs, cvq, r;
+
+    /* We are only called on the first data vqs and only if x-svq is not set */
+    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
+        return;
+    }
+
+    vdev = v->dev->vdev;
+    n = VIRTIO_NET(vdev);
+    if (!n->vhost_started) {
+        return;
+    }
+
+    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+                                  n->max_ncs - n->max_queue_pairs : 0;
+    /*
+     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
+     * in the future and resume the device if read-only operations between
+     * suspend and reset goes wrong.
+     */
+    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
+
+    /* Start will check migration setup_or_active to configure or not SVQ */
+    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
+    if (unlikely(r < 0)) {
+        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
+    }
+}
+
+static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *migration = data;
+    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
+                                     migration_state);
+
+    if (migration_in_setup(migration)) {
+        vhost_vdpa_net_log_global_enable(s, true);
+    } else if (migration_has_failed(migration)) {
+        vhost_vdpa_net_log_global_enable(s, false);
+    }
+}
+
+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    add_migration_state_change_notifier(&s->migration_state);
+    if (v->shadow_vqs_enabled) {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
+    }
+}
+
+static int vhost_vdpa_net_data_start(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    if (s->always_svq ||
+        migration_is_setup_or_active(migrate_get_current()->state)) {
+        v->shadow_vqs_enabled = true;
+        v->shadow_data = true;
+    } else {
+        v->shadow_vqs_enabled = false;
+        v->shadow_data = false;
+    }
+
+    if (v->index == 0) {
+        vhost_vdpa_net_data_start_first(s);
+        return 0;
+    }
+
+    if (v->shadow_vqs_enabled) {
+        VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    }
+
+    return 0;
+}
+
+static void vhost_vdpa_net_client_stop(NetClientState *nc)
+{
+    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_dev *dev;
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+    if (s->vhost_vdpa.index == 0) {
+        remove_migration_state_change_notifier(&s->migration_state);
+    }
+
+    dev = s->vhost_vdpa.dev;
+    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+    }
+}
+
  static NetClientInfo net_vhost_vdpa_info = {
          .type = NET_CLIENT_DRIVER_VHOST_VDPA,
          .size = sizeof(VhostVDPAState),
          .receive = vhost_vdpa_receive,
+        .start = vhost_vdpa_net_data_start,
+        .stop = vhost_vdpa_net_client_stop,
          .cleanup = vhost_vdpa_cleanup,
          .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
          .has_ufo = vhost_vdpa_has_ufo,
          .check_peer_type = vhost_vdpa_check_peer_type,
  };
  
-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
+                                          Error **errp)
  {
      struct vhost_vring_state state = {
          .index = vq_index,
@@ -253,8 +412,8 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
      int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
  
      if (unlikely(r < 0)) {
-        error_report("Cannot get VQ %u group: %s", vq_index,
-                     g_strerror(errno));
+        r = -errno;
+        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
          return r;
      }
  
@@ -305,22 +464,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
      vhost_iova_tree_remove(tree, *map);
  }
  
-static size_t vhost_vdpa_net_cvq_cmd_len(void)
-{
-    /*
-     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
-     * In buffer is always 1 byte, so it should fit here
-     */
-    return sizeof(struct virtio_net_ctrl_hdr) +
-           2 * sizeof(struct virtio_net_ctrl_mac) +
-           MAC_TABLE_ENTRIES * ETH_ALEN;
-}
-
-static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
-{
-    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
-}
-
  /** Map CVQ buffer. */
  static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                    bool write)
@@ -352,22 +495,23 @@ dma_map_err:
  
  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
  {
-    VhostVDPAState *s;
+    VhostVDPAState *s, *s0;
      struct vhost_vdpa *v;
-    uint64_t backend_features;
      int64_t cvq_group;
-    int cvq_index, r;
+    int r;
+    Error *err = NULL;
  
      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
  
      s = DO_UPCAST(VhostVDPAState, nc, nc);
      v = &s->vhost_vdpa;
  
-    v->shadow_data = s->always_svq;
+    s0 = vhost_vdpa_net_first_nc_vdpa(s);
+    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
      v->shadow_vqs_enabled = s->always_svq;
      s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
  
-    if (s->always_svq) {
+    if (s->vhost_vdpa.shadow_data) {
          /* SVQ is already configured for all virtqueues */
          goto out;
      }
@@ -375,49 +519,28 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
      /*
       * If we early return in these cases SVQ will not be enabled. The migration
       * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
-     *
-     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
-     * yet.
       */
-    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
-    if (unlikely(r < 0)) {
-        error_report("Cannot get vdpa backend_features: %s(%d)",
-            g_strerror(errno), errno);
-        return -1;
+    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+        return 0;
      }
-    if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
-        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+
+    if (!s->cvq_isolated) {
          return 0;
      }
  
-    /*
-     * Check if all the virtqueues of the virtio device are in a different vq
-     * than the last vq. VQ group of last group passed in cvq_group.
-     */
-    cvq_index = v->dev->vq_index_end - 1;
-    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
+                                           v->dev->vq_index_end - 1,
+                                           &err);
      if (unlikely(cvq_group < 0)) {
+        error_report_err(err);
          return cvq_group;
      }
-    for (int i = 0; i < cvq_index; ++i) {
-        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
-
-        if (unlikely(group < 0)) {
-            return group;
-        }
-
-        if (group == cvq_group) {
-            return 0;
-        }
-    }
  
      r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
      if (unlikely(r < 0)) {
          return r;
      }
  
-    v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
-                                       v->iova_range.last);
      v->shadow_vqs_enabled = true;
      s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
  
@@ -426,6 +549,26 @@ out:
          return 0;
      }
  
+    if (s0->vhost_vdpa.iova_tree) {
+        /*
+         * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
+         * simplicity, whether CVQ shares ASID with guest or not, because:
+         * - Memory listener need access to guest's memory addresses allocated
+         *   in the IOVA tree.
+         * - There should be plenty of IOVA address space for both ASID not to
+         *   worry about collisions between them.  Guest's translations are
+         *   still validated with virtio virtqueue_pop so there is no risk for
+         *   the guest to access memory that it shouldn't.
+         *
+         * To allocate a iova tree per ASID is doable but it complicates the
+         * code and it is not worth it for the moment.
+         */
+        v->iova_tree = s0->vhost_vdpa.iova_tree;
+    } else {
+        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+                                           v->iova_range.last);
+    }
+
      r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                                 vhost_vdpa_net_cvq_cmd_page_len(), false);
      if (unlikely(r < 0)) {
@@ -450,15 +593,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
      if (s->vhost_vdpa.shadow_vqs_enabled) {
          vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
          vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
-        if (!s->always_svq) {
-            /*
-             * If only the CVQ is shadowed we can delete this safely.
-             * If all the VQs are shadows this will be needed by the time the
-             * device is started again to register SVQ vrings and similar.
-             */
-            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
-        }
      }
+
+    vhost_vdpa_net_client_stop(nc);
  }
  
  static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
@@ -494,35 +631,96 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
  }
  
  static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
-                                       uint8_t cmd, const void *data,
-                                       size_t data_size)
+                                       uint8_t cmd, const struct iovec *data_sg,
+                                       size_t data_num)
  {
      const struct virtio_net_ctrl_hdr ctrl = {
          .class = class,
          .cmd = cmd,
      };
+    size_t data_size = iov_size(data_sg, data_num);
  
      assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
  
+    /* pack the CVQ command header */
      memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
-    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
  
-    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
+    /* pack the CVQ command command-specific-data */
+    iov_to_buf(data_sg, data_num, 0,
+               s->cvq_cmd_out_buffer + sizeof(ctrl), data_size);
+
+    return vhost_vdpa_net_cvq_add(s, data_size + sizeof(ctrl),
                                    sizeof(virtio_net_ctrl_ack));
  }
  
  static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
  {
-    uint64_t features = n->parent_obj.guest_features;
-    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+        const struct iovec data = {
+            .iov_base = (void *)n->mac,
+            .iov_len = sizeof(n->mac),
+        };
          ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                    VIRTIO_NET_CTRL_MAC_ADDR_SET,
-                                                  n->mac, sizeof(n->mac));
+                                                  &data, 1);
          if (unlikely(dev_written < 0)) {
              return dev_written;
          }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
  
-        return *s->status != VIRTIO_NET_OK;
+    /*
+     * According to VirtIO standard, "The device MUST have an
+     * empty MAC filtering table on reset.".
+     *
+     * Therefore, there is no need to send this CVQ command if the
+     * driver also sets an empty MAC filter table, which aligns with
+     * the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) ||
+        n->mac_table.in_use == 0) {
+        return 0;
+    }
+
+    uint32_t uni_entries = n->mac_table.first_multi,
+             uni_macs_size = uni_entries * ETH_ALEN,
+             mul_entries = n->mac_table.in_use - uni_entries,
+             mul_macs_size = mul_entries * ETH_ALEN;
+    struct virtio_net_ctrl_mac uni = {
+        .entries = cpu_to_le32(uni_entries),
+    };
+    struct virtio_net_ctrl_mac mul = {
+        .entries = cpu_to_le32(mul_entries),
+    };
+    const struct iovec data[] = {
+        {
+            .iov_base = &uni,
+            .iov_len = sizeof(uni),
+        }, {
+            .iov_base = n->mac_table.macs,
+            .iov_len = uni_macs_size,
+        }, {
+            .iov_base = &mul,
+            .iov_len = sizeof(mul),
+        }, {
+            .iov_base = &n->mac_table.macs[uni_macs_size],
+            .iov_len = mul_macs_size,
+        },
+    };
+    ssize_t dev_written = vhost_vdpa_net_load_cmd(s,
+                                VIRTIO_NET_CTRL_MAC,
+                                VIRTIO_NET_CTRL_MAC_TABLE_SET,
+                                data, ARRAY_SIZE(data));
+    if (unlikely(dev_written < 0)) {
+        return dev_written;
+    }
+    if (*s->status != VIRTIO_NET_OK) {
+        return -EIO;
      }
  
      return 0;
@@ -532,22 +730,242 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                    const VirtIONet *n)
  {
      struct virtio_net_ctrl_mq mq;
-    uint64_t features = n->parent_obj.guest_features;
      ssize_t dev_written;
  
-    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
+    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
          return 0;
      }
  
      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
+    const struct iovec data = {
+        .iov_base = &mq,
+        .iov_len = sizeof(mq),
+    };
      dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
-                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
-                                          sizeof(mq));
+                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
+                                          &data, 1);
+    if (unlikely(dev_written < 0)) {
+        return dev_written;
+    }
+    if (*s->status != VIRTIO_NET_OK) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
+                                        const VirtIONet *n)
+{
+    uint64_t offloads;
+    ssize_t dev_written;
+
+    if (!virtio_vdev_has_feature(&n->parent_obj,
+                                 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+        return 0;
+    }
+
+    if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
+        /*
+         * According to VirtIO standard, "Upon feature negotiation
+         * corresponding offload gets enabled to preserve
+         * backward compatibility.".
+         *
+         * Therefore, there is no need to send this CVQ command if the
+         * driver also enables all supported offloads, which aligns with
+         * the device's defaults.
+         *
+         * Note that the device's defaults can mismatch the driver's
+         * configuration only at live migration.
+         */
+        return 0;
+    }
+
+    offloads = cpu_to_le64(n->curr_guest_offloads);
+    const struct iovec data = {
+        .iov_base = &offloads,
+        .iov_len = sizeof(offloads),
+    };
+    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+                                          VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
+                                          &data, 1);
      if (unlikely(dev_written < 0)) {
          return dev_written;
      }
+    if (*s->status != VIRTIO_NET_OK) {
+        return -EIO;
+    }
  
-    return *s->status != VIRTIO_NET_OK;
+    return 0;
+}
+
+static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s,
+                                       uint8_t cmd,
+                                       uint8_t on)
+{
+    const struct iovec data = {
+        .iov_base = &on,
+        .iov_len = sizeof(on),
+    };
+    return vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX,
+                                   cmd, &data, 1);
+}
+
+static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
+                                  const VirtIONet *n)
+{
+    ssize_t dev_written;
+
+    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) {
+        return 0;
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns promiscuous mode
+     * on by default.
+     *
+     * Additionally, according to VirtIO standard, "Since there are
+     * no guarantees, it can use a hash filter or silently switch to
+     * allmulti or promiscuous mode if it is given too many addresses.".
+     * QEMU marks `n->mac_table.uni_overflow` if guest sets too many
+     * non-multicast MAC addresses, indicating that promiscuous mode
+     * should be enabled.
+     *
+     * Therefore, QEMU should only send this CVQ command if the
+     * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off,
+     * which sets promiscuous mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (!n->mac_table.uni_overflow && !n->promisc) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_PROMISC, 0);
+        if (unlikely(dev_written < 0)) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns all-multicast mode
+     * off by default.
+     *
+     * According to VirtIO standard, "Since there are no guarantees,
+     * it can use a hash filter or silently switch to allmulti or
+     * promiscuous mode if it is given too many addresses.". QEMU marks
+     * `n->mac_table.multi_overflow` if guest sets too many
+     * non-multicast MAC addresses.
+     *
+     * Therefore, QEMU should only send this CVQ command if the
+     * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on,
+     * which sets all-multicast mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (n->mac_table.multi_overflow || n->allmulti) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
+        if (unlikely(dev_written < 0)) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) {
+        return 0;
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns all-unicast mode
+     * off by default.
+     *
+     * Therefore, QEMU should only send this CVQ command if the driver
+     * sets all-unicast mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (n->alluni) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_ALLUNI, 1);
+        if (dev_written < 0) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns non-multicast mode
+     * off by default.
+     *
+     * Therefore, QEMU should only send this CVQ command if the driver
+     * sets non-multicast mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (n->nomulti) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_NOMULTI, 1);
+        if (dev_written < 0) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns non-unicast mode
+     * off by default.
+     *
+     * Therefore, QEMU should only send this CVQ command if the driver
+     * sets non-unicast mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (n->nouni) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_NOUNI, 1);
+        if (dev_written < 0) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    /*
+     * According to virtio_net_reset(), device turns non-broadcast mode
+     * off by default.
+     *
+     * Therefore, QEMU should only send this CVQ command if the driver
+     * sets non-broadcast mode on, different from the device's defaults.
+     *
+     * Note that the device's defaults can mismatch the driver's
+     * configuration only at live migration.
+     */
+    if (n->nobcast) {
+        dev_written = vhost_vdpa_net_load_rx_mode(s,
+                                            VIRTIO_NET_CTRL_RX_NOBCAST, 1);
+        if (dev_written < 0) {
+            return dev_written;
+        }
+        if (*s->status != VIRTIO_NET_OK) {
+            return -EIO;
+        }
+    }
+
+    return 0;
  }
  
  static int vhost_vdpa_net_load(NetClientState *nc)
@@ -572,6 +990,14 @@ static int vhost_vdpa_net_load(NetClientState *nc)
      if (unlikely(r)) {
          return r;
      }
+    r = vhost_vdpa_net_load_offloads(s, n);
+    if (unlikely(r)) {
+        return r;
+    }
+    r = vhost_vdpa_net_load_rx(s, n);
+    if (unlikely(r)) {
+        return r;
+    }
  
      return 0;
  }
@@ -589,6 +1015,148 @@ static NetClientInfo net_vhost_vdpa_cvq_info = {
      .check_peer_type = vhost_vdpa_check_peer_type,
  };
  
+/*
+ * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to
+ * vdpa device.
+ *
+ * Considering that QEMU cannot send the entire filter table to the
+ * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ
+ * command to enable promiscuous mode to receive all packets,
+ * according to VirtIO standard, "Since there are no guarantees,
+ * it can use a hash filter or silently switch to allmulti or
+ * promiscuous mode if it is given too many addresses.".
+ *
+ * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and
+ * marks `n->mac_table.x_overflow` accordingly, it should have
+ * the same effect on the device model to receive
+ * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses.
+ * The same applies to multicast MAC addresses.
+ *
+ * Therefore, QEMU can provide the device model with a fake
+ * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1)
+ * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast
+ * MAC addresses. This ensures that the device model marks
+ * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`,
+ * allowing all packets to be received, which aligns with the
+ * state of the vdpa device.
+ */
+static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
+                                                       VirtQueueElement *elem,
+                                                       struct iovec *out)
+{
+    struct virtio_net_ctrl_mac mac_data, *mac_ptr;
+    struct virtio_net_ctrl_hdr *hdr_ptr;
+    uint32_t cursor;
+    ssize_t r;
+
+    /* parse the non-multicast MAC address entries from CVQ command */
+    cursor = sizeof(*hdr_ptr);
+    r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
+                   &mac_data, sizeof(mac_data));
+    if (unlikely(r != sizeof(mac_data))) {
+        /*
+         * If the CVQ command is invalid, we should simulate the vdpa device
+         * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
+         */
+        *s->status = VIRTIO_NET_ERR;
+        return sizeof(*s->status);
+    }
+    cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
+
+    /* parse the multicast MAC address entries from CVQ command */
+    r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
+                   &mac_data, sizeof(mac_data));
+    if (r != sizeof(mac_data)) {
+        /*
+         * If the CVQ command is invalid, we should simulate the vdpa device
+         * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
+         */
+        *s->status = VIRTIO_NET_ERR;
+        return sizeof(*s->status);
+    }
+    cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
+
+    /* validate the CVQ command */
+    if (iov_size(elem->out_sg, elem->out_num) != cursor) {
+        /*
+         * If the CVQ command is invalid, we should simulate the vdpa device
+         * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
+         */
+        *s->status = VIRTIO_NET_ERR;
+        return sizeof(*s->status);
+    }
+
+    /*
+     * According to VirtIO standard, "Since there are no guarantees,
+     * it can use a hash filter or silently switch to allmulti or
+     * promiscuous mode if it is given too many addresses.".
+     *
+     * Therefore, considering that QEMU is unable to send the entire
+     * filter table to the vdpa device, it should send the
+     * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode
+     */
+    r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 1);
+    if (unlikely(r < 0)) {
+        return r;
+    }
+    if (*s->status != VIRTIO_NET_OK) {
+        return sizeof(*s->status);
+    }
+
+    /*
+     * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ
+     * command to the device model, including (`MAC_TABLE_ENTRIES` + 1)
+     * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1)
+     * multicast MAC addresses.
+     *
+     * By doing so, the device model can mark `n->mac_table.uni_overflow`
+     * and `n->mac_table.multi_overflow`, enabling all packets to be
+     * received, which aligns with the state of the vdpa device.
+     */
+    cursor = 0;
+    uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1,
+             fake_mul_entries = MAC_TABLE_ENTRIES + 1,
+             fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) +
+                             sizeof(mac_data) + fake_uni_entries * ETH_ALEN +
+                             sizeof(mac_data) + fake_mul_entries * ETH_ALEN;
+
+    assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len());
+    out->iov_len = fake_cvq_size;
+
+    /* pack the header for fake CVQ command */
+    hdr_ptr = out->iov_base + cursor;
+    hdr_ptr->class = VIRTIO_NET_CTRL_MAC;
+    hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
+    cursor += sizeof(*hdr_ptr);
+
+    /*
+     * Pack the non-multicast MAC addresses part for fake CVQ command.
+     *
+     * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
+     * addresses provided in CVQ command. Therefore, only the entries
+     * field need to be prepared in the CVQ command.
+     */
+    mac_ptr = out->iov_base + cursor;
+    mac_ptr->entries = cpu_to_le32(fake_uni_entries);
+    cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN;
+
+    /*
+     * Pack the multicast MAC addresses part for fake CVQ command.
+     *
+     * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
+     * addresses provided in CVQ command. Therefore, only the entries
+     * field need to be prepared in the CVQ command.
+     */
+    mac_ptr = out->iov_base + cursor;
+    mac_ptr->entries = cpu_to_le32(fake_mul_entries);
+
+    /*
+     * Simulating QEMU poll a vdpa device used buffer
+     * for VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
+     */
+    return sizeof(*s->status);
+}
+
  /**
   * Validate and copy control virtqueue commands.
   *
@@ -601,6 +1169,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
  {
      VhostVDPAState *s = opaque;
      size_t in_len;
+    const struct virtio_net_ctrl_hdr *ctrl;
      virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
      /* Out buffer sent to both the vdpa device and the device model */
      struct iovec out = {
@@ -615,14 +1184,34 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
  
      out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                               s->cvq_cmd_out_buffer,
-                             vhost_vdpa_net_cvq_cmd_len());
-    if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
+                             vhost_vdpa_net_cvq_cmd_page_len());
+
+    ctrl = s->cvq_cmd_out_buffer;
+    if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) {
          /*
           * Guest announce capability is emulated by qemu, so don't forward to
           * the device.
           */
          dev_written = sizeof(status);
          *s->status = VIRTIO_NET_OK;
+    } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC &&
+                        ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET &&
+                        iov_size(elem->out_sg, elem->out_num) > out.iov_len)) {
+        /*
+         * Due to the size limitation of the out buffer sent to the vdpa device,
+         * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), excessive
+         * MAC addresses set by the driver for the filter table can cause
+         * truncation of the CVQ command in QEMU. As a result, the vdpa device
+         * rejects the flawed CVQ command.
+         *
+         * Therefore, QEMU must handle this situation instead of sending
+         * the CVQ command directly.
+         */
+        dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
+                                                                  &out);
+        if (unlikely(dev_written < 0)) {
+            goto out;
+        }
      } else {
          dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
          if (unlikely(dev_written < 0)) {
@@ -636,7 +1225,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
      }
  
      if (*s->status != VIRTIO_NET_OK) {
-        return VIRTIO_NET_ERR;
+        goto out;
      }
  
      status = VIRTIO_NET_ERR;
@@ -652,7 +1241,16 @@ out:
          error_report("Bad device CVQ written length");
      }
      vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
-    g_free(elem);
+    /*
+     * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when
+     * the function successfully forwards the CVQ command, indicated
+     * by a non-negative value of `dev_written`. Otherwise, it still
+     * belongs to SVQ.
+     * This function should only free the `elem` when it owns.
+     */
+    if (dev_written >= 0) {
+        g_free(elem);
+    }
      return dev_written < 0 ? dev_written : 0;
  }
  
@@ -660,6 +1258,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
      .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
  };
  
+/**
+ * Probe if CVQ is isolated
+ *
+ * @device_fd         The vdpa device fd
+ * @features          Features offered by the device.
+ * @cvq_index         The control vq pair index
+ *
+ * Returns <0 in case of failure, 0 if false and 1 if true.
+ */
+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
+                                          int cvq_index, Error **errp)
+{
+    uint64_t backend_features;
+    int64_t cvq_group;
+    uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                     VIRTIO_CONFIG_S_DRIVER |
+                     VIRTIO_CONFIG_S_FEATURES_OK;
+    int r;
+
+    ERRP_GUARD();
+
+    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
+    if (unlikely(r < 0)) {
+        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
+        return r;
+    }
+
+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
+        return 0;
+    }
+
+    r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
+    if (unlikely(r)) {
+        error_setg_errno(errp, errno, "Cannot set features");
+    }
+
+    r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
+    if (unlikely(r)) {
+        error_setg_errno(errp, -r, "Cannot set device features");
+        goto out;
+    }
+
+    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
+    if (unlikely(cvq_group < 0)) {
+        if (cvq_group != -ENOTSUP) {
+            r = cvq_group;
+            goto out;
+        }
+
+        /*
+         * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
+         * support ASID even if the parent driver does not.  The CVQ cannot be
+         * isolated in this case.
+         */
+        error_free(*errp);
+        *errp = NULL;
+        r = 0;
+        goto out;
+    }
+
+    for (int i = 0; i < cvq_index; ++i) {
+        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
+        if (unlikely(group < 0)) {
+            r = group;
+            goto out;
+        }
+
+        if (group == (int64_t)cvq_group) {
+            r = 0;
+            goto out;
+        }
+    }
+
+    r = 1;
+
+out:
+    status = 0;
+    ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
+    return r;
+}
+
  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                         const char *device,
                                         const char *name,
@@ -669,16 +1348,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                         bool is_datapath,
                                         bool svq,
                                         struct vhost_vdpa_iova_range iova_range,
-                                       VhostIOVATree *iova_tree)
+                                       uint64_t features,
+                                       Error **errp)
  {
      NetClientState *nc = NULL;
      VhostVDPAState *s;
      int ret = 0;
      assert(name);
+    int cvq_isolated;
+
      if (is_datapath) {
          nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                   name);
      } else {
+        cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
+                                                      queue_pair_index * 2,
+                                                      errp);
+        if (unlikely(cvq_isolated < 0)) {
+            return NULL;
+        }
+
          nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                           device, name);
      }
@@ -688,20 +1377,36 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
      s->vhost_vdpa.device_fd = vdpa_device_fd;
      s->vhost_vdpa.index = queue_pair_index;
      s->always_svq = svq;
+    s->migration_state.notify = vdpa_net_migration_state_notifier;
      s->vhost_vdpa.shadow_vqs_enabled = svq;
      s->vhost_vdpa.iova_range = iova_range;
      s->vhost_vdpa.shadow_data = svq;
-    s->vhost_vdpa.iova_tree = iova_tree;
-    if (!is_datapath) {
-        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
-                                            vhost_vdpa_net_cvq_cmd_page_len());
-        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
-        s->status = qemu_memalign(qemu_real_host_page_size(),
-                                  vhost_vdpa_net_cvq_cmd_page_len());
-        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());
+    if (queue_pair_index == 0) {
+        vhost_vdpa_net_valid_svq_features(features,
+                                          &s->vhost_vdpa.migration_blocker);
+    } else if (!is_datapath) {
+        s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+                                     PROT_READ | PROT_WRITE,
+                                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+        s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+                         PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
+                         -1, 0);
  
          s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
          s->vhost_vdpa.shadow_vq_ops_opaque = s;
+        s->cvq_isolated = cvq_isolated;
+
+        /*
+         * TODO: We cannot migrate devices with CVQ and no x-svq enabled as
+         * there is no way to set the device state (MAC, MQ, etc) before
+         * starting the datapath.
+         *
+         * Migration blocker ownership now belongs to s->vhost_vdpa.
+         */
+        if (!svq) {
+            error_setg(&s->vhost_vdpa.migration_blocker,
+                       "net vdpa cannot migrate with CVQ feature");
+        }
      }
      ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
      if (ret) {
@@ -711,14 +1416,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
      return nc;
  }
  
-static int vhost_vdpa_get_iova_range(int fd,
-                                     struct vhost_vdpa_iova_range *iova_range)
-{
-    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
-
-    return ret < 0 ? -errno : 0;
-}
-
  static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
  {
      int ret = ioctl(fd, VHOST_GET_FEATURES, features);
@@ -769,7 +1466,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
      uint64_t features;
      int vdpa_device_fd;
      g_autofree NetClientState **ncs = NULL;
-    g_autoptr(VhostIOVATree) iova_tree = NULL;
      struct vhost_vdpa_iova_range iova_range;
      NetClientState *nc;
      int queue_pairs, r, i = 0, has_cvq = 0;
@@ -814,13 +1510,15 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
          return queue_pairs;
      }
  
-    vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
-    if (opts->x_svq) {
-        if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
-            goto err_svq;
-        }
+    r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
+    if (unlikely(r < 0)) {
+        error_setg(errp, "vhost-vdpa: get iova range failed: %s",
+                   strerror(-r));
+        goto err;
+    }
  
-        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
+    if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
+        goto err;
      }
  
      ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
@@ -828,7 +1526,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
      for (i = 0; i < queue_pairs; i++) {
          ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                       vdpa_device_fd, i, 2, true, opts->x_svq,
-                                     iova_range, iova_tree);
+                                     iova_range, features, errp);
          if (!ncs[i])
              goto err;
      }
@@ -836,13 +1534,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
      if (has_cvq) {
          nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                   vdpa_device_fd, i, 1, false,
-                                 opts->x_svq, iova_range, iova_tree);
+                                 opts->x_svq, iova_range, features, errp);
          if (!nc)
              goto err;
      }
  
-    /* iova_tree ownership belongs to last NetClientState */
-    g_steal_pointer(&iova_tree);
      return 0;
  
  err:
@@ -852,7 +1548,6 @@ err:
          }
      }
  
-err_svq:
      qemu_close(vdpa_device_fd);
  
      return -1;