BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
/* VHOST_F_LOG_ALL is exposed by SVQ */
BIT_ULL(VHOST_F_LOG_ALL) |
+ BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |
BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
BIT_ULL(VIRTIO_NET_F_STANDBY) |
BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
}
}
+/** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */
+static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd)
+{
+ return true;
+}
+
static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
{
struct vhost_vdpa *v = &s->vhost_vdpa;
- add_migration_state_change_notifier(&s->migration_state);
+ migration_add_notifier(&s->migration_state,
+ vdpa_net_migration_state_notifier);
if (v->shadow_vqs_enabled) {
v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
if (s->vhost_vdpa.index == 0) {
- remove_migration_state_change_notifier(&s->migration_state);
+ migration_remove_notifier(&s->migration_state);
}
dev = s->vhost_vdpa.dev;
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
.has_ufo = vhost_vdpa_has_ufo,
.check_peer_type = vhost_vdpa_check_peer_type,
+ .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
};
static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
}
+/*
+ * Poll SVQ for multiple pending control commands and check the device's ack.
+ *
+ * Caller should hold the BQL when invoking this function.
+ *
+ * @s: The VhostVDPAState
+ * @len: The length of the pending status shadow buffer
+ */
+static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
+{
+ /* device uses a one-byte length ack for each control command */
+ ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);
+ if (unlikely(dev_written != len)) {
+ return -EIO;
+ }
+
+ /* check the device's ack */
+ for (int i = 0; i < len; ++i) {
+ if (s->status[i] != VIRTIO_NET_OK) {
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
struct iovec *out_cursor,
struct iovec *in_cursor, uint8_t class,
.class = class,
.cmd = cmd,
};
- size_t data_size = iov_size(data_sg, data_num);
+ size_t data_size = iov_size(data_sg, data_num), cmd_size;
struct iovec out, in;
ssize_t r;
+ unsigned dummy_cursor_iov_cnt;
+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
+ cmd_size = sizeof(ctrl) + data_size;
+ if (vhost_svq_available_slots(svq) < 2 ||
+ iov_size(out_cursor, 1) < cmd_size) {
+ /*
+ * It is time to flush all pending control commands if SVQ is full
+ * or control commands shadow buffers are full.
+ *
+ * We can poll here since we've had BQL from the time
+ * we sent the descriptor.
+ */
+ r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base -
+ (void *)s->status);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
+ }
/* pack the CVQ command header */
iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl));
out_cursor->iov_base + sizeof(ctrl), data_size);
/* extract the required buffer from the cursor for output */
- iov_copy(&out, 1, out_cursor, 1, 0, sizeof(ctrl) + data_size);
+ iov_copy(&out, 1, out_cursor, 1, 0, cmd_size);
/* extract the required buffer from the cursor for input */
iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status));
return r;
}
- /*
- * We can poll here since we've had BQL from the time
- * we sent the descriptor.
- */
- return vhost_vdpa_net_svq_poll(s, 1);
+ /* iterate the cursors */
+ dummy_cursor_iov_cnt = 1;
+ iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size);
+ dummy_cursor_iov_cnt = 1;
+ iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status));
+
+ return 0;
}
static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
.iov_base = (void *)n->mac,
.iov_len = sizeof(n->mac),
};
- ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_MAC,
- VIRTIO_NET_CTRL_MAC_ADDR_SET,
- &data, 1);
- if (unlikely(dev_written < 0)) {
- return dev_written;
- }
- if (*s->status != VIRTIO_NET_OK) {
- return -EIO;
+ ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_MAC,
+ VIRTIO_NET_CTRL_MAC_ADDR_SET,
+ &data, 1);
+ if (unlikely(r < 0)) {
+ return r;
}
}
.iov_len = mul_macs_size,
},
};
- ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_MAC,
- VIRTIO_NET_CTRL_MAC_TABLE_SET,
- data, ARRAY_SIZE(data));
- if (unlikely(dev_written < 0)) {
- return dev_written;
+ ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_MAC,
+ VIRTIO_NET_CTRL_MAC_TABLE_SET,
+ data, ARRAY_SIZE(data));
+ if (unlikely(r < 0)) {
+ return r;
}
- if (*s->status != VIRTIO_NET_OK) {
- return -EIO;
+
+ return 0;
+}
+
+static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n,
+ struct iovec *out_cursor,
+ struct iovec *in_cursor)
+{
+ struct virtio_net_rss_config cfg = {};
+ ssize_t r;
+ g_autofree uint16_t *table = NULL;
+
+ /*
+ * According to VirtIO standard, "Initially the device has all hash
+ * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.".
+ *
+ * Therefore, there is no need to send this CVQ command if the
+ * driver disables the all hash types, which aligns with
+ * the device's defaults.
+ *
+ * Note that the device's defaults can mismatch the driver's
+ * configuration only at live migration.
+ */
+ if (!n->rss_data.enabled ||
+ n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) {
+ return 0;
+ }
+
+ table = g_malloc_n(n->rss_data.indirections_len,
+ sizeof(n->rss_data.indirections_table[0]));
+ cfg.hash_types = cpu_to_le32(n->rss_data.hash_types);
+
+ /*
+ * According to VirtIO standard, "Field reserved MUST contain zeroes.
+ * It is defined to make the structure to match the layout of
+ * virtio_net_rss_config structure, defined in 5.1.6.5.7.".
+ *
+ * Therefore, we need to zero the fields in
+ * struct virtio_net_rss_config, which corresponds to the
+ * `reserved` field in struct virtio_net_hash_config.
+ *
+ * Note that all other fields are zeroed at their definitions,
+ * except for the `indirection_table` field, where the actual data
+ * is stored in the `table` variable to ensure compatibility
+ * with RSS case. Therefore, we need to zero the `table` variable here.
+ */
+ table[0] = 0;
+
+ /*
+ * Considering that virtio_net_handle_rss() currently does not restore
+ * the hash key length parsed from the CVQ command sent from the guest
+ * into n->rss_data and uses the maximum key length in other code, so
+ * we also employ the maximum key length here.
+ */
+ cfg.hash_key_length = sizeof(n->rss_data.key);
+
+ const struct iovec data[] = {
+ {
+ .iov_base = &cfg,
+ .iov_len = offsetof(struct virtio_net_rss_config,
+ indirection_table),
+ }, {
+ .iov_base = table,
+ .iov_len = n->rss_data.indirections_len *
+ sizeof(n->rss_data.indirections_table[0]),
+ }, {
+ .iov_base = &cfg.max_tx_vq,
+ .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) -
+ offsetof(struct virtio_net_rss_config, max_tx_vq),
+ }, {
+ .iov_base = (void *)n->rss_data.key,
+ .iov_len = sizeof(n->rss_data.key),
+ }
+ };
+
+ r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_MQ,
+ VIRTIO_NET_CTRL_MQ_HASH_CONFIG,
+ data, ARRAY_SIZE(data));
+ if (unlikely(r < 0)) {
+ return r;
}
return 0;
struct iovec *in_cursor)
{
struct virtio_net_ctrl_mq mq;
- ssize_t dev_written;
+ ssize_t r;
if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
return 0;
.iov_base = &mq,
.iov_len = sizeof(mq),
};
- dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_MQ,
- VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
- &data, 1);
- if (unlikely(dev_written < 0)) {
- return dev_written;
+ r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_MQ,
+ VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
+ &data, 1);
+ if (unlikely(r < 0)) {
+ return r;
}
- if (*s->status != VIRTIO_NET_OK) {
- return -EIO;
+
+ if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_HASH_REPORT)) {
+ return 0;
+ }
+
+ r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor);
+ if (unlikely(r < 0)) {
+ return r;
}
return 0;
struct iovec *in_cursor)
{
uint64_t offloads;
- ssize_t dev_written;
+ ssize_t r;
if (!virtio_vdev_has_feature(&n->parent_obj,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
.iov_base = &offloads,
.iov_len = sizeof(offloads),
};
- dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_GUEST_OFFLOADS,
- VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
- &data, 1);
- if (unlikely(dev_written < 0)) {
- return dev_written;
- }
- if (*s->status != VIRTIO_NET_OK) {
- return -EIO;
+ r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+ VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
+ &data, 1);
+ if (unlikely(r < 0)) {
+ return r;
}
return 0;
.iov_base = &on,
.iov_len = sizeof(on),
};
- ssize_t dev_written;
+ ssize_t r;
- dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_RX,
- cmd, &data, 1);
- if (unlikely(dev_written < 0)) {
- return dev_written;
- }
- if (*s->status != VIRTIO_NET_OK) {
- return -EIO;
+ r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_RX, cmd, &data, 1);
+ if (unlikely(r < 0)) {
+ return r;
}
return 0;
.iov_base = &vid,
.iov_len = sizeof(vid),
};
- ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
- VIRTIO_NET_CTRL_VLAN,
- VIRTIO_NET_CTRL_VLAN_ADD,
- &data, 1);
- if (unlikely(dev_written < 0)) {
- return dev_written;
- }
- if (unlikely(*s->status != VIRTIO_NET_OK)) {
- return -EIO;
+ ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
+ VIRTIO_NET_CTRL_VLAN,
+ VIRTIO_NET_CTRL_VLAN_ADD,
+ &data, 1);
+ if (unlikely(r < 0)) {
+ return r;
}
return 0;
if (unlikely(r)) {
return r;
}
+
+ /*
+ * We need to poll and check all pending device's used buffers.
+ *
+ * We can poll here since we've had BQL from the time
+ * we sent the descriptor.
+ */
+ r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status);
+ if (unlikely(r)) {
+ return r;
+ }
}
for (int i = 0; i < v->dev->vq_index; ++i) {
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
.has_ufo = vhost_vdpa_has_ufo,
.check_peer_type = vhost_vdpa_check_peer_type,
+ .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
};
/*
s->vhost_vdpa.device_fd = vdpa_device_fd;
s->vhost_vdpa.index = queue_pair_index;
s->always_svq = svq;
- s->migration_state.notify = vdpa_net_migration_state_notifier;
+ s->migration_state.notify = NULL;
s->vhost_vdpa.shadow_vqs_enabled = svq;
s->vhost_vdpa.iova_range = iova_range;
s->vhost_vdpa.shadow_data = svq;