#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
+#include "spdk/memory.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "vhost_internal.h"
-#include "spdk_internal/memory.h"
-
-struct vhost_poll_group {
- struct spdk_thread *thread;
- TAILQ_ENTRY(vhost_poll_group) tailq;
-};
-
-static TAILQ_HEAD(, vhost_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
-
-static uint32_t *g_num_ctrlrs;
+static struct spdk_cpuset g_vhost_core_mask;
/* Path to folder where character device will be created. Can be set by user. */
static char dev_dirname[PATH_MAX] = "";
-static struct spdk_thread *g_fini_thread;
+/* Thread performing all vhost management operations */
+static struct spdk_thread *g_vhost_init_thread;
+
static spdk_vhost_fini_cb g_fini_cpl_cb;
-struct spdk_vhost_session_fn_ctx {
+/**
+ * DPDK calls our callbacks synchronously but the work those callbacks
+ * perform needs to be async. Luckily, all DPDK callbacks are called on
+ * a DPDK-internal pthread, so we'll just wait on a semaphore in there.
+ */
+static sem_t g_dpdk_sem;
+
+/** Return code for the current DPDK callback */
+static int g_dpdk_response;
+
+struct vhost_session_fn_ctx {
/** Device pointer obtained before enqueuing the event */
struct spdk_vhost_dev *vdev;
/** ID of the session to send event to. */
uint32_t vsession_id;
- /** User callback function to be executed on given lcore. */
+ /** User provided function to be executed on session's thread. */
spdk_vhost_session_fn cb_fn;
- /** Semaphore used to signal that event is done. */
- sem_t sem;
-
- /** Response to be written by enqueued event. */
- int response;
+ /**
+ * User provided function to be called on the init thread
+ * after iterating through all sessions.
+ */
+ spdk_vhost_dev_fn cpl_fn;
/** Custom user context */
void *user_ctx;
};
-static int new_connection(int vid);
-static int start_device(int vid);
-static void stop_device(int vid);
-static void destroy_connection(int vid);
+static TAILQ_HEAD(, spdk_vhost_dev) g_vhost_devices = TAILQ_HEAD_INITIALIZER(
+ g_vhost_devices);
+static pthread_mutex_t g_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;
-#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
-static int get_config(int vid, uint8_t *config, uint32_t len);
-static int set_config(int vid, uint8_t *config, uint32_t offset,
- uint32_t size, uint32_t flags);
-#endif
-
-const struct vhost_device_ops g_spdk_vhost_ops = {
- .new_device = start_device,
- .destroy_device = stop_device,
- .new_connection = new_connection,
- .destroy_connection = destroy_connection,
-#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
- .get_config = get_config,
- .set_config = set_config,
- .vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
- .vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
- .vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
- .vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr,
-#endif
-};
-
-static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
- g_spdk_vhost_devices);
-static pthread_mutex_t g_spdk_vhost_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
+void *vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
{
void *vva;
uint64_t newlen;
}
static void
-spdk_vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
- uint16_t req_id)
+vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t req_id)
{
struct vring_desc *desc, *desc_table;
uint32_t desc_table_size;
int rc;
- if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
+ if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
return;
}
- rc = spdk_vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
+ rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("Can't log used ring descriptors!\n");
return;
}
do {
- if (spdk_vhost_vring_desc_is_wr(desc)) {
+ if (vhost_vring_desc_is_wr(desc)) {
/* To be honest, only pages realy touched should be logged, but
* doing so would require tracking those changes in each backed.
* Also backend most likely will touch all/most of those pages so
* for lets assume we touched all pages passed to as writeable buffers. */
rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
}
- spdk_vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
+ vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
} while (desc);
}
static void
-spdk_vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
- struct spdk_vhost_virtqueue *virtqueue,
- uint16_t idx)
+vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t idx)
{
uint64_t offset, len;
- uint16_t vq_idx;
- if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
+ if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
return;
}
- offset = offsetof(struct vring_used, ring[idx]);
- len = sizeof(virtqueue->vring.used->ring[idx]);
- vq_idx = virtqueue - vsession->virtqueue;
+ if (spdk_unlikely(virtqueue->packed.packed_ring)) {
+ offset = idx * sizeof(struct vring_packed_desc);
+ len = sizeof(struct vring_packed_desc);
+ } else {
+ offset = offsetof(struct vring_used, ring[idx]);
+ len = sizeof(virtqueue->vring.used->ring[idx]);
+ }
- rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
+ rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
}
static void
-spdk_vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
- struct spdk_vhost_virtqueue *virtqueue)
+vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue)
{
uint64_t offset, len;
uint16_t vq_idx;
- if (spdk_likely(!spdk_vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
+ if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
return;
}
* Get available requests from avail ring.
*/
uint16_t
-spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
- uint16_t reqs_len)
+vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
+ uint16_t reqs_len)
{
struct rte_vhost_vring *vring = &virtqueue->vring;
struct vring_avail *avail = vring->avail;
}
static bool
-spdk_vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
+vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
{
return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
}
+static bool
+vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
+{
+ return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
+}
+
int
-spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
- uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
- uint32_t *desc_table_size)
+vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
+ uint32_t *desc_table_size)
{
if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
return -1;
*desc = &virtqueue->vring.desc[req_idx];
- if (spdk_vhost_vring_desc_is_indirect(*desc)) {
+ if (vhost_vring_desc_is_indirect(*desc)) {
*desc_table_size = (*desc)->len / sizeof(**desc);
- *desc_table = spdk_vhost_gpa_to_vva(vsession, (*desc)->addr,
- sizeof(**desc) * *desc_table_size);
+ *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
+ sizeof(**desc) * *desc_table_size);
*desc = *desc_table;
if (*desc == NULL) {
return -1;
}
int
-spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession,
- struct spdk_vhost_virtqueue *virtqueue)
+vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t req_idx, struct vring_packed_desc **desc,
+ struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
+{
+ *desc = &virtqueue->vring.desc_packed[req_idx];
+
+ /* In packed ring when the desc is non-indirect we get next desc
+ * by judging (desc->flag & VRING_DESC_F_NEXT) != 0. When the desc
+ * is indirect we get next desc by idx and desc_table_size. It's
+ * different from split ring.
+ */
+ if (vhost_vring_packed_desc_is_indirect(*desc)) {
+ *desc_table_size = (*desc)->len / sizeof(struct vring_packed_desc);
+ *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
+ (*desc)->len);
+ *desc = *desc_table;
+ if (spdk_unlikely(*desc == NULL)) {
+ return -1;
+ }
+ } else {
+ *desc_table = NULL;
+ *desc_table_size = 0;
+ }
+
+ return 0;
+}
+
+int
+vhost_vq_used_signal(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue)
{
if (virtqueue->used_req_cnt == 0) {
return 0;
}
}
+static inline bool
+vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
+{
+ if (spdk_unlikely(vq->packed.packed_ring)) {
+ if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
+ return true;
+ }
+ } else {
+ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
void
-spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession)
+vhost_session_used_signal(struct spdk_vhost_session *vsession)
{
struct spdk_vhost_virtqueue *virtqueue;
uint64_t now;
for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
virtqueue = &vsession->virtqueue[q_idx];
- if (virtqueue->vring.desc == NULL ||
- (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+ if (virtqueue->vring.desc == NULL) {
+ continue;
+ }
+
+ if (vhost_vq_event_is_suppressed(virtqueue)) {
continue;
}
- spdk_vhost_vq_used_signal(vsession, virtqueue);
+ vhost_vq_used_signal(vsession, virtqueue);
}
} else {
now = spdk_get_ticks();
virtqueue = &vsession->virtqueue[q_idx];
/* No need for event right now */
- if (now < virtqueue->next_event_time ||
- (virtqueue->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+ if (now < virtqueue->next_event_time) {
continue;
}
- if (!spdk_vhost_vq_used_signal(vsession, virtqueue)) {
+ if (vhost_vq_event_is_suppressed(virtqueue)) {
+ continue;
+ }
+
+ if (!vhost_vq_used_signal(vsession, virtqueue)) {
continue;
}
}
static int
-spdk_vhost_session_set_coalescing(struct spdk_vhost_dev *vdev,
- struct spdk_vhost_session *vsession, void *ctx)
+vhost_session_set_coalescing(struct spdk_vhost_dev *vdev,
+ struct spdk_vhost_session *vsession, void *ctx)
{
- if (vdev == NULL || vsession == NULL) {
- /* nothing to do */
- return 0;
- }
-
vsession->coalescing_delay_time_base =
vdev->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
vsession->coalescing_io_rate_threshold =
return 0;
}
-int
-spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
- uint32_t iops_threshold)
+static int
+vhost_dev_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
+ uint32_t iops_threshold)
{
uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
vdev->coalescing_delay_us = delay_base_us;
vdev->coalescing_iops_threshold = iops_threshold;
+ return 0;
+}
- spdk_vhost_dev_foreach_session(vdev, spdk_vhost_session_set_coalescing, NULL);
+int
+spdk_vhost_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
+ uint32_t iops_threshold)
+{
+ int rc;
+
+ rc = vhost_dev_set_coalescing(vdev, delay_base_us, iops_threshold);
+ if (rc != 0) {
+ return rc;
+ }
+
+ vhost_dev_foreach_session(vdev, vhost_session_set_coalescing, NULL, NULL);
return 0;
}
* Enqueue id and len to used ring.
*/
void
-spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
- struct spdk_vhost_virtqueue *virtqueue,
- uint16_t id, uint32_t len)
+vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t id, uint32_t len)
{
struct rte_vhost_vring *vring = &virtqueue->vring;
struct vring_used *used = vring->used;
uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
+ uint16_t vq_idx = virtqueue->vring_idx;
SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
"Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);
- spdk_vhost_log_req_desc(vsession, virtqueue, id);
+ vhost_log_req_desc(vsession, virtqueue, id);
virtqueue->last_used_idx++;
used->ring[last_idx].id = id;
/* Ensure the used ring is updated before we log it or increment used->idx. */
spdk_smp_wmb();
- spdk_vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
+ rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);
+
+ vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
* (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
- spdk_vhost_log_used_vring_idx(vsession, virtqueue);
+ vhost_log_used_vring_idx(vsession, virtqueue);
- /* Ensure all our used ring changes are visible to the guest at the time
- * of interrupt.
- * TODO: this is currently an sfence on x86. For other architectures we
- * will most likely need an smp_mb(), but smp_mb() is an overkill for x86.
- */
- spdk_wmb();
+ rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);
virtqueue->used_req_cnt++;
}
-int
-spdk_vhost_vring_desc_get_next(struct vring_desc **desc,
- struct vring_desc *desc_table, uint32_t desc_table_size)
+void
+vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
+ struct spdk_vhost_virtqueue *virtqueue,
+ uint16_t num_descs, uint16_t buffer_id,
+ uint32_t length)
{
- struct vring_desc *old_desc = *desc;
- uint16_t next_idx;
+ struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
+ bool used, avail;
- if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
- *desc = NULL;
- return 0;
+ SPDK_DEBUGLOG(SPDK_LOG_VHOST_RING,
+ "Queue %td - RING: buffer_id=%"PRIu16"\n",
+ virtqueue - vsession->virtqueue, buffer_id);
+
+ /* When the descriptor is used, two flags in descriptor
+ * avail flag and used flag are set to equal
+ * and used flag value == used_wrap_counter.
+ */
+ used = !!(desc->flags & VRING_DESC_F_USED);
+ avail = !!(desc->flags & VRING_DESC_F_AVAIL);
+ if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
+ SPDK_ERRLOG("descriptor has been used before\n");
+ return;
}
- next_idx = old_desc->next;
- if (spdk_unlikely(next_idx >= desc_table_size)) {
- *desc = NULL;
- return -1;
+ /* In used desc addr is unused and len specifies the buffer length
+ * that has been written to by the device.
+ */
+ desc->addr = 0;
+ desc->len = length;
+
+ /* This bit specifies whether any data has been written by the device */
+ if (length != 0) {
+ desc->flags |= VRING_DESC_F_WRITE;
}
- *desc = &desc_table[next_idx];
- return 0;
+ /* Buffer ID is included in the last descriptor in the list.
+ * The driver needs to keep track of the size of the list corresponding
+ * to each buffer ID.
+ */
+ desc->id = buffer_id;
+
+ /* A device MUST NOT make the descriptor used before buffer_id is
+ * written to the descriptor.
+ */
+ spdk_smp_wmb();
+ /* To mark a desc as used, the device sets the F_USED bit in flags to match
+ * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
+ * match the same value.
+ */
+ if (virtqueue->packed.used_phase) {
+ desc->flags |= VRING_DESC_F_AVAIL_USED;
+ } else {
+ desc->flags &= ~VRING_DESC_F_AVAIL_USED;
+ }
+
+ vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
+ virtqueue->last_used_idx += num_descs;
+ if (virtqueue->last_used_idx >= virtqueue->vring.size) {
+ virtqueue->last_used_idx -= virtqueue->vring.size;
+ virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
+ }
+
+ virtqueue->used_req_cnt++;
+}
+
+bool
+vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
+{
+ uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;
+
+ /* To mark a desc as available, the driver sets the F_AVAIL bit in flags
+ * to match the internal avail wrap counter. It also sets the F_USED bit to
+ * match the inverse value but it's not mandatory.
+ */
+ return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
}
bool
-spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
+vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
{
- return !!(cur_desc->flags & VRING_DESC_F_WRITE);
+ return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
int
-spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
- uint16_t *iov_index, const struct vring_desc *desc)
+vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
+ struct spdk_vhost_virtqueue *vq,
+ struct vring_packed_desc *desc_table,
+ uint32_t desc_table_size)
+{
+ if (desc_table != NULL) {
+		/* A non-NULL desc_table means the chain is indirect and we get the
+		 * next desc by req_idx and desc_table_size. *desc set to NULL means
+		 * we reached the last desc of this request.
+		 */
+ (*req_idx)++;
+ if (*req_idx < desc_table_size) {
+ *desc = &desc_table[*req_idx];
+ } else {
+ *desc = NULL;
+ }
+ } else {
+		/* A NULL desc_table means the chain is non-indirect and we get the
+		 * next desc by req_idx and the F_NEXT flag. *desc set to NULL means
+		 * we reached the last desc of this request. When we return a new
+		 * desc we update req_idx too.
+		 */
+ if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
+ *desc = NULL;
+ return 0;
+ }
+
+ *req_idx = (*req_idx + 1) % vq->vring.size;
+ *desc = &vq->vring.desc_packed[*req_idx];
+ }
+
+ return 0;
+}
+
+static int
+vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
+ uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
{
- uint64_t len;
- uint64_t remaining = desc->len;
- uintptr_t payload = desc->addr;
uintptr_t vva;
+ uint64_t len;
do {
if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
return 0;
}
+int
+vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
+ uint16_t *iov_index, const struct vring_packed_desc *desc)
+{
+ return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
+ desc->addr, desc->len);
+}
+
+/* 1. Traverse the desc chain to get the buffer_id and return it as task_idx.
+ * 2. Update vq->last_avail_idx to point to the next available desc chain.
+ * 3. Toggle the avail_wrap_counter if last_avail_idx wraps around.
+ */
+uint16_t
+vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
+ uint16_t *num_descs)
+{
+ struct vring_packed_desc *desc;
+ uint16_t desc_head = req_idx;
+
+ *num_descs = 1;
+
+ desc = &vq->vring.desc_packed[req_idx];
+ if (!vhost_vring_packed_desc_is_indirect(desc)) {
+ while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
+ req_idx = (req_idx + 1) % vq->vring.size;
+ desc = &vq->vring.desc_packed[req_idx];
+ (*num_descs)++;
+ }
+ }
+
+	/* The queue size doesn't have to be a power of 2.
+	 * The device maintains last_avail_idx so we can make sure
+	 * the value is valid (0 ~ vring.size - 1).
+	 */
+ vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
+ if (vq->last_avail_idx < desc_head) {
+ vq->packed.avail_phase = !vq->packed.avail_phase;
+ }
+
+ return desc->id;
+}
+
+int
+vhost_vring_desc_get_next(struct vring_desc **desc,
+ struct vring_desc *desc_table, uint32_t desc_table_size)
+{
+ struct vring_desc *old_desc = *desc;
+ uint16_t next_idx;
+
+ if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
+ *desc = NULL;
+ return 0;
+ }
+
+ next_idx = old_desc->next;
+ if (spdk_unlikely(next_idx >= desc_table_size)) {
+ *desc = NULL;
+ return -1;
+ }
+
+ *desc = &desc_table[next_idx];
+ return 0;
+}
+
+int
+vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
+ uint16_t *iov_index, const struct vring_desc *desc)
+{
+ return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
+ desc->addr, desc->len);
+}
+
static struct spdk_vhost_session *
-spdk_vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
+vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
{
struct spdk_vhost_session *vsession;
}
struct spdk_vhost_session *
-spdk_vhost_session_find_by_vid(int vid)
+vhost_session_find_by_vid(int vid)
{
struct spdk_vhost_dev *vdev;
struct spdk_vhost_session *vsession;
- TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
+ TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
if (vsession->vid == vid) {
return vsession;
return NULL;
}
-#define SHIFT_2MB 21
-#define SIZE_2MB (1ULL << SHIFT_2MB)
-#define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB
-#define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB
-
-static void
-spdk_vhost_session_mem_register(struct spdk_vhost_session *vsession)
-{
- struct rte_vhost_mem_region *region;
- uint32_t i;
- uint64_t previous_start = UINT64_MAX;
-
- for (i = 0; i < vsession->mem->nregions; i++) {
- uint64_t start, end, len;
- region = &vsession->mem->regions[i];
- start = FLOOR_2MB(region->mmap_addr);
- end = CEIL_2MB(region->mmap_addr + region->mmap_size);
- if (start == previous_start) {
- start += (size_t) SIZE_2MB;
- }
- previous_start = start;
- len = end - start;
- SPDK_INFOLOG(SPDK_LOG_VHOST, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
- start, len);
-
- if (spdk_mem_register((void *)start, len) != 0) {
- SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
- i);
- continue;
- }
- }
-}
-
-static void
-spdk_vhost_session_mem_unregister(struct spdk_vhost_session *vsession)
-{
- struct rte_vhost_mem_region *region;
- uint32_t i;
- uint64_t previous_start = UINT64_MAX;
-
- for (i = 0; i < vsession->mem->nregions; i++) {
- uint64_t start, end, len;
- region = &vsession->mem->regions[i];
- start = FLOOR_2MB(region->mmap_addr);
- end = CEIL_2MB(region->mmap_addr + region->mmap_size);
- if (start == previous_start) {
- start += (size_t) SIZE_2MB;
- }
- previous_start = start;
- len = end - start;
-
- if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
- continue; /* region has not been registered */
- }
-
- if (spdk_mem_unregister((void *)start, len) != 0) {
- assert(false);
- }
- }
-
-}
-
-void
-spdk_vhost_free_reactor(uint32_t lcore)
-{
- g_num_ctrlrs[lcore]--;
-}
-
struct spdk_vhost_dev *
spdk_vhost_dev_next(struct spdk_vhost_dev *vdev)
{
if (vdev == NULL) {
- return TAILQ_FIRST(&g_spdk_vhost_devices);
+ return TAILQ_FIRST(&g_vhost_devices);
}
return TAILQ_NEXT(vdev, tailq);
ctrlr_name += dev_dirname_len;
}
- TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
+ TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
if (strcmp(vdev->name, ctrlr_name) == 0) {
return vdev;
}
}
static int
-spdk_vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
+vhost_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
int rc;
}
if (mask == NULL) {
- spdk_cpuset_copy(cpumask, spdk_app_get_core_mask());
+ spdk_cpuset_copy(cpumask, &g_vhost_core_mask);
return 0;
}
- rc = spdk_app_parse_core_mask(mask, cpumask);
+ rc = spdk_cpuset_parse(cpumask, mask);
if (rc < 0) {
SPDK_ERRLOG("invalid cpumask %s\n", mask);
return -1;
}
+ spdk_cpuset_and(cpumask, &g_vhost_core_mask);
+
if (spdk_cpuset_count(cpumask) == 0) {
- SPDK_ERRLOG("no cpu is selected among reactor mask(=%s)\n",
- spdk_cpuset_fmt(spdk_app_get_core_mask()));
+ SPDK_ERRLOG("no cpu is selected among core mask(=%s)\n",
+ spdk_cpuset_fmt(&g_vhost_core_mask));
return -1;
}
return 0;
}
-static void *
-_start_rte_driver(void *arg)
+static void
+vhost_setup_core_mask(void *ctx)
{
- char *path = arg;
+ struct spdk_thread *thread = spdk_get_thread();
+ spdk_cpuset_or(&g_vhost_core_mask, spdk_thread_get_cpumask(thread));
+}
- if (rte_vhost_driver_start(path) != 0) {
- return NULL;
+static void
+vhost_setup_core_mask_done(void *ctx)
+{
+ spdk_vhost_init_cb init_cb = ctx;
+
+ if (spdk_cpuset_count(&g_vhost_core_mask) == 0) {
+ init_cb(-ECHILD);
+ return;
}
- return path;
+ init_cb(0);
+}
+
+static void
+vhost_dev_thread_exit(void *arg1)
+{
+ spdk_thread_exit(spdk_get_thread());
}
int
-spdk_vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
- const struct spdk_vhost_dev_backend *backend)
+vhost_dev_register(struct spdk_vhost_dev *vdev, const char *name, const char *mask_str,
+ const struct spdk_vhost_dev_backend *backend)
{
char path[PATH_MAX];
- struct stat file_stat;
- struct spdk_cpuset *cpumask;
+ struct spdk_cpuset cpumask = {};
int rc;
assert(vdev);
return -EINVAL;
}
- cpumask = spdk_cpuset_alloc();
- if (!cpumask) {
- SPDK_ERRLOG("spdk_cpuset_alloc failed\n");
- return -ENOMEM;
- }
-
- if (spdk_vhost_parse_core_mask(mask_str, cpumask) != 0) {
- SPDK_ERRLOG("cpumask %s is invalid (app mask is 0x%s)\n",
- mask_str, spdk_cpuset_fmt(spdk_app_get_core_mask()));
- rc = -EINVAL;
- goto out;
+ if (vhost_parse_core_mask(mask_str, &cpumask) != 0) {
+ SPDK_ERRLOG("cpumask %s is invalid (core mask is 0x%s)\n",
+ mask_str, spdk_cpuset_fmt(&g_vhost_core_mask));
+ return -EINVAL;
}
if (spdk_vhost_dev_find(name)) {
SPDK_ERRLOG("vhost controller %s already exists.\n", name);
- rc = -EEXIST;
- goto out;
+ return -EEXIST;
}
if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) {
SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname,
name);
- rc = -EINVAL;
- goto out;
- }
-
- /* Register vhost driver to handle vhost messages. */
- if (stat(path, &file_stat) != -1) {
- if (!S_ISSOCK(file_stat.st_mode)) {
- SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
- "The file already exists and is not a socket.\n",
- path);
- rc = -EIO;
- goto out;
- } else if (unlink(path) != 0) {
- SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
- "The socket already exists and failed to unlink.\n",
- path);
- rc = -EIO;
- goto out;
- }
- }
-
- if (rte_vhost_driver_register(path, 0) != 0) {
- SPDK_ERRLOG("Could not register controller %s with vhost library\n", name);
- SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
- rc = -EIO;
- goto out;
- }
- if (rte_vhost_driver_set_features(path, backend->virtio_features) ||
- rte_vhost_driver_disable_features(path, backend->disabled_features)) {
- SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", name);
-
- rte_vhost_driver_unregister(path);
- rc = -EIO;
- goto out;
+ return -EINVAL;
}
- if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
- rte_vhost_driver_unregister(path);
- SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", name);
+ vdev->name = strdup(name);
+ vdev->path = strdup(path);
+ if (vdev->name == NULL || vdev->path == NULL) {
rc = -EIO;
goto out;
}
- vdev->name = strdup(name);
- vdev->path = strdup(path);
- if (vdev->name == NULL || vdev->path == NULL) {
- free(vdev->name);
- free(vdev->path);
- rte_vhost_driver_unregister(path);
+ vdev->thread = spdk_thread_create(vdev->name, &cpumask);
+ if (vdev->thread == NULL) {
+ SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
rc = -EIO;
goto out;
}
- vdev->cpumask = cpumask;
vdev->registered = true;
vdev->backend = backend;
TAILQ_INIT(&vdev->vsessions);
- TAILQ_INSERT_TAIL(&g_spdk_vhost_devices, vdev, tailq);
- spdk_vhost_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
- SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
+ vhost_dev_set_coalescing(vdev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
+ SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
- spdk_vhost_dev_install_rte_compat_hooks(vdev);
-
- /* The following might start a POSIX thread that polls for incoming
- * socket connections and calls backend->start/stop_device. These backend
- * callbacks are also protected by the global SPDK vhost mutex, so we're
- * safe with not initializing the vdev just yet.
- */
- if (spdk_call_unaffinitized(_start_rte_driver, path) == NULL) {
- SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
- name, errno, spdk_strerror(errno));
- rte_vhost_driver_unregister(path);
- TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq);
- free(vdev->name);
- free(vdev->path);
+ if (vhost_register_unix_socket(path, name, vdev->virtio_features, vdev->disabled_features,
+ vdev->protocol_features)) {
+ spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
rc = -EIO;
goto out;
}
+ TAILQ_INSERT_TAIL(&g_vhost_devices, vdev, tailq);
+
SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: new controller added\n", vdev->name);
return 0;
out:
- spdk_cpuset_free(cpumask);
+ free(vdev->name);
+ free(vdev->path);
return rc;
}
int
-spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev)
+vhost_dev_unregister(struct spdk_vhost_dev *vdev)
{
if (!TAILQ_EMPTY(&vdev->vsessions)) {
SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name);
return -EBUSY;
}
- if (vdev->registered && rte_vhost_driver_unregister(vdev->path) != 0) {
+ if (vdev->registered && vhost_driver_unregister(vdev->path) != 0) {
SPDK_ERRLOG("Could not unregister controller %s with vhost library\n"
"Check if domain socket %s still exists\n",
vdev->name, vdev->path);
SPDK_INFOLOG(SPDK_LOG_VHOST, "Controller %s: removed\n", vdev->name);
+ spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
+
free(vdev->name);
free(vdev->path);
- spdk_cpuset_free(vdev->cpumask);
- TAILQ_REMOVE(&g_spdk_vhost_devices, vdev, tailq);
+ TAILQ_REMOVE(&g_vhost_devices, vdev, tailq);
return 0;
}
-static struct spdk_vhost_session *
-spdk_vhost_session_next(struct spdk_vhost_dev *vdev, unsigned prev_id)
-{
- struct spdk_vhost_session *vsession;
-
- TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
- if (vsession->id > prev_id) {
- return vsession;
- }
- }
-
- return NULL;
-}
-
const char *
spdk_vhost_dev_get_name(struct spdk_vhost_dev *vdev)
{
spdk_vhost_dev_get_cpumask(struct spdk_vhost_dev *vdev)
{
assert(vdev != NULL);
- return vdev->cpumask;
+ return spdk_thread_get_cpumask(vdev->thread);
}
-uint32_t
-spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask)
+static void
+wait_for_semaphore(int timeout_sec, const char *errmsg)
{
- uint32_t i, selected_core;
- uint32_t min_ctrlrs;
-
- min_ctrlrs = INT_MAX;
- selected_core = spdk_env_get_first_core();
-
- SPDK_ENV_FOREACH_CORE(i) {
- if (!spdk_cpuset_get_cpu(cpumask, i)) {
- continue;
- }
+ struct timespec timeout;
+ int rc;
- if (g_num_ctrlrs[i] < min_ctrlrs) {
- selected_core = i;
- min_ctrlrs = g_num_ctrlrs[i];
- }
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_sec += timeout_sec;
+ rc = sem_timedwait(&g_dpdk_sem, &timeout);
+ if (rc != 0) {
+ SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
+ sem_wait(&g_dpdk_sem);
}
-
- g_num_ctrlrs[selected_core]++;
- return selected_core;
}
static void
-complete_session_event(struct spdk_vhost_session *vsession, int response)
+vhost_session_cb_done(int rc)
{
- struct spdk_vhost_session_fn_ctx *ctx = vsession->event_ctx;
-
- ctx->response = response;
- sem_post(&ctx->sem);
+ g_dpdk_response = rc;
+ sem_post(&g_dpdk_sem);
}
void
-spdk_vhost_session_start_done(struct spdk_vhost_session *vsession, int response)
+vhost_session_start_done(struct spdk_vhost_session *vsession, int response)
{
if (response == 0) {
vsession->started = true;
+
assert(vsession->vdev->active_session_num < UINT32_MAX);
vsession->vdev->active_session_num++;
}
- complete_session_event(vsession, response);
+
+ vhost_session_cb_done(response);
}
void
-spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession, int response)
+vhost_session_stop_done(struct spdk_vhost_session *vsession, int response)
{
if (response == 0) {
vsession->started = false;
+
assert(vsession->vdev->active_session_num > 0);
vsession->vdev->active_session_num--;
}
- complete_session_event(vsession, response);
+
+ vhost_session_cb_done(response);
}
+/* Runs on the session's SPDK thread; invokes the user callback for the
+ * session identified in ctx while holding g_vhost_mutex.
+ */
static void
-spdk_vhost_event_cb(void *arg1, void *arg2)
+vhost_event_cb(void *arg1)
{
- struct spdk_vhost_session_fn_ctx *ctx = arg1;
+ struct vhost_session_fn_ctx *ctx = arg1;
struct spdk_vhost_session *vsession;
- struct spdk_event *ev;
- if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
- ev = spdk_event_allocate(spdk_env_get_current_core(),
- spdk_vhost_event_cb, arg1, NULL);
- spdk_event_call(ev);
+ /* trylock instead of lock: blocking here could deadlock against a
+ * mutex holder that is itself waiting on this thread, so re-queue
+ * the message to ourselves and retry later.
+ */
+ if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
+ spdk_thread_send_msg(spdk_get_thread(), vhost_event_cb, arg1);
return;
}
- vsession = spdk_vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
+ /* Re-resolve by id - the session may have been removed since the
+ * event was enqueued, in which case vsession is NULL.
+ */
+ vsession = vhost_session_find_by_id(ctx->vdev, ctx->vsession_id);
ctx->cb_fn(ctx->vdev, vsession, NULL);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
}
-static void spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
- struct spdk_vhost_session *vsession,
- spdk_vhost_session_fn fn, void *arg);
-
-static void
-spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
+/* Synchronously execute cb_fn on the session's SPDK thread. Called from
+ * a DPDK pthread with g_vhost_mutex held; the mutex is dropped while
+ * waiting so the session thread can take it, and re-taken before return.
+ * Returns the value the callback reported via vhost_session_*_done().
+ */
+int
+vhost_session_send_event(struct spdk_vhost_session *vsession,
+ spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
+ const char *errmsg)
{
- struct spdk_vhost_session_fn_ctx *ctx = arg1;
- struct spdk_vhost_session *vsession = NULL;
- struct spdk_vhost_dev *vdev = ctx->vdev;
- struct spdk_event *ev;
- int rc;
+ struct vhost_session_fn_ctx ev_ctx = {0};
+ struct spdk_vhost_dev *vdev = vsession->vdev;
- if (pthread_mutex_trylock(&g_spdk_vhost_mutex) != 0) {
- ev = spdk_event_allocate(spdk_env_get_current_core(),
- spdk_vhost_event_async_foreach_fn, arg1, NULL);
- spdk_event_call(ev);
- return;
- }
+ ev_ctx.vdev = vdev;
+ ev_ctx.vsession_id = vsession->id;
+ ev_ctx.cb_fn = cb_fn;
- vsession = spdk_vhost_session_find_by_id(vdev, ctx->vsession_id);
- if (vsession == NULL || !vsession->initialized) {
- /* The session must have been removed in the meantime, so we
- * just skip it in our foreach chain
- */
- goto out_unlock_continue;
- }
+ /* ev_ctx lives on this pthread's stack; passing its address to
+ * another thread is safe only because we block on g_dpdk_sem below
+ * until the callback has completed.
+ */
+ spdk_thread_send_msg(vdev->thread, vhost_event_cb, &ev_ctx);
- if (vsession->started &&
- (uint32_t)vsession->lcore != spdk_env_get_current_core()) {
- /* if session has been relocated to other core, it is no longer thread-safe
- * to access its contents here. Even though we're running under the global
- * vhost mutex, the session itself (and its pollers) are not. We need to chase
- * the session thread as many times as necessary.
- */
- ev = spdk_event_allocate(vsession->lcore,
- spdk_vhost_event_async_foreach_fn, arg1, NULL);
- spdk_event_call(ev);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ /* Drop the mutex so vhost_event_cb can acquire it on the session
+ * thread; re-acquire once the callback has signalled completion.
+ */
+ pthread_mutex_unlock(&g_vhost_mutex);
+ wait_for_semaphore(timeout_sec, errmsg);
+ pthread_mutex_lock(&g_vhost_mutex);
+
+ return g_dpdk_response;
+}
+
+/* Runs on the init thread after foreach_session() has visited every
+ * session: drops the device's pending-op count and invokes the user's
+ * completion callback, then frees the heap-allocated context.
+ */
+static void
+foreach_session_finish_cb(void *arg1)
+{
+ struct vhost_session_fn_ctx *ev_ctx = arg1;
+ struct spdk_vhost_dev *vdev = ev_ctx->vdev;
+
+ /* trylock to avoid deadlocking the init thread; re-queue and retry
+ * if the mutex is currently held elsewhere.
+ */
+ if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
+ spdk_thread_send_msg(spdk_get_thread(),
+ foreach_session_finish_cb, arg1);
return;
}
- rc = ctx->cb_fn(vdev, vsession, ctx->user_ctx);
- if (rc < 0) {
- goto out_unlock;
+ assert(vdev->pending_async_op_num > 0);
+ vdev->pending_async_op_num--;
+ if (ev_ctx->cpl_fn != NULL) {
+ ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
}
-out_unlock_continue:
- vsession = spdk_vhost_session_next(vdev, ctx->vsession_id);
- spdk_vhost_external_event_foreach_continue(vdev, vsession, ctx->cb_fn, ctx->user_ctx);
-out_unlock:
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- free(ctx);
+ pthread_mutex_unlock(&g_vhost_mutex);
+ free(ev_ctx);
}
-int
-spdk_vhost_session_send_event(int32_t lcore, struct spdk_vhost_session *vsession,
- spdk_vhost_session_fn cb_fn, void *timeout_sec,
- const char *errmsg)
+/* Runs on the device's thread: applies ev_ctx->cb_fn to every
+ * initialized session (a negative return aborts the iteration), then
+ * hands the context to the init thread for completion handling.
+ */
+static void
+foreach_session(void *arg1)
{
- struct spdk_vhost_session_fn_ctx ev_ctx = {0};
- struct spdk_event *ev;
- struct timespec timeout;
+ struct vhost_session_fn_ctx *ev_ctx = arg1;
+ struct spdk_vhost_session *vsession;
+ struct spdk_vhost_dev *vdev = ev_ctx->vdev;
int rc;
- rc = sem_init(&ev_ctx.sem, 0, 0);
- if (rc != 0) {
- SPDK_ERRLOG("Failed to initialize semaphore for vhost timed event\n");
- return -errno;
+ /* trylock to avoid deadlock; re-queue to this thread and retry. */
+ if (pthread_mutex_trylock(&g_vhost_mutex) != 0) {
+ spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
+ return;
}
- ev_ctx.vdev = vsession->vdev;
- ev_ctx.vsession_id = vsession->id;
- ev_ctx.cb_fn = cb_fn;
-
- vsession->lcore = lcore;
- vsession->event_ctx = &ev_ctx;
- ev = spdk_event_allocate(lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
- assert(ev);
- spdk_event_call(ev);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
-
- clock_gettime(CLOCK_REALTIME, &timeout);
- timeout.tv_sec += timeout_sec;
-
- rc = sem_timedwait(&ev_ctx.sem, &timeout);
- if (rc != 0) {
- SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
- sem_wait(&ev_ctx.sem);
+ TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
+ if (vsession->initialized) {
+ rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
+ if (rc < 0) {
+ goto out;
+ }
+ }
}
- sem_destroy(&ev_ctx.sem);
- pthread_mutex_lock(&g_spdk_vhost_mutex);
- vsession->event_ctx = NULL;
- return ev_ctx.response;
+out:
+ pthread_mutex_unlock(&g_vhost_mutex);
+
+ /* Completion (cpl_fn + refcount drop) always happens on the init
+ * thread, even if the iteration aborted early.
+ */
+ spdk_thread_send_msg(g_vhost_init_thread, foreach_session_finish_cb, arg1);
}
-static int
-spdk_vhost_event_async_send_foreach_continue(struct spdk_vhost_session *vsession,
- spdk_vhost_session_fn cb_fn, void *arg)
+/* Asynchronously run fn on every session of vdev (on the device's
+ * thread), then cpl_fn on the init thread. Takes a pending-async-op
+ * reference on vdev that foreach_session_finish_cb() releases.
+ */
+void
+vhost_dev_foreach_session(struct spdk_vhost_dev *vdev,
+ spdk_vhost_session_fn fn,
+ spdk_vhost_dev_fn cpl_fn,
+ void *arg)
{
- struct spdk_vhost_dev *vdev = vsession->vdev;
- struct spdk_vhost_session_fn_ctx *ev_ctx;
- struct spdk_event *ev;
+ struct vhost_session_fn_ctx *ev_ctx;
ev_ctx = calloc(1, sizeof(*ev_ctx));
if (ev_ctx == NULL) {
+ /* NOTE(review): on allocation failure cpl_fn is silently
+ * dropped and no pending-op reference is taken; callers get
+ * no signal beyond the log. Confirm this is acceptable.
+ */
SPDK_ERRLOG("Failed to alloc vhost event.\n");
assert(false);
- return -ENOMEM;
+ return;
}
ev_ctx->vdev = vdev;
- ev_ctx->vsession_id = vsession->id;
- ev_ctx->cb_fn = cb_fn;
+ ev_ctx->cb_fn = fn;
+ ev_ctx->cpl_fn = cpl_fn;
ev_ctx->user_ctx = arg;
- ev = spdk_event_allocate(vsession->lcore,
- spdk_vhost_event_async_foreach_fn, ev_ctx, NULL);
- assert(ev);
- spdk_event_call(ev);
+ assert(vdev->pending_async_op_num < UINT32_MAX);
+ vdev->pending_async_op_num++;
- return 0;
+ spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
}
-static void
+/* Stop a started session via its backend, persist the vring indices back
+ * to rte_vhost and release the guest memory table. Called with
+ * g_vhost_mutex held; the mutex stays held on ALL paths - the callers
+ * (vhost_stop_device_cb, vhost_destroy_connection_cb) unlock it
+ * themselves, so unlocking here on failure caused a double unlock of a
+ * non-recursive mutex.
+ */
+static int
 _stop_session(struct spdk_vhost_session *vsession)
{
struct spdk_vhost_dev *vdev = vsession->vdev;
rc = vdev->backend->stop_session(vsession);
if (rc != 0) {
SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- return;
+ return rc;
}
for (i = 0; i < vsession->max_queues; i++) {
q = &vsession->virtqueue[i];
+
+ /* vring.desc and vring.desc_packed are in a union struct
+ * so q->vring.desc can replace q->vring.desc_packed.
+ */
if (q->vring.desc == NULL) {
continue;
}
+
+ /* Packed virtqueues support up to 2^15 entries each
+ * so left one bit can be used as wrap counter.
+ */
+ if (q->packed.packed_ring) {
+ q->last_avail_idx = q->last_avail_idx |
+ ((uint16_t)q->packed.avail_phase << 15);
+ q->last_used_idx = q->last_used_idx |
+ ((uint16_t)q->packed.used_phase << 15);
+ }
+
rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
}
- spdk_vhost_session_mem_unregister(vsession);
+ vhost_session_mem_unregister(vsession->mem);
free(vsession->mem);
+
+ return 0;
}
-static void
-stop_device(int vid)
+/* rte_vhost destroy-device callback: stop the session owning vid.
+ * Returns -EINVAL for an unknown vid, -EALREADY if already stopped,
+ * otherwise the backend's stop result.
+ */
+int
+vhost_stop_device_cb(int vid)
{
struct spdk_vhost_session *vsession;
+ int rc;
- pthread_mutex_lock(&g_spdk_vhost_mutex);
- vsession = spdk_vhost_session_find_by_vid(vid);
+ pthread_mutex_lock(&g_vhost_mutex);
+ vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- return;
+ pthread_mutex_unlock(&g_vhost_mutex);
+ return -EINVAL;
}
if (!vsession->started) {
/* already stopped, nothing to do */
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- return;
+ pthread_mutex_unlock(&g_vhost_mutex);
+ return -EALREADY;
}
- _stop_session(vsession);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ rc = _stop_session(vsession);
+ pthread_mutex_unlock(&g_vhost_mutex);
+
+ return rc;
}
-static int
-start_device(int vid)
+/* rte_vhost new-device callback: snapshot the negotiated features,
+ * vring state and guest memory table for the session owning vid, then
+ * hand it to the backend's start_session(). Returns 0 on success,
+ * negative on failure.
+ */
+int
+vhost_start_device_cb(int vid)
{
struct spdk_vhost_dev *vdev;
struct spdk_vhost_session *vsession;
int rc = -1;
uint16_t i;
+ bool packed_ring;
- pthread_mutex_lock(&g_spdk_vhost_mutex);
+ pthread_mutex_lock(&g_vhost_mutex);
- vsession = spdk_vhost_session_find_by_vid(vid);
+ vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
goto out;
goto out;
}
+ /* Features must be read before the vring loop below, which branches
+ * on VIRTIO_F_RING_PACKED per queue.
+ */
+ if (vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) {
+ SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
+ goto out;
+ }
+
+ packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);
+
vsession->max_queues = 0;
memset(vsession->virtqueue, 0, sizeof(vsession->virtqueue));
for (i = 0; i < SPDK_VHOST_MAX_VQUEUES; i++) {
continue;
}
q->vring_idx = i;
+ rte_vhost_get_vhost_ring_inflight(vid, i, &q->vring_inflight);
+ /* vring.desc and vring.desc_packed are in a union struct
+ * so q->vring.desc can replace q->vring.desc_packed.
+ */
if (q->vring.desc == NULL || q->vring.size == 0) {
continue;
}
continue;
}
- /* Disable notifications. */
- if (rte_vhost_enable_guest_notification(vid, i, 0) != 0) {
- SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i);
- goto out;
+ if (packed_ring) {
+ /* Packed virtqueues support up to 2^15 entries each
+ * so left one bit can be used as wrap counter.
+ */
+ q->packed.avail_phase = q->last_avail_idx >> 15;
+ q->last_avail_idx = q->last_avail_idx & 0x7FFF;
+ q->packed.used_phase = q->last_used_idx >> 15;
+ q->last_used_idx = q->last_used_idx & 0x7FFF;
+
+ /* Disable I/O submission notifications, we'll be polling. */
+ q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
+ } else {
+ /* Disable I/O submission notifications, we'll be polling. */
+ q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
}
+ q->packed.packed_ring = packed_ring;
vsession->max_queues = i + 1;
}
- if (rte_vhost_get_negotiated_features(vid, &vsession->negotiated_features) != 0) {
- SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
- goto out;
- }
-
- if (rte_vhost_get_mem_table(vid, &vsession->mem) != 0) {
+ if (vhost_get_mem_table(vid, &vsession->mem) != 0) {
SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
goto out;
}
for (i = 0; i < vsession->max_queues; i++) {
struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];
+ /* vring.desc and vring.desc_packed are in a union struct
+ * so q->vring.desc can replace q->vring.desc_packed.
+ */
if (q->vring.desc != NULL && q->vring.size > 0) {
rte_vhost_vring_call(vsession->vid, q->vring_idx);
}
}
- spdk_vhost_session_set_coalescing(vdev, vsession, NULL);
- spdk_vhost_session_mem_register(vsession);
+ vhost_session_set_coalescing(vdev, vsession, NULL);
+ vhost_session_mem_register(vsession->mem);
vsession->initialized = true;
rc = vdev->backend->start_session(vsession);
if (rc != 0) {
+ /* Roll back the memory registration done just above. */
- spdk_vhost_session_mem_unregister(vsession);
+ vhost_session_mem_unregister(vsession->mem);
free(vsession->mem);
goto out;
}
out:
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
return rc;
}
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
-static int
-get_config(int vid, uint8_t *config, uint32_t len)
+/* rte_vhost get-config callback: forward the virtio config-space read
+ * for vid to the owning device's backend. Returns 0 on success.
+ */
+int
+vhost_get_config_cb(int vid, uint8_t *config, uint32_t len)
{
struct spdk_vhost_session *vsession;
struct spdk_vhost_dev *vdev;
int rc = -1;
- pthread_mutex_lock(&g_spdk_vhost_mutex);
- vsession = spdk_vhost_session_find_by_vid(vid);
+ pthread_mutex_lock(&g_vhost_mutex);
+ vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
goto out;
}
out:
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
return rc;
}
-static int
-set_config(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
+/* rte_vhost set-config callback: forward the virtio config-space write
+ * for vid to the owning device's backend. Returns 0 on success.
+ */
+int
+vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags)
{
struct spdk_vhost_session *vsession;
struct spdk_vhost_dev *vdev;
int rc = -1;
- pthread_mutex_lock(&g_spdk_vhost_mutex);
- vsession = spdk_vhost_session_find_by_vid(vid);
+ pthread_mutex_lock(&g_vhost_mutex);
+ vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
goto out;
}
out:
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
return rc;
}
#endif
}
void
-spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
+vhost_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
assert(vdev->backend->dump_info_json != NULL);
vdev->backend->dump_info_json(vdev, w);
return vdev->backend->remove_device(vdev);
}
-static int
-new_connection(int vid)
+/* rte_vhost new-connection callback: allocate and register a session
+ * object for vid on the device found by ifname. Returns 0 on success,
+ * -1 on lookup or allocation failure.
+ */
+int
+vhost_new_connection_cb(int vid, const char *ifname)
{
struct spdk_vhost_dev *vdev;
struct spdk_vhost_session *vsession;
- char ifname[PATH_MAX];
- pthread_mutex_lock(&g_spdk_vhost_mutex);
-
- if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
- SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- return -1;
- }
+ pthread_mutex_lock(&g_vhost_mutex);
vdev = spdk_vhost_dev_find(ifname);
if (vdev == NULL) {
SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
return -1;
}
+ /* Cache-line align the session plus the backend's private ctx area. */
if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
vdev->backend->session_ctx_size)) {
SPDK_ERRLOG("vsession alloc failed\n");
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
return -1;
}
memset(vsession, 0, sizeof(*vsession) + vdev->backend->session_ctx_size);
vsession->vdev = vdev;
- vsession->id = vdev->vsessions_num++;
vsession->vid = vid;
- vsession->lcore = -1;
+ vsession->id = vdev->vsessions_num++;
+ vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
+ if (vsession->name == NULL) {
+ /* Distinct message from the posix_memalign failure above so
+ * logs identify which allocation failed.
+ */
+ SPDK_ERRLOG("vsession name alloc failed\n");
+ pthread_mutex_unlock(&g_vhost_mutex);
+ free(vsession);
+ return -1;
+ }
vsession->started = false;
vsession->initialized = false;
vsession->next_stats_check_time = 0;
spdk_get_ticks_hz() / 1000UL;
TAILQ_INSERT_TAIL(&vdev->vsessions, vsession, tailq);
- spdk_vhost_session_install_rte_compat_hooks(vsession);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ vhost_session_install_rte_compat_hooks(vsession);
+ pthread_mutex_unlock(&g_vhost_mutex);
return 0;
}
-static void
-destroy_connection(int vid)
+/* rte_vhost destroy-connection callback: stop (if started) and free the
+ * session owning vid. Returns -EINVAL for an unknown vid, otherwise the
+ * stop result (0 if the session was not started).
+ */
+int
+vhost_destroy_connection_cb(int vid)
{
struct spdk_vhost_session *vsession;
+ int rc = 0;
- pthread_mutex_lock(&g_spdk_vhost_mutex);
- vsession = spdk_vhost_session_find_by_vid(vid);
+ pthread_mutex_lock(&g_vhost_mutex);
+ vsession = vhost_session_find_by_vid(vid);
if (vsession == NULL) {
SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
- return;
+ pthread_mutex_unlock(&g_vhost_mutex);
+ return -EINVAL;
}
if (vsession->started) {
- _stop_session(vsession);
+ /* NOTE(review): if _stop_session() fails the session is still
+ * removed and freed below - confirm the backend tolerates
+ * losing a session it failed to stop.
+ */
+ rc = _stop_session(vsession);
}
TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq);
+ free(vsession->name);
free(vsession);
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
-}
+ pthread_mutex_unlock(&g_vhost_mutex);
-static void
-spdk_vhost_external_event_foreach_continue(struct spdk_vhost_dev *vdev,
- struct spdk_vhost_session *vsession,
- spdk_vhost_session_fn fn, void *arg)
-{
- int rc;
-
- if (vsession == NULL) {
- goto out_finish_foreach;
- }
-
- while (!vsession->started) {
- if (vsession->initialized) {
- rc = fn(vdev, vsession, arg);
- if (rc < 0) {
- return;
- }
- }
-
- vsession = spdk_vhost_session_next(vdev, vsession->id);
- if (vsession == NULL) {
- goto out_finish_foreach;
- }
- }
-
- spdk_vhost_event_async_send_foreach_continue(vsession, fn, arg);
- return;
-
-out_finish_foreach:
- /* there are no more sessions to iterate through, so call the
- * fn one last time with vsession == NULL
- */
- assert(vdev->pending_async_op_num > 0);
- vdev->pending_async_op_num--;
- fn(vdev, NULL, arg);
-}
-
-void
-spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *vdev,
- spdk_vhost_session_fn fn, void *arg)
-{
- struct spdk_vhost_session *vsession = TAILQ_FIRST(&vdev->vsessions);
-
- assert(vdev->pending_async_op_num < UINT32_MAX);
- vdev->pending_async_op_num++;
- spdk_vhost_external_event_foreach_continue(vdev, vsession, fn, arg);
+ return rc;
}
+/* Acquire the global vhost mutex (public API). */
void
spdk_vhost_lock(void)
{
- pthread_mutex_lock(&g_spdk_vhost_mutex);
+ pthread_mutex_lock(&g_vhost_mutex);
}
+/* Try to acquire the global vhost mutex; returns 0 or a negative errno. */
int
spdk_vhost_trylock(void)
{
- return -pthread_mutex_trylock(&g_spdk_vhost_mutex);
+ return -pthread_mutex_trylock(&g_vhost_mutex);
}
+/* Release the global vhost mutex (public API). */
void
spdk_vhost_unlock(void)
{
- pthread_mutex_unlock(&g_spdk_vhost_mutex);
+ pthread_mutex_unlock(&g_vhost_mutex);
}
-static void
-vhost_create_poll_group_done(void *ctx)
+/* Initialize the vhost library on the calling SPDK thread (recorded as
+ * the init thread): prepare the socket directory, the DPDK-callback
+ * semaphore and construct the configured controllers, then build the
+ * core mask from the existing SPDK threads. init_cb is invoked with the
+ * result, possibly asynchronously.
+ */
+void
+spdk_vhost_init(spdk_vhost_init_cb init_cb)
{
- spdk_vhost_init_cb init_cb = ctx;
+ size_t len;
int ret;
- if (TAILQ_EMPTY(&g_poll_groups)) {
- /* No threads? Iteration failed? */
- init_cb(-ECHILD);
- return;
+ g_vhost_init_thread = spdk_get_thread();
+ assert(g_vhost_init_thread != NULL);
+
+ if (dev_dirname[0] == '\0') {
+ if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) {
+ SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ /* Ensure the directory path ends with a '/' separator. */
+ len = strlen(dev_dirname);
+ if (dev_dirname[len - 1] != '/') {
+ dev_dirname[len] = '/';
+ dev_dirname[len + 1] = '\0';
+ }
}
- ret = spdk_vhost_scsi_controller_construct();
+ ret = sem_init(&g_dpdk_sem, 0, 0);
+ if (ret != 0) {
+ SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
+ ret = -1;
+ goto out;
+ }
+
+ /* NOTE(review): the error paths below leave g_dpdk_sem initialized
+ * even though init fails and vhost_fini (which destroys it) will not
+ * run - confirm whether sem_destroy should be added before goto out.
+ */
+ ret = vhost_scsi_controller_construct();
if (ret != 0) {
SPDK_ERRLOG("Cannot construct vhost controllers\n");
goto out;
}
- ret = spdk_vhost_blk_controller_construct();
+ ret = vhost_blk_controller_construct();
if (ret != 0) {
SPDK_ERRLOG("Cannot construct vhost block controllers\n");
goto out;
}
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
- ret = spdk_vhost_nvme_controller_construct();
+ ret = vhost_nvme_controller_construct();
if (ret != 0) {
SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
goto out;
}
#endif
-out:
- init_cb(ret);
-}
-
-static void
-vhost_create_poll_group(void *ctx)
-{
- struct vhost_poll_group *pg;
+ spdk_cpuset_zero(&g_vhost_core_mask);
- pg = calloc(1, sizeof(*pg));
- if (!pg) {
- SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
- spdk_app_stop(-ENOMEM);
- return;
- }
-
- pg->thread = spdk_get_thread();
- TAILQ_INSERT_TAIL(&g_poll_groups, pg, tailq);
-}
-
-void
-spdk_vhost_init(spdk_vhost_init_cb init_cb)
-{
- uint32_t last_core;
- size_t len;
- int ret;
-
- if (dev_dirname[0] == '\0') {
- if (getcwd(dev_dirname, sizeof(dev_dirname) - 1) == NULL) {
- SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
- ret = -1;
- goto err_out;
- }
-
- len = strlen(dev_dirname);
- if (dev_dirname[len - 1] != '/') {
- dev_dirname[len] = '/';
- dev_dirname[len + 1] = '\0';
- }
- }
-
- last_core = spdk_env_get_last_core();
- g_num_ctrlrs = calloc(last_core + 1, sizeof(uint32_t));
- if (!g_num_ctrlrs) {
- SPDK_ERRLOG("Could not allocate array size=%u for g_num_ctrlrs\n",
- last_core + 1);
- ret = -1;
- goto err_out;
- }
-
- spdk_for_each_thread(vhost_create_poll_group,
- init_cb,
- vhost_create_poll_group_done);
+ /* iterate threads instead of using SPDK_ENV_FOREACH_CORE to ensure that threads are really
+ * created.
+ */
+ spdk_for_each_thread(vhost_setup_core_mask, init_cb, vhost_setup_core_mask_done);
return;
-err_out:
+out:
init_cb(ret);
}
+/* Final teardown step, run on the init thread after every device has
+ * been removed: clears the core mask, destroys the DPDK semaphore and
+ * invokes the user's fini completion callback.
+ */
static void
-_spdk_vhost_fini(void *arg1)
+vhost_fini(void *arg1)
{
struct spdk_vhost_dev *vdev, *tmp;
- struct vhost_poll_group *pg, *tpg;
spdk_vhost_lock();
vdev = spdk_vhost_dev_next(NULL);
}
spdk_vhost_unlock();
+ spdk_cpuset_zero(&g_vhost_core_mask);
+
/* All devices are removed now. */
- free(g_num_ctrlrs);
- TAILQ_FOREACH_SAFE(pg, &g_poll_groups, tailq, tpg) {
- TAILQ_REMOVE(&g_poll_groups, pg, tailq);
- free(pg);
- }
+ sem_destroy(&g_dpdk_sem);
+
g_fini_cpl_cb();
}
{
struct spdk_vhost_dev *vdev = NULL;
- TAILQ_FOREACH(vdev, &g_spdk_vhost_devices, tailq) {
- rte_vhost_driver_unregister(vdev->path);
+ TAILQ_FOREACH(vdev, &g_vhost_devices, tailq) {
+ vhost_driver_unregister(vdev->path);
vdev->registered = false;
}
SPDK_INFOLOG(SPDK_LOG_VHOST, "Exiting\n");
- spdk_thread_send_msg(g_fini_thread, _spdk_vhost_fini, NULL);
+ spdk_thread_send_msg(g_vhost_init_thread, vhost_fini, NULL);
return NULL;
}
pthread_t tid;
int rc;
- g_fini_thread = spdk_get_thread();
+ assert(spdk_get_thread() == g_vhost_init_thread);
g_fini_cpl_cb = fini_cb;
/* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
spdk_vhost_get_coalescing(vdev, &delay_base_us, &iops_threshold);
if (delay_base_us) {
spdk_json_write_object_begin(w);
- spdk_json_write_named_string(w, "method", "set_vhost_controller_coalescing");
+ spdk_json_write_named_string(w, "method", "vhost_controller_set_coalescing");
spdk_json_write_named_object_begin(w, "params");
spdk_json_write_named_string(w, "ctrlr", vdev->name);