/*-
* BSD LICENSE
*
- * Copyright (c) Intel Corporation.
- * All rights reserved.
+ * Copyright (c) Intel Corporation. All rights reserved.
+ * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "spdk_internal/nvme_tcp.h"
#define NVME_TCP_RW_BUFFER_SIZE 131072
+#define NVME_TCP_TIME_OUT_IN_SECONDS 2
#define NVME_TCP_HPDA_DEFAULT 0
#define NVME_TCP_MAX_R2T_DEFAULT 1
struct spdk_nvme_ctrlr ctrlr;
};
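+/* Poll group context: aggregates the sockets of all TCP qpairs in the group
+ * into a single spdk_sock_group so they can be polled together.
+ */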
+struct nvme_tcp_poll_group {
+ struct spdk_nvme_transport_poll_group group;
+ struct spdk_sock_group *sock_group;
+ uint32_t completions_per_qpair;
+ int64_t num_completions;
+};
+
/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
struct spdk_nvme_qpair qpair;
TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
struct nvme_tcp_pdu recv_pdu;
struct nvme_tcp_pdu send_pdu; /* only for error pdu and init pdu */
+ struct nvme_tcp_pdu *send_pdus; /* Used by tcp_reqs */
enum nvme_tcp_pdu_recv_state recv_state;
struct nvme_tcp_req *tcp_reqs;
uint32_t r2tl_remain;
uint32_t active_r2ts;
bool in_capsule_data;
- struct nvme_tcp_pdu send_pdu;
+	/* Used to track whether the request can be safely freed */
+ struct {
+ uint8_t send_ack : 1;
+ uint8_t data_recv : 1;
+ uint8_t r2t_recv : 1;
+ uint8_t reserved : 5;
+ } ordering;
+ struct nvme_tcp_pdu *send_pdu;
struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
uint32_t iovcnt;
+ struct nvme_tcp_qpair *tqpair;
TAILQ_ENTRY(nvme_tcp_req) link;
};
-static void spdk_nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
+static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
static inline struct nvme_tcp_qpair *
nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
}
+static inline struct nvme_tcp_poll_group *
+nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
+{
+ return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
+}
+
static inline struct nvme_tcp_ctrlr *
nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
tcp_req->r2tl_remain = 0;
tcp_req->active_r2ts = 0;
tcp_req->iovcnt = 0;
- memset(&tcp_req->send_pdu, 0, sizeof(tcp_req->send_pdu));
+ tcp_req->ordering.send_ack = 0;
+ tcp_req->ordering.data_recv = 0;
+ tcp_req->ordering.r2t_recv = 0;
+ memset(tcp_req->send_pdu, 0, sizeof(struct nvme_tcp_pdu));
TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);
return tcp_req;
{
assert(tcp_req->state != NVME_TCP_REQ_FREE);
tcp_req->state = NVME_TCP_REQ_FREE;
- TAILQ_REMOVE(&tqpair->outstanding_reqs, tcp_req, link);
- TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
+ TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
}
static int
{
free(tqpair->tcp_reqs);
tqpair->tcp_reqs = NULL;
+
+ spdk_free(tqpair->send_pdus);
+ tqpair->send_pdus = NULL;
}
static int
nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
{
- int i;
+ uint16_t i;
struct nvme_tcp_req *tcp_req;
tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req));
if (tqpair->tcp_reqs == NULL) {
- SPDK_ERRLOG("Failed to allocate tcp_reqs\n");
+ SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair);
+ goto fail;
+ }
+
+ tqpair->send_pdus = spdk_zmalloc(tqpair->num_entries * sizeof(struct nvme_tcp_pdu),
+ 0x1000, NULL,
+ SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
+
+ if (tqpair->send_pdus == NULL) {
+ SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair);
goto fail;
}
for (i = 0; i < tqpair->num_entries; i++) {
tcp_req = &tqpair->tcp_reqs[i];
tcp_req->cid = i;
+ tcp_req->tqpair = tqpair;
+ tcp_req->send_pdu = &tqpair->send_pdus[i];
TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
}
}
static void
-nvme_tcp_qpair_disconnect(struct spdk_nvme_qpair *qpair)
+nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
struct nvme_tcp_pdu *pdu;
}
}
+static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
+
static int
-nvme_tcp_qpair_destroy(struct spdk_nvme_qpair *qpair)
+nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
struct nvme_tcp_qpair *tqpair;
return -1;
}
- nvme_tcp_qpair_disconnect(qpair);
+ nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
nvme_tcp_qpair_abort_reqs(qpair, 1);
nvme_qpair_deinit(qpair);
tqpair = nvme_tcp_qpair(qpair);
return 0;
}
-int
+static int
nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
return 0;
}
-/* This function must only be called while holding g_spdk_nvme_driver->lock */
-int
-nvme_tcp_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
- bool direct_connect)
-{
- struct spdk_nvme_ctrlr_opts discovery_opts;
- struct spdk_nvme_ctrlr *discovery_ctrlr;
- union spdk_nvme_cc_register cc;
- int rc;
- struct nvme_completion_poll_status status;
-
- if (strcmp(probe_ctx->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN) != 0) {
- /* Not a discovery controller - connect directly. */
- rc = nvme_ctrlr_probe(&probe_ctx->trid, probe_ctx, NULL);
- return rc;
- }
-
- spdk_nvme_ctrlr_get_default_ctrlr_opts(&discovery_opts, sizeof(discovery_opts));
- /* For discovery_ctrlr set the timeout to 0 */
- discovery_opts.keep_alive_timeout_ms = 0;
-
- discovery_ctrlr = nvme_tcp_ctrlr_construct(&probe_ctx->trid, &discovery_opts, NULL);
- if (discovery_ctrlr == NULL) {
- return -1;
- }
-
- /* TODO: this should be using the normal NVMe controller initialization process */
- cc.raw = 0;
- cc.bits.en = 1;
- cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
- cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
- rc = nvme_transport_ctrlr_set_reg_4(discovery_ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
- cc.raw);
- if (rc < 0) {
- SPDK_ERRLOG("Failed to set cc\n");
- nvme_ctrlr_destruct(discovery_ctrlr);
- return -1;
- }
-
- /* Direct attach through spdk_nvme_connect() API */
- if (direct_connect == true) {
- /* get the cdata info */
- status.done = false;
- rc = nvme_ctrlr_cmd_identify(discovery_ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
- &discovery_ctrlr->cdata, sizeof(discovery_ctrlr->cdata),
- nvme_completion_poll_cb, &status);
- if (rc != 0) {
- SPDK_ERRLOG("Failed to identify cdata\n");
- return rc;
- }
-
- if (spdk_nvme_wait_for_completion(discovery_ctrlr->adminq, &status)) {
- SPDK_ERRLOG("nvme_identify_controller failed!\n");
- return -ENXIO;
- }
- /* Set the ready state to skip the normal init process */
- discovery_ctrlr->state = NVME_CTRLR_STATE_READY;
- nvme_ctrlr_connected(probe_ctx, discovery_ctrlr);
- nvme_ctrlr_add_process(discovery_ctrlr, 0);
- return 0;
- }
-
- rc = nvme_fabric_ctrlr_discover(discovery_ctrlr, probe_ctx);
- nvme_ctrlr_destruct(discovery_ctrlr);
- SPDK_DEBUGLOG(SPDK_LOG_NVME, "leave\n");
- return rc;
-}
-
-int
+static int
nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);
if (ctrlr->adminq) {
- nvme_tcp_qpair_destroy(ctrlr->adminq);
+ nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
}
nvme_ctrlr_destruct_finish(ctrlr);
return 0;
}
-int
-nvme_tcp_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
-{
- return nvme_fabric_ctrlr_set_reg_4(ctrlr, offset, value);
-}
-
-int
-nvme_tcp_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
-{
- return nvme_fabric_ctrlr_set_reg_8(ctrlr, offset, value);
-}
-
-int
-nvme_tcp_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
-{
- return nvme_fabric_ctrlr_get_reg_4(ctrlr, offset, value);
-}
-
-int
-nvme_tcp_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
-{
- return nvme_fabric_ctrlr_get_reg_8(ctrlr, offset, value);
-}
-
-static int
-nvme_tcp_qpair_process_send_queue(struct nvme_tcp_qpair *tqpair)
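+/* Completion callback for spdk_sock_writev_async(): removes the PDU from the
+ * send queue, then runs its completion callback, or disconnects the qpair on
+ * write error.
+ */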
+static void
+_pdu_write_done(void *cb_arg, int err)
{
- const int array_size = 32;
- struct iovec iovec_array[array_size];
- struct iovec *iov = iovec_array;
- int iovec_cnt = 0;
- int bytes = 0;
- uint32_t mapped_length;
- struct nvme_tcp_pdu *pdu;
- int pdu_length;
- TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
+ struct nvme_tcp_pdu *pdu = cb_arg;
+ struct nvme_tcp_qpair *tqpair = pdu->qpair;
- pdu = TAILQ_FIRST(&tqpair->send_queue);
+ TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
- if (pdu == NULL) {
- return 0;
- }
-
- /*
- * Build up a list of iovecs for the first few PDUs in the
- * tqpair 's send_queue.
- */
- while (pdu != NULL && ((array_size - iovec_cnt) >= 3)) {
- iovec_cnt += nvme_tcp_build_iovecs(&iovec_array[iovec_cnt], array_size - iovec_cnt,
- pdu, tqpair->host_hdgst_enable,
- tqpair->host_ddgst_enable, &mapped_length);
- pdu = TAILQ_NEXT(pdu, tailq);
- }
-
- bytes = spdk_sock_writev(tqpair->sock, iov, iovec_cnt);
- SPDK_DEBUGLOG(SPDK_LOG_NVME, "bytes=%d are out\n", bytes);
- if (bytes == -1) {
- if (errno == EWOULDBLOCK || errno == EAGAIN) {
- return 1;
- } else {
- SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n",
- errno, spdk_strerror(errno));
- return -errno;
- }
- }
-
- pdu = TAILQ_FIRST(&tqpair->send_queue);
-
- /*
- * Free any PDUs that were fully written. If a PDU was only
- * partially written, update its writev_offset so that next
- * time only the unwritten portion will be sent to writev().
- */
- TAILQ_INIT(&completed_pdus_list);
- while (bytes > 0) {
- pdu_length = pdu->hdr.common.plen - pdu->writev_offset;
- assert(pdu_length > 0);
- if (bytes >= pdu_length) {
- bytes -= pdu_length;
- TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
- TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq);
- pdu = TAILQ_FIRST(&tqpair->send_queue);
-
- } else {
- pdu->writev_offset += bytes;
- bytes = 0;
- }
- }
-
- while (!TAILQ_EMPTY(&completed_pdus_list)) {
- pdu = TAILQ_FIRST(&completed_pdus_list);
- TAILQ_REMOVE(&completed_pdus_list, pdu, tailq);
- assert(pdu->cb_fn != NULL);
- pdu->cb_fn(pdu->cb_arg);
+ if (err != 0) {
+ nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
+ return;
}
- return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
-
+ assert(pdu->cb_fn != NULL);
+ pdu->cb_fn(pdu->cb_arg);
}
static int
nvme_tcp_qpair_xfer_complete_cb cb_fn,
void *cb_arg)
{
- int enable_digest;
int hlen;
uint32_t crc32c;
+ uint32_t mapped_length = 0;
hlen = pdu->hdr.common.hlen;
- enable_digest = 1;
- if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ ||
- pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) {
- /* this PDU should be sent without digest */
- enable_digest = 0;
- }
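+	/* g_nvme_tcp_hdgst/g_nvme_tcp_ddgst mark, per PDU type, whether that PDU
+	 * may carry a header/data digest (e.g. IC_REQ and H2C_TERM_REQ may not).
+	 */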
/* Header Digest */
- if (enable_digest && tqpair->host_hdgst_enable) {
+ if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
}
/* Data Digest */
- if (pdu->data_len > 0 && enable_digest && tqpair->host_ddgst_enable) {
+ if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
}
pdu->cb_fn = cb_fn;
pdu->cb_arg = cb_arg;
+
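+	/* Queue the PDU and hand it to the socket layer as an asynchronous,
+	 * vectored write; _pdu_write_done() runs once the data is fully sent.
+	 */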
+ pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, NVME_TCP_MAX_SGL_DESCRIPTORS, pdu,
+ tqpair->host_hdgst_enable, tqpair->host_ddgst_enable,
+ &mapped_length);
+ pdu->qpair = tqpair;
+ pdu->sock_req.cb_fn = _pdu_write_done;
+ pdu->sock_req.cb_arg = pdu;
TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
+ spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
+
return 0;
}
static int
nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
{
- int rc, iovcnt;
- uint32_t length;
- uint64_t remaining_size;
+ int rc;
+ uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
struct nvme_request *req = tcp_req->req;
SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
assert(req->payload.next_sge_fn != NULL);
req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
+ max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
remaining_size = req->payload_size;
- iovcnt = 0;
do {
rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
tcp_req->iov[iovcnt].iov_len = length;
remaining_size -= length;
iovcnt++;
- } while (remaining_size > 0 && iovcnt < NVME_TCP_MAX_SGL_DESCRIPTORS);
+ } while (remaining_size > 0 && iovcnt < max_num_sgl);
/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
if (remaining_size > 0) {
+ SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
+ tcp_req, iovcnt, remaining_size);
return -1;
}
return 0;
}
-static inline uint32_t
-nvme_tcp_icdsz_bytes(struct spdk_nvme_ctrlr *ctrlr)
-{
- return (ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd));
-}
-
static int
nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
struct nvme_tcp_req *tcp_req)
xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
}
if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
- max_incapsule_data_size = nvme_tcp_icdsz_bytes(ctrlr);
+ max_incapsule_data_size = ctrlr->ioccsz_bytes;
if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) {
max_incapsule_data_size = spdk_min(max_incapsule_data_size, NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
}
return 0;
}
-static void
-nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
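+/* Free the request only after both the capsule send has been acked by the
+ * socket layer and the response/data has been received; freeing earlier could
+ * let the per-request send PDU be reused while the async write is in flight.
+ */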
+static inline void
+nvme_tcp_req_put_safe(struct nvme_tcp_req *tcp_req)
{
+ if (tcp_req->ordering.send_ack && tcp_req->ordering.data_recv) {
+ assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
+ assert(tcp_req->tqpair != NULL);
+ nvme_tcp_req_put(tcp_req->tqpair, tcp_req);
+ }
}
static void
-nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
- struct nvme_tcp_req *tcp_req,
- uint32_t data_len)
+nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
{
- uint32_t i, remain_len, len;
- struct _nvme_tcp_sgl *pdu_sgl;
+ struct nvme_tcp_req *tcp_req = cb_arg;
- if (tcp_req->iovcnt == 1) {
- nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)tcp_req->iov[0].iov_base + tcp_req->datao), data_len);
+ tcp_req->ordering.send_ack = 1;
+ /* Handle the r2t case */
+ if (spdk_unlikely(tcp_req->ordering.r2t_recv)) {
+ nvme_tcp_send_h2c_data(tcp_req);
} else {
- i = 0;
- pdu_sgl = &pdu->sgl;
- assert(tcp_req->iovcnt <= NVME_TCP_MAX_SGL_DESCRIPTORS);
- _nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, tcp_req->iovcnt, tcp_req->datao);
- remain_len = data_len;
-
- while (remain_len > 0) {
- assert(i < NVME_TCP_MAX_SGL_DESCRIPTORS);
- len = spdk_min(remain_len, tcp_req->iov[i].iov_len);
- remain_len -= len;
- if (!_nvme_tcp_sgl_append(pdu_sgl, tcp_req->iov[i].iov_base, len)) {
- break;
- }
- i++;
- }
-
- assert(remain_len == 0);
- pdu->data_iovcnt = tcp_req->iovcnt - pdu_sgl->iovcnt;
- pdu->data_len = data_len;
+ nvme_tcp_req_put_safe(tcp_req);
}
}
uint8_t pdo;
SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
- pdu = &tcp_req->send_pdu;
+ pdu = tcp_req->send_pdu;
capsule_cmd = &pdu->hdr.capsule_cmd;
capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
capsule_cmd->ccsqe = tcp_req->req->cmd;
-
SPDK_DEBUGLOG(SPDK_LOG_NVME, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);
if (tqpair->host_hdgst_enable) {
}
tcp_req->datao = 0;
- nvme_tcp_pdu_set_data_buf(pdu, tcp_req, tcp_req->req->payload_size);
+ nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
+ 0, tcp_req->req->payload_size);
end:
capsule_cmd->common.plen = plen;
- return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, NULL);
+ return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req);
}
-int
+static int
nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
struct nvme_request *req)
{
tcp_req = nvme_tcp_req_get(tqpair);
if (!tcp_req) {
- /*
- * No tcp_req is available, so queue the request to be
- * processed later.
- */
- STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
- return 0;
+ /* Inform the upper layer to try again later. */
+ return -EAGAIN;
}
if (nvme_tcp_req_init(tqpair, req, tcp_req)) {
SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
+ TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
nvme_tcp_req_put(tqpair, tcp_req);
return -1;
}
return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
}
-int
-nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
-{
- return nvme_tcp_qpair_destroy(qpair);
-}
-
-int
+static int
nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
{
return 0;
}
static void
-nvme_tcp_req_complete(struct nvme_request *req,
+nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req,
struct spdk_nvme_cpl *rsp)
{
+ struct nvme_request *req;
+
+ assert(tcp_req->req != NULL);
+ req = tcp_req->req;
+
+ TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp);
nvme_free_request(req);
}
-void
+static void
nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
struct nvme_tcp_req *tcp_req, *tmp;
- struct nvme_request *req;
struct spdk_nvme_cpl cpl;
struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
cpl.status.dnr = dnr;
TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
- assert(tcp_req->req != NULL);
- req = tcp_req->req;
-
- nvme_tcp_req_complete(req, &cpl);
+ nvme_tcp_req_complete(tcp_req, &cpl);
nvme_tcp_req_put(tqpair, tcp_req);
}
}
goto err;
} else {
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
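+			/* Compute the PSH length (including header digest and padding) up
+			 * front so the PSH read path no longer has to derive it itself.
+			 */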
+ nvme_tcp_pdu_calc_psh_len(&tqpair->recv_pdu, tqpair->host_hdgst_enable);
return;
}
err:
return &tqpair->tcp_reqs[cid];
}
-static void
-nvme_tcp_free_and_handle_queued_req(struct spdk_nvme_qpair *qpair)
-{
- struct nvme_request *req;
-
- if (!STAILQ_EMPTY(&qpair->queued_req) && !qpair->ctrlr->is_resetting) {
- req = STAILQ_FIRST(&qpair->queued_req);
- STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
- nvme_qpair_submit_request(qpair, req);
- }
-}
-
static void
nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
struct nvme_tcp_pdu *pdu, uint32_t *reaped)
struct spdk_nvme_cpl cpl = {};
uint8_t flags;
- tcp_req = pdu->ctx;
+ tcp_req = pdu->req;
assert(tcp_req != NULL);
SPDK_DEBUGLOG(SPDK_LOG_NVME, "enter\n");
cpl.cid = tcp_req->cid;
cpl.sqid = tqpair->qpair.id;
- nvme_tcp_req_complete(tcp_req->req, &cpl);
- nvme_tcp_req_put(tqpair, tcp_req);
- (*reaped)++;
- nvme_tcp_free_and_handle_queued_req(&tqpair->qpair);
+ nvme_tcp_req_complete(tcp_req, &cpl);
+ if (tcp_req->ordering.send_ack) {
+ (*reaped)++;
+ }
+
+ tcp_req->ordering.data_recv = 1;
+ nvme_tcp_req_put_safe(tcp_req);
}
}
struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
uint32_t error_offset = 0;
enum spdk_nvme_tcp_term_req_fes fes;
+ int recv_buf_size;
/* Only PFV 0 is defined currently */
if (ic_resp->pfv != 0) {
SPDK_DEBUGLOG(SPDK_LOG_NVME, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
SPDK_DEBUGLOG(SPDK_LOG_NVME, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
+	/* Now that we know whether digests are enabled, size the receive buffer to
+	 * hold several incoming 4K read commands, per the
+	 * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR parameter. */
+ recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
+
+ if (tqpair->host_hdgst_enable) {
+ recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ if (tqpair->host_ddgst_enable) {
+ recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
+ }
+
+ if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
+ SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
+ tqpair,
+ recv_buf_size);
+ /* Not fatal. */
+ }
+
tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
return;
}
- assert(tcp_req->req != NULL);
- assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
- nvme_tcp_req_complete(tcp_req->req, &cpl);
- nvme_tcp_req_put(tqpair, tcp_req);
- (*reaped)++;
- nvme_tcp_free_and_handle_queued_req(&tqpair->qpair);
+ nvme_tcp_req_complete(tcp_req, &cpl);
+ if (tcp_req->ordering.send_ack) {
+ (*reaped)++;
+ }
+
+ tcp_req->ordering.data_recv = 1;
+ nvme_tcp_req_put_safe(tcp_req);
SPDK_DEBUGLOG(SPDK_LOG_NVME, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
}
- nvme_tcp_pdu_set_data_buf(pdu, tcp_req, c2h_data->datal);
- pdu->ctx = tcp_req;
+ nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
+ c2h_data->datao, c2h_data->datal);
+ pdu->req = tcp_req;
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
return;
assert(tcp_req != NULL);
+ tcp_req->ordering.send_ack = 1;
if (tcp_req->r2tl_remain) {
- spdk_nvme_tcp_send_h2c_data(tcp_req);
+ nvme_tcp_send_h2c_data(tcp_req);
} else {
assert(tcp_req->active_r2ts > 0);
tcp_req->active_r2ts--;
tcp_req->state = NVME_TCP_REQ_ACTIVE;
+		/* We also need to call this function so the request is freed once it is safe. */
+ nvme_tcp_req_put_safe(tcp_req);
}
}
static void
-spdk_nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
+nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
{
struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
struct nvme_tcp_pdu *rsp_pdu;
struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
uint32_t plen, pdo, alignment;
- rsp_pdu = &tcp_req->send_pdu;
+ /* Reinit the send_ack and r2t_recv bits */
+ tcp_req->ordering.send_ack = 0;
+ tcp_req->ordering.r2t_recv = 0;
+ rsp_pdu = tcp_req->send_pdu;
memset(rsp_pdu, 0, sizeof(*rsp_pdu));
h2c_data = &rsp_pdu->hdr.h2c_data;
h2c_data->datao = tcp_req->datao;
h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
- nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req, h2c_data->datal);
+ nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
+ h2c_data->datao, h2c_data->datal);
tcp_req->r2tl_remain -= h2c_data->datal;
if (tqpair->host_hdgst_enable) {
goto end;
}
+ tcp_req->ordering.r2t_recv = 1;
SPDK_DEBUGLOG(SPDK_LOG_NVME, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
tqpair);
tcp_req->r2tl_remain = r2t->r2tl;
nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
- spdk_nvme_tcp_send_h2c_data(tcp_req);
+ if (spdk_likely(tcp_req->ordering.send_ack)) {
+ nvme_tcp_send_h2c_data(tcp_req);
+ }
return;
end:
int rc = 0;
struct nvme_tcp_pdu *pdu;
uint32_t data_len;
- uint8_t psh_len, pdo;
- int8_t padding_len;
enum nvme_tcp_pdu_recv_state prev_state;
/* The loop here is to allow for several back-to-back state changes. */
/* Wait for the pdu specific header */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
pdu = &tqpair->recv_pdu;
- psh_len = pdu->hdr.common.hlen;
-
- /* The following pdus can have digest */
- if (((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP) ||
- (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) ||
- (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_R2T)) &&
- tqpair->host_hdgst_enable) {
- pdu->has_hdgst = true;
- psh_len += SPDK_NVME_TCP_DIGEST_LEN;
- if (pdu->hdr.common.plen > psh_len) {
- pdo = pdu->hdr.common.pdo;
- padding_len = pdo - psh_len;
- SPDK_DEBUGLOG(SPDK_LOG_NVME, "padding length is =%d for pdu=%p on tqpair=%p\n", padding_len,
- pdu, tqpair);
- if (padding_len > 0) {
- psh_len = pdo;
- }
- }
+ rc = nvme_tcp_read_data(tqpair->sock,
+ pdu->psh_len - pdu->psh_valid_bytes,
+ (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
+ if (rc < 0) {
+ nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
+ break;
}
- psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
- /* The following will read psh + hdgest (if possbile) + padding (if posssible) */
- if (pdu->psh_valid_bytes < psh_len) {
- rc = nvme_tcp_read_data(tqpair->sock,
- psh_len - pdu->psh_valid_bytes,
- (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
- if (rc < 0) {
- nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
- break;
- }
-
- pdu->psh_valid_bytes += rc;
- if (pdu->psh_valid_bytes < psh_len) {
- return NVME_TCP_PDU_IN_PROGRESS;
- }
+ pdu->psh_valid_bytes += rc;
+ if (pdu->psh_valid_bytes < pdu->psh_len) {
+ return NVME_TCP_PDU_IN_PROGRESS;
}
		/* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
tqpair->host_ddgst_enable)) {
data_len += SPDK_NVME_TCP_DIGEST_LEN;
pdu->ddgst_enable = true;
-
}
rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
}
if (nvme_qpair_is_admin_queue(qpair)) {
- active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
+ active_proc = nvme_ctrlr_get_current_process(ctrlr);
} else {
active_proc = qpair->active_proc;
}
}
}
-int
+static int
nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
uint32_t reaped;
int rc;
- rc = nvme_tcp_qpair_process_send_queue(tqpair);
+ rc = spdk_sock_flush(tqpair->sock);
if (rc < 0) {
return rc;
}
do {
rc = nvme_tcp_read_pdu(tqpair, &reaped);
if (rc < 0) {
- SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
- errno, spdk_strerror(errno));
- return -1;
+ SPDK_DEBUGLOG(SPDK_LOG_NVME, "Error polling CQ! (%d): %s\n",
+ errno, spdk_strerror(errno));
+ goto fail;
} else if (rc == 0) {
/* Partial PDU is read */
break;
}
return reaped;
+fail:
+
+ /*
+ * Since admin queues take the ctrlr_lock before entering this function,
+ * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
+ * to call the generic function which will take the lock for us.
+ */
+ qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
+
+ if (nvme_qpair_is_admin_queue(qpair)) {
+ nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
+ } else {
+ nvme_ctrlr_disconnect_qpair(qpair);
+ }
+ return -ENXIO;
+}
+
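+/* Called by the sock group when a qpair's socket is readable: run the qpair's
+ * completion processing and accumulate the count on the poll group.
+ */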
+static void
+nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
+{
+ struct spdk_nvme_qpair *qpair = ctx;
+ struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
+ int32_t num_completions;
+
+ num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
+
+ if (pgroup->num_completions >= 0 && num_completions >= 0) {
+ pgroup->num_completions += num_completions;
+ } else {
+ pgroup->num_completions = -ENXIO;
+ }
}
static int
{
struct spdk_nvme_tcp_ic_req *ic_req;
struct nvme_tcp_pdu *pdu;
+ uint64_t icreq_timeout_tsc;
+ int rc;
pdu = &tqpair->send_pdu;
memset(&tqpair->send_pdu, 0, sizeof(tqpair->send_pdu));
nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);
- while (tqpair->state == NVME_TCP_QPAIR_STATE_INVALID) {
- nvme_tcp_qpair_process_completions(&tqpair->qpair, 0);
- }
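+	/* Poll for the ICResp, but give up after NVME_TCP_TIME_OUT_IN_SECONDS so
+	 * an unresponsive target cannot hang the connect path forever.
+	 */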
+ icreq_timeout_tsc = spdk_get_ticks() + (NVME_TCP_TIME_OUT_IN_SECONDS * spdk_get_ticks_hz());
+ do {
+ rc = nvme_tcp_qpair_process_completions(&tqpair->qpair, 0);
+ } while ((tqpair->state == NVME_TCP_QPAIR_STATE_INVALID) &&
+ (rc == 0) && (spdk_get_ticks() <= icreq_timeout_tsc));
if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
}
static int
-nvme_tcp_qpair_connect(struct nvme_tcp_qpair *tqpair)
+nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
struct sockaddr_storage dst_addr;
struct sockaddr_storage src_addr;
int rc;
- struct spdk_nvme_ctrlr *ctrlr;
+ struct nvme_tcp_qpair *tqpair;
int family;
long int port;
+ struct spdk_sock_opts opts;
- ctrlr = tqpair->qpair.ctrlr;
+ tqpair = nvme_tcp_qpair(qpair);
switch (ctrlr->trid.adrfam) {
case SPDK_NVMF_ADRFAM_IPV4:
return -1;
}
- tqpair->sock = spdk_sock_connect(ctrlr->trid.traddr, port);
+ opts.opts_size = sizeof(opts);
+ spdk_sock_get_default_opts(&opts);
+ opts.priority = ctrlr->trid.priority;
+ tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts);
if (!tqpair->sock) {
SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
tqpair, ctrlr->trid.traddr, port);
return 0;
}
-int
-nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
-{
- return nvme_tcp_qpair_connect(nvme_tcp_qpair(qpair));
-}
-
-void
-nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
-{
- return nvme_tcp_qpair_disconnect(qpair);
-}
-
static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
uint16_t qid, uint32_t qsize,
tqpair->num_entries = qsize;
qpair = &tqpair->qpair;
-
rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests);
if (rc != 0) {
free(tqpair);
rc = nvme_tcp_alloc_reqs(tqpair);
if (rc) {
- nvme_tcp_qpair_destroy(qpair);
- return NULL;
- }
-
- rc = nvme_tcp_qpair_connect(tqpair);
- if (rc < 0) {
- nvme_tcp_qpair_destroy(qpair);
+ nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
return NULL;
}
return qpair;
}
-struct spdk_nvme_qpair *
+static struct spdk_nvme_qpair *
nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
const struct spdk_nvme_io_qpair_opts *opts)
{
opts->io_queue_requests);
}
-struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
+static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
const struct spdk_nvme_ctrlr_opts *opts,
void *devhandle)
{
return NULL;
}
- tctrlr->ctrlr.trid.trtype = SPDK_NVME_TRANSPORT_TCP;
tctrlr->ctrlr.opts = *opts;
tctrlr->ctrlr.trid = *trid;
}
tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
- SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES, 0, SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES);
+ tctrlr->ctrlr.opts.admin_queue_size, 0,
+ tctrlr->ctrlr.opts.admin_queue_size);
if (!tctrlr->ctrlr.adminq) {
SPDK_ERRLOG("failed to create admin qpair\n");
nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
return NULL;
}
+ rc = nvme_transport_ctrlr_connect_qpair(&tctrlr->ctrlr, tctrlr->ctrlr.adminq);
+ if (rc < 0) {
+ SPDK_ERRLOG("failed to connect admin qpair\n");
+ nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
+ return NULL;
+ }
+
if (nvme_ctrlr_get_cap(&tctrlr->ctrlr, &cap)) {
SPDK_ERRLOG("get_cap() failed\n");
nvme_ctrlr_destruct(&tctrlr->ctrlr);
return &tctrlr->ctrlr;
}
-uint32_t
+static uint32_t
nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
- return NVME_TCP_RW_BUFFER_SIZE;
+	/* The TCP transport doesn't limit the maximum I/O transfer size. */
+ return UINT32_MAX;
}
-uint16_t
+static uint16_t
nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
/*
return 1;
}
-volatile struct spdk_nvme_registers *
-nvme_tcp_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr)
+static int
+nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
+ int (*iter_fn)(struct nvme_request *req, void *arg),
+ void *arg)
{
- return NULL;
-}
+ struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+ struct nvme_tcp_req *tcp_req, *tmp;
+ int rc;
-void *
-nvme_tcp_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
-{
- return NULL;
-}
+ assert(iter_fn != NULL);
+
+ TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
+ assert(tcp_req->req != NULL);
+
+ rc = iter_fn(tcp_req->req, arg);
+ if (rc != 0) {
+ return rc;
+ }
+ }
-int
-nvme_tcp_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
-{
return 0;
}
-void
+static void
nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
struct nvme_tcp_req *tcp_req, *tmp;
- struct nvme_request *req;
struct spdk_nvme_cpl cpl;
struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
cpl.status.sct = SPDK_NVME_SCT_GENERIC;
TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
+ assert(tcp_req->req != NULL);
if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
continue;
}
- assert(tcp_req->req != NULL);
- req = tcp_req->req;
- nvme_tcp_req_complete(req, &cpl);
+ nvme_tcp_req_complete(tcp_req, &cpl);
nvme_tcp_req_put(tqpair, tcp_req);
}
}
+
+static struct spdk_nvme_transport_poll_group *
+nvme_tcp_poll_group_create(void)
+{
+ struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));
+
+ if (group == NULL) {
+ SPDK_ERRLOG("Unable to allocate poll group.\n");
+ return NULL;
+ }
+
+ group->sock_group = spdk_sock_group_create(group);
+ if (group->sock_group == NULL) {
+ free(group);
+ SPDK_ERRLOG("Unable to allocate sock group.\n");
+ return NULL;
+ }
+
+ return &group->group;
+}
+
+static int
+nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
+{
+ struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
+ struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+
+ if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
+ return -EPROTO;
+ }
+ return 0;
+}
+
+static int
+nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
+{
+ struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
+ struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+
+ if (tqpair->sock && group->sock_group) {
+ if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
+ return -EPROTO;
+ }
+ }
+ return 0;
+}
+
+static int
+nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
+ struct spdk_nvme_qpair *qpair)
+{
+ struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+ struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
+
+	/* Disconnected qpairs won't have a sock to add. */
+ if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
+ if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
+ return -EPROTO;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
+ struct spdk_nvme_qpair *qpair)
+{
+ if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) {
+ return nvme_poll_group_disconnect_qpair(qpair);
+ }
+
+ return 0;
+}
+
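+/* Poll every socket in the group once; readable sockets drive
+ * nvme_tcp_qpair_sock_cb(), which updates group->num_completions, then give
+ * the upper layer a chance to reap disconnected qpairs.
+ */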
+static int64_t
+nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
+ uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
+{
+ struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
+ struct spdk_nvme_qpair *qpair, *tmp_qpair;
+
+ group->completions_per_qpair = completions_per_qpair;
+ group->num_completions = 0;
+
+ spdk_sock_group_poll(group->sock_group);
+
+ STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
+ disconnected_qpair_cb(qpair, tgroup->group->ctx);
+ }
+
+ return group->num_completions;
+}
+
+static int
+nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
+{
+ int rc;
+ struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
+
+ if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
+ return -EBUSY;
+ }
+
+ rc = spdk_sock_group_close(&group->sock_group);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
+ assert(false);
+ }
+
+ free(tgroup);
+
+ return 0;
+}
+
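+/* Function table through which the generic NVMe transport layer dispatches
+ * into this TCP transport; registered below via SPDK_NVME_TRANSPORT_REGISTER().
+ */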
+const struct spdk_nvme_transport_ops tcp_ops = {
+ .name = "TCP",
+ .type = SPDK_NVME_TRANSPORT_TCP,
+ .ctrlr_construct = nvme_tcp_ctrlr_construct,
+ .ctrlr_scan = nvme_fabric_ctrlr_scan,
+ .ctrlr_destruct = nvme_tcp_ctrlr_destruct,
+ .ctrlr_enable = nvme_tcp_ctrlr_enable,
+
+ .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
+ .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
+ .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
+ .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
+
+ .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
+ .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,
+
+ .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
+ .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
+ .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
+ .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,
+
+ .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
+ .qpair_reset = nvme_tcp_qpair_reset,
+ .qpair_submit_request = nvme_tcp_qpair_submit_request,
+ .qpair_process_completions = nvme_tcp_qpair_process_completions,
+ .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
+ .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,
+
+ .poll_group_create = nvme_tcp_poll_group_create,
+ .poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
+ .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
+ .poll_group_add = nvme_tcp_poll_group_add,
+ .poll_group_remove = nvme_tcp_poll_group_remove,
+ .poll_group_process_completions = nvme_tcp_poll_group_process_completions,
+ .poll_group_destroy = nvme_tcp_poll_group_destroy,
+};
+
+SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);