9f95a23c
TL
1/*-
2 * BSD LICENSE
3 *
f67539c2
TL
4 * Copyright (c) Intel Corporation. All rights reserved.
5 * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
9f95a23c
TL
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include "spdk/stdinc.h"
35#include "spdk/crc32.h"
36#include "spdk/endian.h"
37#include "spdk/assert.h"
38#include "spdk/thread.h"
f67539c2 39#include "spdk/nvmf_transport.h"
9f95a23c
TL
40#include "spdk/sock.h"
41#include "spdk/string.h"
42#include "spdk/trace.h"
43#include "spdk/util.h"
44
f67539c2 45#include "spdk_internal/assert.h"
9f95a23c
TL
46#include "spdk_internal/log.h"
47#include "spdk_internal/nvme_tcp.h"
48
f67539c2 49#include "nvmf_internal.h"
9f95a23c 50
f67539c2
TL
51#define NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME 16
52#define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
9f95a23c 53
f67539c2 54const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp;
9f95a23c
TL
55
56/* spdk nvmf related structure */
57enum spdk_nvmf_tcp_req_state {
58
59 /* The request is not currently in use */
60 TCP_REQUEST_STATE_FREE = 0,
61
62 /* Initial state when request first received */
63 TCP_REQUEST_STATE_NEW,
64
65 /* The request is queued until a data buffer is available. */
66 TCP_REQUEST_STATE_NEED_BUFFER,
67
68 /* The request is currently transferring data from the host to the controller. */
69 TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
70
f67539c2
TL
71 /* The request is waiting for the R2T send acknowledgement. */
72 TCP_REQUEST_STATE_AWAITING_R2T_ACK,
73
9f95a23c
TL
74 /* The request is ready to execute at the block device */
75 TCP_REQUEST_STATE_READY_TO_EXECUTE,
76
77 /* The request is currently executing at the block device */
78 TCP_REQUEST_STATE_EXECUTING,
79
80 /* The request finished executing at the block device */
81 TCP_REQUEST_STATE_EXECUTED,
82
83 /* The request is ready to send a completion */
84 TCP_REQUEST_STATE_READY_TO_COMPLETE,
85
86 /* The request is currently transferring final pdus from the controller to the host. */
87 TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
88
89 /* The request completed and can be marked free. */
90 TCP_REQUEST_STATE_COMPLETED,
91
92 /* Terminator */
93 TCP_REQUEST_NUM_STATES,
94};
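/*
 * Rough state-flow sketch (derived from the handlers below, illustrative
 * only): a write that does not fit in the capsule typically moves
 *   FREE -> NEW -> NEED_BUFFER -> AWAITING_R2T_ACK
 *        -> TRANSFERRING_HOST_TO_CONTROLLER -> READY_TO_EXECUTE
 *        -> EXECUTING -> EXECUTED -> READY_TO_COMPLETE
 *        -> TRANSFERRING_CONTROLLER_TO_HOST -> COMPLETED -> FREE
 * Reads and in-capsule writes skip the R2T/H2C states.
 */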
95
96static const char *spdk_nvmf_tcp_term_req_fes_str[] = {
97 "Invalid PDU Header Field",
98 "PDU Sequence Error",
99	"Header Digest Error",
100 "Data Transfer Out of Range",
101 "R2T Limit Exceeded",
102 "Unsupported parameter",
103};
104
105#define OBJECT_NVMF_TCP_IO 0x80
106
107#define TRACE_GROUP_NVMF_TCP 0x5
108#define TRACE_TCP_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x0)
109#define TRACE_TCP_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x1)
110#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x2)
111#define TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x3)
112#define TRACE_TCP_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x4)
113#define TRACE_TCP_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x5)
114#define TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x6)
115#define TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x7)
116#define TRACE_TCP_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x8)
117#define TRACE_TCP_FLUSH_WRITEBUF_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0x9)
118#define TRACE_TCP_FLUSH_WRITEBUF_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xA)
119#define TRACE_TCP_READ_FROM_SOCKET_DONE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xB)
f67539c2 120#define TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK SPDK_TPOINT_ID(TRACE_GROUP_NVMF_TCP, 0xC)
9f95a23c
TL
121
122SPDK_TRACE_REGISTER_FN(nvmf_tcp_trace, "nvmf_tcp", TRACE_GROUP_NVMF_TCP)
123{
124 spdk_trace_register_object(OBJECT_NVMF_TCP_IO, 'r');
125 spdk_trace_register_description("TCP_REQ_NEW",
126 TRACE_TCP_REQUEST_STATE_NEW,
127 OWNER_NONE, OBJECT_NVMF_TCP_IO, 1, 1, "");
128 spdk_trace_register_description("TCP_REQ_NEED_BUFFER",
129 TRACE_TCP_REQUEST_STATE_NEED_BUFFER,
130 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
131 spdk_trace_register_description("TCP_REQ_TX_H_TO_C",
132 TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
133 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
134 spdk_trace_register_description("TCP_REQ_RDY_TO_EXECUTE",
135 TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE,
136 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
137 spdk_trace_register_description("TCP_REQ_EXECUTING",
138 TRACE_TCP_REQUEST_STATE_EXECUTING,
139 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
140 spdk_trace_register_description("TCP_REQ_EXECUTED",
141 TRACE_TCP_REQUEST_STATE_EXECUTED,
142 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
143 spdk_trace_register_description("TCP_REQ_RDY_TO_COMPLETE",
144 TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE,
145 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
146 spdk_trace_register_description("TCP_REQ_TRANSFER_C2H",
147 TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
148 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
149 spdk_trace_register_description("TCP_REQ_COMPLETED",
150 TRACE_TCP_REQUEST_STATE_COMPLETED,
151 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
152 spdk_trace_register_description("TCP_WRITE_START",
153 TRACE_TCP_FLUSH_WRITEBUF_START,
154 OWNER_NONE, OBJECT_NONE, 0, 0, "");
155 spdk_trace_register_description("TCP_WRITE_DONE",
156 TRACE_TCP_FLUSH_WRITEBUF_DONE,
157 OWNER_NONE, OBJECT_NONE, 0, 0, "");
158 spdk_trace_register_description("TCP_READ_DONE",
159 TRACE_TCP_READ_FROM_SOCKET_DONE,
160 OWNER_NONE, OBJECT_NONE, 0, 0, "");
f67539c2
TL
161 spdk_trace_register_description("TCP_REQ_AWAIT_R2T_ACK",
162 TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK,
163 OWNER_NONE, OBJECT_NVMF_TCP_IO, 0, 1, "");
9f95a23c
TL
164}
165
166struct spdk_nvmf_tcp_req {
167 struct spdk_nvmf_request req;
168 struct spdk_nvme_cpl rsp;
169 struct spdk_nvme_cmd cmd;
170
f67539c2
TL
171 /* A PDU that can be used for sending responses. This is
172 * not the incoming PDU! */
173 struct nvme_tcp_pdu *pdu;
174
175 /*
176 * The PDU for a request may be used multiple times in serial over
177 * the request's lifetime. For example, first to send an R2T, then
178 * to send a completion. To catch mistakes where the PDU is used
179 * twice at the same time, add a debug flag here for init/fini.
180 */
181 bool pdu_in_use;
182
9f95a23c
TL
183 /* In-capsule data buffer */
184 uint8_t *buf;
185
f67539c2 186 bool has_incapsule_data;
9f95a23c
TL
187
188 /* transfer_tag */
189 uint16_t ttag;
190
f67539c2 191 enum spdk_nvmf_tcp_req_state state;
9f95a23c
TL
192
193 /*
f67539c2 194 * h2c_offset is used when we receive the h2c_data PDU.
9f95a23c 195 */
f67539c2 196 uint32_t h2c_offset;
9f95a23c 197
f67539c2 198 STAILQ_ENTRY(spdk_nvmf_tcp_req) link;
9f95a23c
TL
199 TAILQ_ENTRY(spdk_nvmf_tcp_req) state_link;
200};
201
202struct spdk_nvmf_tcp_qpair {
203 struct spdk_nvmf_qpair qpair;
204 struct spdk_nvmf_tcp_poll_group *group;
205 struct spdk_nvmf_tcp_port *port;
206 struct spdk_sock *sock;
9f95a23c
TL
207
208 enum nvme_tcp_pdu_recv_state recv_state;
209 enum nvme_tcp_qpair_state state;
210
f67539c2 211 /* PDU being actively received */
9f95a23c 212 struct nvme_tcp_pdu pdu_in_progress;
f67539c2
TL
213 uint32_t recv_buf_size;
214
215 /* This is a spare PDU used for sending special management
216 * operations. Primarily, this is used for the initial
217 * connection response and c2h termination request. */
218 struct nvme_tcp_pdu mgmt_pdu;
9f95a23c
TL
219
220 TAILQ_HEAD(, nvme_tcp_pdu) send_queue;
9f95a23c 221
f67539c2
TL
222 /* Arrays of in-capsule buffers, requests, and pdus.
223 * Each array is 'resource_count' number of elements */
224 void *bufs;
225 struct spdk_nvmf_tcp_req *reqs;
226 struct nvme_tcp_pdu *pdus;
227 uint32_t resource_count;
9f95a23c
TL
228
229 /* Queues to track the requests in all states */
230 TAILQ_HEAD(, spdk_nvmf_tcp_req) state_queue[TCP_REQUEST_NUM_STATES];
231 /* Number of requests in each state */
f67539c2 232 uint32_t state_cntr[TCP_REQUEST_NUM_STATES];
9f95a23c
TL
233
234 uint8_t cpda;
235
9f95a23c
TL
236 bool host_hdgst_enable;
237 bool host_ddgst_enable;
238
9f95a23c
TL
239 /* IP address */
240 char initiator_addr[SPDK_NVMF_TRADDR_MAX_LEN];
241 char target_addr[SPDK_NVMF_TRADDR_MAX_LEN];
242
243 /* IP port */
244 uint16_t initiator_port;
245 uint16_t target_port;
246
247 /* Timer used to destroy qpair after detecting transport error issue if initiator does
248 * not close the connection.
249 */
250 struct spdk_poller *timeout_poller;
251
252 TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link;
253};
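/*
 * Per-qpair resources (reqs, pdus, in-capsule bufs) are carved out in bulk
 * by nvmf_tcp_qpair_init_mem_resource() and tracked via the per-state
 * queues above; at destroy time state_cntr[TCP_REQUEST_STATE_FREE] is
 * expected to have returned to resource_count.
 */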
254
255struct spdk_nvmf_tcp_poll_group {
256 struct spdk_nvmf_transport_poll_group group;
257 struct spdk_sock_group *sock_group;
258
9f95a23c 259 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs;
f67539c2 260 TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req;
9f95a23c
TL
261};
262
263struct spdk_nvmf_tcp_port {
f67539c2 264 const struct spdk_nvme_transport_id *trid;
9f95a23c 265 struct spdk_sock *listen_sock;
9f95a23c
TL
266 TAILQ_ENTRY(spdk_nvmf_tcp_port) link;
267};
268
269struct spdk_nvmf_tcp_transport {
270 struct spdk_nvmf_transport transport;
271
272 pthread_mutex_t lock;
273
274 TAILQ_HEAD(, spdk_nvmf_tcp_port) ports;
275};
276
f67539c2
TL
277static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
278 struct spdk_nvmf_tcp_req *tcp_req);
9f95a23c
TL
279
280static void
f67539c2
TL
281nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
282 enum spdk_nvmf_tcp_req_state state)
9f95a23c
TL
283{
284 struct spdk_nvmf_qpair *qpair;
285 struct spdk_nvmf_tcp_qpair *tqpair;
286
287 qpair = tcp_req->req.qpair;
288 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
289
290 TAILQ_REMOVE(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
f67539c2 291 assert(tqpair->state_cntr[tcp_req->state] > 0);
9f95a23c 292 tqpair->state_cntr[tcp_req->state]--;
9f95a23c
TL
293
294 TAILQ_INSERT_TAIL(&tqpair->state_queue[state], tcp_req, state_link);
295 tqpair->state_cntr[state]++;
296
297 tcp_req->state = state;
298}
299
f67539c2
TL
300static inline struct nvme_tcp_pdu *
301nvmf_tcp_req_pdu_init(struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c 302{
f67539c2
TL
303 assert(tcp_req->pdu_in_use == false);
304 tcp_req->pdu_in_use = true;
9f95a23c 305
f67539c2
TL
306 memset(tcp_req->pdu, 0, sizeof(*tcp_req->pdu));
307 tcp_req->pdu->qpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
9f95a23c 308
f67539c2 309 return tcp_req->pdu;
9f95a23c
TL
310}
311
f67539c2
TL
312static inline void
313nvmf_tcp_req_pdu_fini(struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c 314{
f67539c2 315 tcp_req->pdu_in_use = false;
9f95a23c
TL
316}
317
318static struct spdk_nvmf_tcp_req *
f67539c2 319nvmf_tcp_req_get(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
320{
321 struct spdk_nvmf_tcp_req *tcp_req;
322
323 tcp_req = TAILQ_FIRST(&tqpair->state_queue[TCP_REQUEST_STATE_FREE]);
324 if (!tcp_req) {
9f95a23c
TL
325 return NULL;
326 }
327
9f95a23c 328 memset(&tcp_req->rsp, 0, sizeof(tcp_req->rsp));
f67539c2 329 tcp_req->h2c_offset = 0;
9f95a23c 330 tcp_req->has_incapsule_data = false;
f67539c2 331 tcp_req->req.dif.dif_insert_or_strip = false;
9f95a23c 332
f67539c2 333 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEW);
9f95a23c
TL
334 return tcp_req;
335}
336
337static void
338nvmf_tcp_request_free(struct spdk_nvmf_tcp_req *tcp_req)
339{
340 struct spdk_nvmf_tcp_transport *ttransport;
341
f67539c2 342 assert(tcp_req != NULL);
9f95a23c
TL
343
344 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req=%p will be freed\n", tcp_req);
345 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
346 struct spdk_nvmf_tcp_transport, transport);
f67539c2
TL
347 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
348 nvmf_tcp_req_process(ttransport, tcp_req);
9f95a23c
TL
349}
350
351static int
f67539c2 352nvmf_tcp_req_free(struct spdk_nvmf_request *req)
9f95a23c
TL
353{
354 struct spdk_nvmf_tcp_req *tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
355
356 nvmf_tcp_request_free(tcp_req);
357
358 return 0;
359}
360
361static void
f67539c2
TL
362nvmf_tcp_drain_state_queue(struct spdk_nvmf_tcp_qpair *tqpair,
363 enum spdk_nvmf_tcp_req_state state)
9f95a23c
TL
364{
365 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
366
367 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[state], state_link, req_tmp) {
368 nvmf_tcp_request_free(tcp_req);
369 }
370}
371
372static void
f67539c2 373nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
374{
375 struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
9f95a23c 376
f67539c2 377 assert(TAILQ_EMPTY(&tqpair->send_queue));
9f95a23c 378
f67539c2
TL
379 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
380 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
9f95a23c
TL
381
382 /* Wipe the requests waiting for buffer from the global list */
383 TAILQ_FOREACH_SAFE(tcp_req, &tqpair->state_queue[TCP_REQUEST_STATE_NEED_BUFFER], state_link,
384 req_tmp) {
f67539c2
TL
385 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue, &tcp_req->req,
386 spdk_nvmf_request, buf_link);
9f95a23c
TL
387 }
388
f67539c2
TL
389 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
390 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
391 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
392 nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
9f95a23c
TL
393}
394
395static void
396nvmf_tcp_dump_qpair_req_contents(struct spdk_nvmf_tcp_qpair *tqpair)
397{
398 int i;
399 struct spdk_nvmf_tcp_req *tcp_req;
400
401 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", tqpair->qpair.qid);
402 for (i = 1; i < TCP_REQUEST_NUM_STATES; i++) {
f67539c2 403 SPDK_ERRLOG("\tNum of requests in state[%d] = %u\n", i, tqpair->state_cntr[i]);
9f95a23c 404 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[i], state_link) {
f67539c2 405 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", tcp_req->req.data_from_pool);
9f95a23c
TL
406 SPDK_ERRLOG("\t\tRequest opcode: %d\n", tcp_req->req.cmd->nvmf_cmd.opcode);
407 }
408 }
409}
410
411static void
f67539c2 412nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
413{
414 int err = 0;
415
416 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
417
f67539c2
TL
418 err = spdk_sock_close(&tqpair->sock);
419 assert(err == 0);
420 nvmf_tcp_cleanup_all_states(tqpair);
9f95a23c 421
f67539c2 422 if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) {
9f95a23c
TL
423 SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
424 tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
f67539c2 425 tqpair->resource_count);
9f95a23c
TL
426 err++;
427 }
428
429 if (err > 0) {
430 nvmf_tcp_dump_qpair_req_contents(tqpair);
431 }
f67539c2
TL
432
433 spdk_dma_free(tqpair->pdus);
9f95a23c 434 free(tqpair->reqs);
f67539c2 435 spdk_free(tqpair->bufs);
9f95a23c
TL
436 free(tqpair);
437 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Leave\n");
438}
439
440static int
f67539c2 441nvmf_tcp_destroy(struct spdk_nvmf_transport *transport)
9f95a23c
TL
442{
443 struct spdk_nvmf_tcp_transport *ttransport;
444
445 assert(transport != NULL);
446 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
447
448 pthread_mutex_destroy(&ttransport->lock);
449 free(ttransport);
450 return 0;
451}
452
453static struct spdk_nvmf_transport *
f67539c2 454nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
9f95a23c
TL
455{
456 struct spdk_nvmf_tcp_transport *ttransport;
457 uint32_t sge_count;
458 uint32_t min_shared_buffers;
459
460 ttransport = calloc(1, sizeof(*ttransport));
461 if (!ttransport) {
462 return NULL;
463 }
464
465 TAILQ_INIT(&ttransport->ports);
466
467 ttransport->transport.ops = &spdk_nvmf_transport_tcp;
468
469 SPDK_NOTICELOG("*** TCP Transport Init ***\n");
470
471 SPDK_INFOLOG(SPDK_LOG_NVMF_TCP, "*** TCP Transport Init ***\n"
472 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
f67539c2 473 " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
9f95a23c 474 " in_capsule_data_size=%d, max_aq_depth=%d\n"
f67539c2
TL
475 " num_shared_buffers=%d, c2h_success=%d,\n"
476 " dif_insert_or_strip=%d, sock_priority=%d\n"
477 " abort_timeout_sec=%d\n",
9f95a23c
TL
478 opts->max_queue_depth,
479 opts->max_io_size,
f67539c2 480 opts->max_qpairs_per_ctrlr - 1,
9f95a23c
TL
481 opts->io_unit_size,
482 opts->in_capsule_data_size,
483 opts->max_aq_depth,
f67539c2
TL
484 opts->num_shared_buffers,
485 opts->c2h_success,
486 opts->dif_insert_or_strip,
487 opts->sock_priority,
488 opts->abort_timeout_sec);
489
490 if (opts->sock_priority > SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY) {
491	SPDK_ERRLOG("Unsupported socket_priority=%d, the supported range is 0 to %d\n"
492	"(see 'man 7 socket', SO_PRIORITY, for the valid range)\n",
493 opts->sock_priority, SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY);
494 free(ttransport);
495 return NULL;
496 }
9f95a23c
TL
497
498 /* I/O unit size cannot be larger than max I/O size */
499 if (opts->io_unit_size > opts->max_io_size) {
500 opts->io_unit_size = opts->max_io_size;
501 }
502
503 sge_count = opts->max_io_size / opts->io_unit_size;
504 if (sge_count > SPDK_NVMF_MAX_SGL_ENTRIES) {
505 SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
506 free(ttransport);
507 return NULL;
508 }
509
510 min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size;
511 if (min_shared_buffers > opts->num_shared_buffers) {
512	SPDK_ERRLOG("There are not enough buffers to satisfy "
513	"per-poll group caches for each thread. (%" PRIu32 ") "
514	"supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
515 SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
f67539c2 516 nvmf_tcp_destroy(&ttransport->transport);
9f95a23c
TL
517 return NULL;
518 }
519
520 pthread_mutex_init(&ttransport->lock, NULL);
521
522 return &ttransport->transport;
523}
524
525static int
f67539c2 526nvmf_tcp_trsvcid_to_int(const char *trsvcid)
9f95a23c
TL
527{
528 unsigned long long ull;
529 char *end = NULL;
530
531 ull = strtoull(trsvcid, &end, 10);
532 if (end == NULL || end == trsvcid || *end != '\0') {
533 return -1;
534 }
535
536 /* Valid TCP/IP port numbers are in [0, 65535] */
537 if (ull > 65535) {
538 return -1;
539 }
540
541 return (int)ull;
542}
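/*
 * Illustrative behaviour of the parser above:
 *   nvmf_tcp_trsvcid_to_int("4420")  == 4420
 *   nvmf_tcp_trsvcid_to_int("65536") == -1   (outside [0, 65535])
 *   nvmf_tcp_trsvcid_to_int("4420x") == -1   (trailing characters)
 * The negative return is what the canonicalize/listen paths below treat
 * as an invalid service identifier.
 */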
543
544/**
545 * Canonicalize a listen address trid.
546 */
547static int
f67539c2
TL
548nvmf_tcp_canon_listen_trid(struct spdk_nvme_transport_id *canon_trid,
549 const struct spdk_nvme_transport_id *trid)
9f95a23c
TL
550{
551 int trsvcid_int;
552
f67539c2 553 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
9f95a23c
TL
554 if (trsvcid_int < 0) {
555 return -EINVAL;
556 }
557
558 memset(canon_trid, 0, sizeof(*canon_trid));
f67539c2 559 spdk_nvme_trid_populate_transport(canon_trid, SPDK_NVME_TRANSPORT_TCP);
9f95a23c
TL
560 canon_trid->adrfam = trid->adrfam;
561 snprintf(canon_trid->traddr, sizeof(canon_trid->traddr), "%s", trid->traddr);
562 snprintf(canon_trid->trsvcid, sizeof(canon_trid->trsvcid), "%d", trsvcid_int);
563
564 return 0;
565}
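/*
 * Canonicalization mostly normalizes the service id: an illustrative
 * trsvcid of "04420" becomes "4420" because the parsed integer is printed
 * back with %d, while traddr and adrfam are copied through unchanged.
 */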
566
567/**
568 * Find an existing listening port.
569 *
570 * Caller must hold ttransport->lock.
571 */
572static struct spdk_nvmf_tcp_port *
f67539c2
TL
573nvmf_tcp_find_port(struct spdk_nvmf_tcp_transport *ttransport,
574 const struct spdk_nvme_transport_id *trid)
9f95a23c
TL
575{
576 struct spdk_nvme_transport_id canon_trid;
577 struct spdk_nvmf_tcp_port *port;
578
f67539c2 579 if (nvmf_tcp_canon_listen_trid(&canon_trid, trid) != 0) {
9f95a23c
TL
580 return NULL;
581 }
582
583 TAILQ_FOREACH(port, &ttransport->ports, link) {
f67539c2 584 if (spdk_nvme_transport_id_compare(&canon_trid, port->trid) == 0) {
9f95a23c
TL
585 return port;
586 }
587 }
588
589 return NULL;
590}
591
592static int
f67539c2
TL
593nvmf_tcp_listen(struct spdk_nvmf_transport *transport,
594 const struct spdk_nvme_transport_id *trid)
9f95a23c
TL
595{
596 struct spdk_nvmf_tcp_transport *ttransport;
597 struct spdk_nvmf_tcp_port *port;
598 int trsvcid_int;
599 uint8_t adrfam;
f67539c2 600 struct spdk_sock_opts opts;
9f95a23c
TL
601
602 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
603
f67539c2 604 trsvcid_int = nvmf_tcp_trsvcid_to_int(trid->trsvcid);
9f95a23c
TL
605 if (trsvcid_int < 0) {
606 SPDK_ERRLOG("Invalid trsvcid '%s'\n", trid->trsvcid);
607 return -EINVAL;
608 }
609
610 pthread_mutex_lock(&ttransport->lock);
9f95a23c
TL
611 port = calloc(1, sizeof(*port));
612 if (!port) {
613 SPDK_ERRLOG("Port allocation failed\n");
9f95a23c
TL
614 pthread_mutex_unlock(&ttransport->lock);
615 return -ENOMEM;
616 }
617
f67539c2
TL
618 port->trid = trid;
619 opts.opts_size = sizeof(opts);
620 spdk_sock_get_default_opts(&opts);
621 opts.priority = transport->opts.sock_priority;
622 port->listen_sock = spdk_sock_listen_ext(trid->traddr, trsvcid_int,
623 NULL, &opts);
9f95a23c
TL
624 if (port->listen_sock == NULL) {
625 SPDK_ERRLOG("spdk_sock_listen(%s, %d) failed: %s (%d)\n",
626 trid->traddr, trsvcid_int,
627 spdk_strerror(errno), errno);
628 free(port);
629 pthread_mutex_unlock(&ttransport->lock);
630 return -errno;
631 }
632
633 if (spdk_sock_is_ipv4(port->listen_sock)) {
634 adrfam = SPDK_NVMF_ADRFAM_IPV4;
635 } else if (spdk_sock_is_ipv6(port->listen_sock)) {
636 adrfam = SPDK_NVMF_ADRFAM_IPV6;
637 } else {
638 SPDK_ERRLOG("Unhandled socket type\n");
639 adrfam = 0;
640 }
641
642 if (adrfam != trid->adrfam) {
643 SPDK_ERRLOG("Socket address family mismatch\n");
644 spdk_sock_close(&port->listen_sock);
645 free(port);
646 pthread_mutex_unlock(&ttransport->lock);
647 return -EINVAL;
648 }
649
f67539c2
TL
650 SPDK_NOTICELOG("*** NVMe/TCP Target Listening on %s port %s ***\n",
651 trid->traddr, trid->trsvcid);
9f95a23c
TL
652
653 TAILQ_INSERT_TAIL(&ttransport->ports, port, link);
654 pthread_mutex_unlock(&ttransport->lock);
9f95a23c
TL
655 return 0;
656}
657
f67539c2
TL
658static void
659nvmf_tcp_stop_listen(struct spdk_nvmf_transport *transport,
660 const struct spdk_nvme_transport_id *trid)
9f95a23c
TL
661{
662 struct spdk_nvmf_tcp_transport *ttransport;
663 struct spdk_nvmf_tcp_port *port;
9f95a23c
TL
664
665 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
666
667 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Removing listen address %s port %s\n",
668 trid->traddr, trid->trsvcid);
669
670 pthread_mutex_lock(&ttransport->lock);
f67539c2 671 port = nvmf_tcp_find_port(ttransport, trid);
9f95a23c 672 if (port) {
f67539c2
TL
673 TAILQ_REMOVE(&ttransport->ports, port, link);
674 spdk_sock_close(&port->listen_sock);
675 free(port);
9f95a23c 676 }
9f95a23c 677
f67539c2 678 pthread_mutex_unlock(&ttransport->lock);
9f95a23c
TL
679}
680
f67539c2
TL
681static void nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
682 enum nvme_tcp_pdu_recv_state state);
9f95a23c 683
f67539c2
TL
684static void
685nvmf_tcp_qpair_disconnect(struct spdk_nvmf_tcp_qpair *tqpair)
686{
687 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Disconnecting qpair %p\n", tqpair);
9f95a23c 688
f67539c2
TL
689 if (tqpair->state <= NVME_TCP_QPAIR_STATE_RUNNING) {
690 tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
691 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
692 spdk_poller_unregister(&tqpair->timeout_poller);
9f95a23c 693
f67539c2
TL
694 /* This will end up calling nvmf_tcp_close_qpair */
695 spdk_nvmf_qpair_disconnect(&tqpair->qpair, NULL, NULL);
9f95a23c 696 }
9f95a23c
TL
697}
698
f67539c2
TL
699static void
700_pdu_write_done(void *_pdu, int err)
9f95a23c 701{
f67539c2
TL
702 struct nvme_tcp_pdu *pdu = _pdu;
703 struct spdk_nvmf_tcp_qpair *tqpair = pdu->qpair;
9f95a23c 704
f67539c2
TL
705 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
706
707 if (err != 0) {
708 nvmf_tcp_qpair_disconnect(tqpair);
709 return;
9f95a23c
TL
710 }
711
f67539c2
TL
712 assert(pdu->cb_fn != NULL);
713 pdu->cb_fn(pdu->cb_arg);
9f95a23c
TL
714}
715
716static void
f67539c2
TL
717nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
718 struct nvme_tcp_pdu *pdu,
719 nvme_tcp_qpair_xfer_complete_cb cb_fn,
720 void *cb_arg)
9f95a23c 721{
9f95a23c
TL
722 int hlen;
723 uint32_t crc32c;
f67539c2
TL
724 uint32_t mapped_length = 0;
725 ssize_t rc;
726
727 assert(&tqpair->pdu_in_progress != pdu);
9f95a23c
TL
728
729 hlen = pdu->hdr.common.hlen;
9f95a23c
TL
730
731 /* Header Digest */
f67539c2 732 if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->host_hdgst_enable) {
9f95a23c
TL
733 crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
734 MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + hlen, crc32c);
735 }
736
737 /* Data Digest */
f67539c2 738 if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && tqpair->host_ddgst_enable) {
9f95a23c
TL
739 crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
740 MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
741 }
742
743 pdu->cb_fn = cb_fn;
744 pdu->cb_arg = cb_arg;
f67539c2
TL
745
746 pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
747 tqpair->host_hdgst_enable, tqpair->host_ddgst_enable,
748 &mapped_length);
749 pdu->sock_req.cb_fn = _pdu_write_done;
750 pdu->sock_req.cb_arg = pdu;
9f95a23c 751 TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
f67539c2
TL
752 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP ||
753 pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ) {
754 rc = spdk_sock_writev(tqpair->sock, pdu->iov, pdu->sock_req.iovcnt);
755 if (rc == mapped_length) {
756 _pdu_write_done(pdu, 0);
757 } else {
758 SPDK_ERRLOG("IC_RESP or TERM_REQ could not write to socket.\n");
759 _pdu_write_done(pdu, -1);
760 }
761 } else {
762 spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
763 }
9f95a23c
TL
764}
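/*
 * Note on the two write paths above: IC_RESP and C2H_TERM_REQ are flushed
 * synchronously with spdk_sock_writev(), presumably because the qpair may
 * be torn down right afterwards; every other PDU type is queued through
 * spdk_sock_writev_async() and completes via _pdu_write_done().
 */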
765
766static int
f67539c2 767nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c 768{
f67539c2
TL
769 uint32_t i;
770 struct spdk_nvmf_transport_opts *opts;
771 uint32_t in_capsule_data_size;
772
773 opts = &tqpair->qpair.transport->opts;
774
775 in_capsule_data_size = opts->in_capsule_data_size;
776 if (opts->dif_insert_or_strip) {
777 in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
778 }
9f95a23c 779
f67539c2
TL
780 tqpair->resource_count = opts->max_queue_depth;
781
782 tqpair->mgmt_pdu.qpair = tqpair;
783
784 tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
785 if (!tqpair->reqs) {
786 SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
787 return -1;
788 }
789
790 if (in_capsule_data_size) {
791 tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
792 NULL, SPDK_ENV_LCORE_ID_ANY,
793 SPDK_MALLOC_DMA);
794 if (!tqpair->bufs) {
795 SPDK_ERRLOG("Unable to allocate bufs on tqpair=%p.\n", tqpair);
9f95a23c
TL
796 return -1;
797 }
f67539c2 798 }
9f95a23c 799
f67539c2
TL
800 tqpair->pdus = spdk_dma_malloc(tqpair->resource_count * sizeof(*tqpair->pdus), 0x1000, NULL);
801 if (!tqpair->pdus) {
802 SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
803 return -1;
804 }
9f95a23c 805
f67539c2
TL
806 for (i = 0; i < tqpair->resource_count; i++) {
807 struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
808
809 tcp_req->ttag = i + 1;
9f95a23c
TL
810 tcp_req->req.qpair = &tqpair->qpair;
811
f67539c2
TL
812 tcp_req->pdu = &tqpair->pdus[i];
813 tcp_req->pdu->qpair = tqpair;
814
9f95a23c 815 /* Set up memory to receive commands */
f67539c2
TL
816 if (tqpair->bufs) {
817 tcp_req->buf = (void *)((uintptr_t)tqpair->bufs + (i * in_capsule_data_size));
9f95a23c
TL
818 }
819
820	/* Set the cmd and rsp */
821 tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
822 tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
823
824 /* Initialize request state to FREE */
825 tcp_req->state = TCP_REQUEST_STATE_FREE;
826 TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
f67539c2 827 tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
9f95a23c
TL
828 }
829
f67539c2
TL
830 tqpair->recv_buf_size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
831 SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
832
9f95a23c
TL
833 return 0;
834}
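/*
 * Rough sizing example (illustrative numbers): with max_queue_depth = 128
 * and in_capsule_data_size = 4096 this allocates 128 reqs, 128 PDUs and a
 * 512 KiB in-capsule buffer region, and the socket receive buffer is sized
 * from (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) +
 * 2 * SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR.
 */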
835
836static int
f67539c2 837nvmf_tcp_qpair_init(struct spdk_nvmf_qpair *qpair)
9f95a23c
TL
838{
839 struct spdk_nvmf_tcp_qpair *tqpair;
840 int i;
841
842 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
843
844 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New TCP Connection: %p\n", qpair);
845
846 TAILQ_INIT(&tqpair->send_queue);
9f95a23c
TL
847
848 /* Initialise request state queues of the qpair */
849 for (i = TCP_REQUEST_STATE_FREE; i < TCP_REQUEST_NUM_STATES; i++) {
850 TAILQ_INIT(&tqpair->state_queue[i]);
851 }
852
853 tqpair->host_hdgst_enable = true;
854 tqpair->host_ddgst_enable = true;
855
856 return 0;
857}
858
859static int
f67539c2 860nvmf_tcp_qpair_sock_init(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c 861{
9f95a23c 862 int rc;
9f95a23c
TL
863
864 /* set low water mark */
f67539c2 865 rc = spdk_sock_set_recvlowat(tqpair->sock, sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
9f95a23c
TL
866 if (rc != 0) {
867 SPDK_ERRLOG("spdk_sock_set_recvlowat() failed\n");
868 return rc;
869 }
870
871 return 0;
872}
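/*
 * The receive low-water mark is set to the common PDU header size, so the
 * socket is not reported readable until at least a full common header has
 * arrived; this avoids waking the poller for partial headers.
 */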
873
874static void
f67539c2
TL
875nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
876 struct spdk_nvmf_tcp_port *port,
877 struct spdk_sock *sock)
9f95a23c
TL
878{
879 struct spdk_nvmf_tcp_qpair *tqpair;
880 int rc;
881
882 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "New connection accepted on %s port %s\n",
f67539c2 883 port->trid->traddr, port->trid->trsvcid);
9f95a23c
TL
884
885 tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
886 if (tqpair == NULL) {
887 SPDK_ERRLOG("Could not allocate new connection.\n");
888 spdk_sock_close(&sock);
889 return;
890 }
891
892 tqpair->sock = sock;
f67539c2 893 tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
9f95a23c
TL
894 tqpair->port = port;
895 tqpair->qpair.transport = transport;
896
897 rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
898 sizeof(tqpair->target_addr), &tqpair->target_port,
899 tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
900 &tqpair->initiator_port);
901 if (rc < 0) {
902	SPDK_ERRLOG("spdk_sock_getaddr() failed for tqpair=%p\n", tqpair);
f67539c2 903 nvmf_tcp_qpair_destroy(tqpair);
9f95a23c
TL
904 return;
905 }
906
f67539c2 907 spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair);
9f95a23c
TL
908}
909
f67539c2
TL
910static uint32_t
911nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port)
9f95a23c
TL
912{
913 struct spdk_sock *sock;
f67539c2 914 uint32_t count = 0;
9f95a23c
TL
915 int i;
916
917 for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
918 sock = spdk_sock_accept(port->listen_sock);
f67539c2
TL
919 if (sock == NULL) {
920 break;
9f95a23c 921 }
f67539c2
TL
922 count++;
923 nvmf_tcp_handle_connect(transport, port, sock);
9f95a23c 924 }
f67539c2
TL
925
926 return count;
9f95a23c
TL
927}
928
f67539c2
TL
929static uint32_t
930nvmf_tcp_accept(struct spdk_nvmf_transport *transport)
9f95a23c
TL
931{
932 struct spdk_nvmf_tcp_transport *ttransport;
933 struct spdk_nvmf_tcp_port *port;
f67539c2 934 uint32_t count = 0;
9f95a23c
TL
935
936 ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);
937
938 TAILQ_FOREACH(port, &ttransport->ports, link) {
f67539c2 939 count += nvmf_tcp_port_accept(transport, port);
9f95a23c 940 }
f67539c2
TL
941
942 return count;
9f95a23c
TL
943}
944
945static void
f67539c2
TL
946nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
947 struct spdk_nvme_transport_id *trid,
948 struct spdk_nvmf_discovery_log_page_entry *entry)
9f95a23c
TL
949{
950 entry->trtype = SPDK_NVMF_TRTYPE_TCP;
951 entry->adrfam = trid->adrfam;
f67539c2 952 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
9f95a23c
TL
953
954 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
955 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
956
957 entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
958}
959
960static struct spdk_nvmf_transport_poll_group *
f67539c2 961nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
9f95a23c
TL
962{
963 struct spdk_nvmf_tcp_poll_group *tgroup;
964
965 tgroup = calloc(1, sizeof(*tgroup));
966 if (!tgroup) {
967 return NULL;
968 }
969
f67539c2 970 tgroup->sock_group = spdk_sock_group_create(&tgroup->group);
9f95a23c
TL
971 if (!tgroup->sock_group) {
972 goto cleanup;
973 }
974
975 TAILQ_INIT(&tgroup->qpairs);
f67539c2 976 TAILQ_INIT(&tgroup->await_req);
9f95a23c
TL
977
978 return &tgroup->group;
979
980cleanup:
981 free(tgroup);
982 return NULL;
983}
984
f67539c2
TL
985static struct spdk_nvmf_transport_poll_group *
986nvmf_tcp_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
987{
988 struct spdk_nvmf_tcp_qpair *tqpair;
989 struct spdk_sock_group *group = NULL;
990 int rc;
991
992 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
993 rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group);
994 if (!rc && group != NULL) {
995 return spdk_sock_group_get_ctx(group);
996 }
997
998 return NULL;
999}
1000
9f95a23c 1001static void
f67539c2 1002nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
9f95a23c
TL
1003{
1004 struct spdk_nvmf_tcp_poll_group *tgroup;
1005
1006 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
1007 spdk_sock_group_close(&tgroup->sock_group);
1008
9f95a23c
TL
1009 free(tgroup);
1010}
1011
1012static void
f67539c2
TL
1013nvmf_tcp_qpair_set_recv_state(struct spdk_nvmf_tcp_qpair *tqpair,
1014 enum nvme_tcp_pdu_recv_state state)
9f95a23c
TL
1015{
1016 if (tqpair->recv_state == state) {
1017	SPDK_ERRLOG("The recv state of tqpair=%p is already %d\n",
1018 tqpair, state);
1019 return;
1020 }
1021
f67539c2
TL
1022 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
1023 /* When leaving the await req state, move the qpair to the main list */
1024 TAILQ_REMOVE(&tqpair->group->await_req, tqpair, link);
1025 TAILQ_INSERT_TAIL(&tqpair->group->qpairs, tqpair, link);
1026 }
1027
9f95a23c
TL
1028 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv state=%d\n", tqpair, state);
1029 tqpair->recv_state = state;
1030
1031 switch (state) {
1032 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
1033 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
1034 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
1035 break;
f67539c2
TL
1036 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
1037 TAILQ_REMOVE(&tqpair->group->qpairs, tqpair, link);
1038 TAILQ_INSERT_TAIL(&tqpair->group->await_req, tqpair, link);
1039 break;
9f95a23c
TL
1040 case NVME_TCP_PDU_RECV_STATE_ERROR:
1041 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1042 memset(&tqpair->pdu_in_progress, 0, sizeof(tqpair->pdu_in_progress));
1043 break;
1044 default:
1045 SPDK_ERRLOG("The state(%d) is invalid\n", state);
1046 abort();
1047 break;
1048 }
1049}
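/*
 * Qpairs that are simply waiting for the next capsule (AWAIT_REQ) live on
 * the separate tgroup->await_req list, so the transitions above shuffle
 * the qpair between that list and tgroup->qpairs, presumably letting the
 * poll loop treat idle qpairs differently from those with a PDU in flight.
 */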
1050
1051static int
f67539c2 1052nvmf_tcp_qpair_handle_timeout(void *ctx)
9f95a23c
TL
1053{
1054 struct spdk_nvmf_tcp_qpair *tqpair = ctx;
1055
1056 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR);
1057
1058 SPDK_ERRLOG("No pdu coming for tqpair=%p within %d seconds\n", tqpair,
1059 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT);
9f95a23c 1060
f67539c2
TL
1061 nvmf_tcp_qpair_disconnect(tqpair);
1062 return SPDK_POLLER_BUSY;
9f95a23c
TL
1063}
1064
1065static void
f67539c2 1066nvmf_tcp_send_c2h_term_req_complete(void *cb_arg)
9f95a23c
TL
1067{
1068 struct spdk_nvmf_tcp_qpair *tqpair = (struct spdk_nvmf_tcp_qpair *)cb_arg;
1069
1070 if (!tqpair->timeout_poller) {
f67539c2 1071 tqpair->timeout_poller = SPDK_POLLER_REGISTER(nvmf_tcp_qpair_handle_timeout, tqpair,
9f95a23c
TL
1072 SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT * 1000000);
1073 }
1074}
1075
1076static void
f67539c2
TL
1077nvmf_tcp_send_c2h_term_req(struct spdk_nvmf_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1078 enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
9f95a23c
TL
1079{
1080 struct nvme_tcp_pdu *rsp_pdu;
1081 struct spdk_nvme_tcp_term_req_hdr *c2h_term_req;
1082 uint32_t c2h_term_req_hdr_len = sizeof(*c2h_term_req);
1083 uint32_t copy_len;
1084
f67539c2 1085 rsp_pdu = &tqpair->mgmt_pdu;
9f95a23c
TL
1086
1087 c2h_term_req = &rsp_pdu->hdr.term_req;
1088 c2h_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ;
1089 c2h_term_req->common.hlen = c2h_term_req_hdr_len;
1090
1091 if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1092 (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1093 DSET32(&c2h_term_req->fei, error_offset);
1094 }
1095
f67539c2 1096 copy_len = spdk_min(pdu->hdr.common.hlen, SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);
9f95a23c
TL
1097
1098 /* Copy the error info into the buffer */
1099 memcpy((uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, pdu->hdr.raw, copy_len);
1100 nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + c2h_term_req_hdr_len, copy_len);
1101
1102	/* plen also accounts for the copied header of the offending PDU */
1103 c2h_term_req->common.plen = c2h_term_req->common.hlen + copy_len;
f67539c2
TL
1104 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
1105 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_c2h_term_req_complete, tqpair);
9f95a23c
TL
1106}
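/*
 * The c2h term-req built above echoes up to
 * SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE bytes of the offending PDU
 * header back to the host, fills FEI only for the two FES codes that
 * define a field offset, and parks the qpair in the ERROR recv state so
 * no further PDUs are processed.
 */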
1107
1108static void
f67539c2
TL
1109nvmf_tcp_capsule_cmd_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1110 struct spdk_nvmf_tcp_qpair *tqpair,
1111 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1112{
1113 struct spdk_nvmf_tcp_req *tcp_req;
1114
f67539c2
TL
1115 assert(pdu->psh_valid_bytes == pdu->psh_len);
1116 assert(pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD);
1117
1118 tcp_req = nvmf_tcp_req_get(tqpair);
9f95a23c 1119 if (!tcp_req) {
f67539c2
TL
1120	/* Return immediately and let the allocation be retried later */
1121 if (tqpair->state_cntr[TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST] > 0) {
1122 return;
1123 }
1124
1125 /* The host sent more commands than the maximum queue depth. */
1126 SPDK_ERRLOG("Cannot allocate tcp_req on tqpair=%p\n", tqpair);
1127 nvmf_tcp_qpair_disconnect(tqpair);
9f95a23c
TL
1128 return;
1129 }
1130
f67539c2
TL
1131 pdu->req = tcp_req;
1132 assert(tcp_req->state == TCP_REQUEST_STATE_NEW);
1133 nvmf_tcp_req_process(ttransport, tcp_req);
9f95a23c
TL
1134}
1135
1136static void
f67539c2
TL
1137nvmf_tcp_capsule_cmd_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1138 struct spdk_nvmf_tcp_qpair *tqpair,
1139 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1140{
1141 struct spdk_nvmf_tcp_req *tcp_req;
1142 struct spdk_nvme_tcp_cmd *capsule_cmd;
1143 uint32_t error_offset = 0;
1144 enum spdk_nvme_tcp_term_req_fes fes;
1145
1146 capsule_cmd = &pdu->hdr.capsule_cmd;
f67539c2 1147 tcp_req = pdu->req;
9f95a23c
TL
1148 assert(tcp_req != NULL);
1149 if (capsule_cmd->common.pdo > SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET) {
1150	SPDK_ERRLOG("Expected capsule_cmd pdu offset <= %d, got %d\n",
1151 SPDK_NVME_TCP_PDU_PDO_MAX_OFFSET, capsule_cmd->common.pdo);
1152 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1153 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1154 goto err;
1155 }
1156
f67539c2
TL
1157 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1158 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1159 nvmf_tcp_req_process(ttransport, tcp_req);
9f95a23c
TL
1160
1161 return;
1162err:
f67539c2
TL
1163 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
1164}
1165
1166static int
1167nvmf_tcp_find_req_in_state(struct spdk_nvmf_tcp_qpair *tqpair,
1168 enum spdk_nvmf_tcp_req_state state,
1169 uint16_t cid, uint16_t tag,
1170 struct spdk_nvmf_tcp_req **req)
1171{
1172 struct spdk_nvmf_tcp_req *tcp_req = NULL;
1173
1174 TAILQ_FOREACH(tcp_req, &tqpair->state_queue[state], state_link) {
1175 if (tcp_req->req.cmd->nvme_cmd.cid != cid) {
1176 continue;
1177 }
1178
1179 if (tcp_req->ttag == tag) {
1180 *req = tcp_req;
1181 return 0;
1182 }
1183
1184 *req = NULL;
1185 return -1;
1186 }
1187
1188 /* Didn't find it, but not an error */
1189 *req = NULL;
1190 return 0;
9f95a23c
TL
1191}
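/*
 * Return convention of the lookup above: 0 with *req set means cid and
 * ttag both matched; 0 with *req == NULL means no request with that cid
 * is in the given state; -1 means the cid matched but the ttag did not,
 * which the H2C data handler reports as an error at the ttag offset.
 */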
1192
1193static void
f67539c2
TL
1194nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
1195 struct spdk_nvmf_tcp_qpair *tqpair,
1196 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1197{
1198 struct spdk_nvmf_tcp_req *tcp_req;
1199 uint32_t error_offset = 0;
1200 enum spdk_nvme_tcp_term_req_fes fes = 0;
1201 struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
f67539c2 1202 int rc;
9f95a23c
TL
1203
1204 h2c_data = &pdu->hdr.h2c_data;
1205
1206 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
1207 tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
1208
f67539c2
TL
1209 rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
1210 h2c_data->cccid, h2c_data->ttag, &tcp_req);
1211 if (rc == 0 && tcp_req == NULL) {
1212 rc = nvmf_tcp_find_req_in_state(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK, h2c_data->cccid,
1213 h2c_data->ttag, &tcp_req);
9f95a23c
TL
1214 }
1215
1216 if (!tcp_req) {
1217 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tcp_req is not found for tqpair=%p\n", tqpair);
1218 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
f67539c2 1219 if (rc == 0) {
9f95a23c
TL
1220 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
1221 } else {
1222 error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
1223 }
1224 goto err;
1225 }
1226
f67539c2 1227 if (tcp_req->h2c_offset != h2c_data->datao) {
9f95a23c 1228 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
f67539c2
TL
1229 "tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n",
1230 tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao);
9f95a23c
TL
1231 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1232 goto err;
1233 }
1234
1235 if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
1236 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1237	"tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
1238 tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
f67539c2 1239 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
9f95a23c
TL
1240 goto err;
1241 }
1242
f67539c2
TL
1243 pdu->req = tcp_req;
1244
1245 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1246 pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
1247 }
1248
1249 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1250 h2c_data->datao, h2c_data->datal);
1251 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
9f95a23c
TL
1252 return;
1253
1254err:
f67539c2 1255 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1256}
1257
1258static void
f67539c2 1259nvmf_tcp_pdu_cmd_complete(void *cb_arg)
9f95a23c
TL
1260{
1261 struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1262 nvmf_tcp_request_free(tcp_req);
1263}
1264
1265static void
f67539c2
TL
1266nvmf_tcp_send_capsule_resp_pdu(struct spdk_nvmf_tcp_req *tcp_req,
1267 struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
1268{
1269 struct nvme_tcp_pdu *rsp_pdu;
1270 struct spdk_nvme_tcp_rsp *capsule_resp;
1271
1272 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter, tqpair=%p\n", tqpair);
f67539c2
TL
1273
1274 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
1275 assert(rsp_pdu != NULL);
9f95a23c
TL
1276
1277 capsule_resp = &rsp_pdu->hdr.capsule_resp;
1278 capsule_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP;
1279 capsule_resp->common.plen = capsule_resp->common.hlen = sizeof(*capsule_resp);
1280 capsule_resp->rccqe = tcp_req->req.rsp->nvme_cpl;
1281 if (tqpair->host_hdgst_enable) {
1282 capsule_resp->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1283 capsule_resp->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1284 }
1285
f67539c2 1286 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_cmd_complete, tcp_req);
9f95a23c
TL
1287}
1288
1289static void
f67539c2 1290nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
9f95a23c
TL
1291{
1292 struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1293 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair,
1294 struct spdk_nvmf_tcp_qpair, qpair);
1295
1296 assert(tqpair != NULL);
f67539c2 1297 if (tqpair->qpair.transport->opts.c2h_success) {
9f95a23c 1298 nvmf_tcp_request_free(tcp_req);
f67539c2
TL
1299 } else {
1300 nvmf_tcp_req_pdu_fini(tcp_req);
1301 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
9f95a23c 1302 }
f67539c2
TL
1303}
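/*
 * With the c2h_success transport option the final C2H data PDU is treated
 * as the completion, so the request is freed here without a CapsuleResp;
 * otherwise a CapsuleResp PDU is still queued after the data transfer.
 */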
1304
1305static void
1306nvmf_tcp_r2t_complete(void *cb_arg)
1307{
1308 struct spdk_nvmf_tcp_req *tcp_req = cb_arg;
1309 struct spdk_nvmf_tcp_transport *ttransport;
9f95a23c 1310
f67539c2
TL
1311 nvmf_tcp_req_pdu_fini(tcp_req);
1312
1313 ttransport = SPDK_CONTAINEROF(tcp_req->req.qpair->transport,
1314 struct spdk_nvmf_tcp_transport, transport);
1315
1316 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
1317
1318 if (tcp_req->h2c_offset == tcp_req->req.length) {
1319 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1320 nvmf_tcp_req_process(ttransport, tcp_req);
1321 }
9f95a23c
TL
1322}
1323
1324static void
f67539c2
TL
1325nvmf_tcp_send_r2t_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
1326 struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c
TL
1327{
1328 struct nvme_tcp_pdu *rsp_pdu;
1329 struct spdk_nvme_tcp_r2t_hdr *r2t;
1330
f67539c2
TL
1331 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
1332 assert(rsp_pdu != NULL);
9f95a23c
TL
1333
1334 r2t = &rsp_pdu->hdr.r2t;
1335 r2t->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_R2T;
1336 r2t->common.plen = r2t->common.hlen = sizeof(*r2t);
1337
1338 if (tqpair->host_hdgst_enable) {
1339 r2t->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1340 r2t->common.plen += SPDK_NVME_TCP_DIGEST_LEN;
1341 }
1342
1343 r2t->cccid = tcp_req->req.cmd->nvme_cmd.cid;
1344 r2t->ttag = tcp_req->ttag;
f67539c2
TL
1345 r2t->r2to = tcp_req->h2c_offset;
1346 r2t->r2tl = tcp_req->req.length;
1347
1348 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
9f95a23c
TL
1349
1350 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
1351 "tcp_req(%p) on tqpair(%p), r2t_info: cccid=%u, ttag=%u, r2to=%u, r2tl=%u\n",
1352 tcp_req, tqpair, r2t->cccid, r2t->ttag, r2t->r2to, r2t->r2tl);
f67539c2 1353 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_r2t_complete, tcp_req);
9f95a23c
TL
1354}
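/*
 * Illustrative R2T for a 16 KiB write with nothing received yet:
 * r2to = 0 (the current h2c_offset) and r2tl = 16384 (the full request
 * length). The request then waits in AWAITING_R2T_ACK until
 * nvmf_tcp_r2t_complete() runs; H2C data that races ahead of the ack is
 * still matched because the header handler searches both states.
 */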
1355
1356static void
f67539c2
TL
1357nvmf_tcp_h2c_data_payload_handle(struct spdk_nvmf_tcp_transport *ttransport,
1358 struct spdk_nvmf_tcp_qpair *tqpair,
1359 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1360{
1361 struct spdk_nvmf_tcp_req *tcp_req;
1362
f67539c2 1363 tcp_req = pdu->req;
9f95a23c
TL
1364 assert(tcp_req != NULL);
1365
1366 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1367
f67539c2 1368 tcp_req->h2c_offset += pdu->data_len;
9f95a23c 1369
f67539c2
TL
1370 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1371
1372 /* Wait for all of the data to arrive AND for the initial R2T PDU send to be
1373 * acknowledged before moving on. */
1374 if (tcp_req->h2c_offset == tcp_req->req.length &&
1375 tcp_req->state == TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER) {
1376 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
1377 nvmf_tcp_req_process(ttransport, tcp_req);
9f95a23c
TL
1378 }
1379}
1380
1381static void
f67539c2 1382nvmf_tcp_h2c_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *h2c_term_req)
9f95a23c
TL
1383{
1384 SPDK_ERRLOG("Error info of pdu(%p): %s\n", h2c_term_req,
1385 spdk_nvmf_tcp_term_req_fes_str[h2c_term_req->fes]);
1386 if ((h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1387 (h2c_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1388 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The offset from the start of the PDU header is %u\n",
1389 DGET32(h2c_term_req->fei));
1390 }
1391}
1392
1393static void
f67539c2
TL
1394nvmf_tcp_h2c_term_req_hdr_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1395 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1396{
1397 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1398 uint32_t error_offset = 0;
1399 enum spdk_nvme_tcp_term_req_fes fes;
1400
1401
1402 if (h2c_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1403	SPDK_ERRLOG("Fatal Error Status (FES) is unknown for h2c_term_req pdu=%p\n", pdu);
1404 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1405 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1406 goto end;
1407 }
1408
1409 /* set the data buffer */
1410 nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + h2c_term_req->common.hlen,
1411 h2c_term_req->common.plen - h2c_term_req->common.hlen);
f67539c2 1412 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
9f95a23c
TL
1413 return;
1414end:
f67539c2 1415 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1416}
1417
1418static void
f67539c2
TL
1419nvmf_tcp_h2c_term_req_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1420 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1421{
1422 struct spdk_nvme_tcp_term_req_hdr *h2c_term_req = &pdu->hdr.term_req;
1423
f67539c2
TL
1424 nvmf_tcp_h2c_term_req_dump(h2c_term_req);
1425 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
9f95a23c
TL
1426}
1427
1428static void
f67539c2
TL
1429nvmf_tcp_pdu_payload_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1430 struct spdk_nvmf_tcp_transport *ttransport)
9f95a23c
TL
1431{
1432 int rc = 0;
1433 struct nvme_tcp_pdu *pdu;
1434 uint32_t crc32c, error_offset = 0;
1435 enum spdk_nvme_tcp_term_req_fes fes;
9f95a23c
TL
1436
1437 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1438 pdu = &tqpair->pdu_in_progress;
1439
1440 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1441	/* Check the data digest if it is enabled */
1442 if (pdu->ddgst_enable) {
1443 crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1444 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1445 if (rc == 0) {
1446 SPDK_ERRLOG("Data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1447 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
f67539c2 1448 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1449 return;
1450
1451 }
1452 }
1453
9f95a23c
TL
1454 switch (pdu->hdr.common.pdu_type) {
1455 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
f67539c2 1456 nvmf_tcp_capsule_cmd_payload_handle(ttransport, tqpair, pdu);
9f95a23c
TL
1457 break;
1458 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
f67539c2 1459 nvmf_tcp_h2c_data_payload_handle(ttransport, tqpair, pdu);
9f95a23c
TL
1460 break;
1461
1462 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
f67539c2 1463 nvmf_tcp_h2c_term_req_payload_handle(tqpair, pdu);
9f95a23c
TL
1464 break;
1465
1466 default:
1467	/* This path should be unreachable */
1468	SPDK_ERRLOG("The code should not reach here\n");
1469 break;
1470 }
1471}
1472
1473static void
f67539c2 1474nvmf_tcp_send_icresp_complete(void *cb_arg)
9f95a23c
TL
1475{
1476 struct spdk_nvmf_tcp_qpair *tqpair = cb_arg;
1477
1478 tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
1479}
1480
1481static void
f67539c2
TL
1482nvmf_tcp_icreq_handle(struct spdk_nvmf_tcp_transport *ttransport,
1483 struct spdk_nvmf_tcp_qpair *tqpair,
1484 struct nvme_tcp_pdu *pdu)
9f95a23c
TL
1485{
1486 struct spdk_nvme_tcp_ic_req *ic_req = &pdu->hdr.ic_req;
1487 struct nvme_tcp_pdu *rsp_pdu;
1488 struct spdk_nvme_tcp_ic_resp *ic_resp;
1489 uint32_t error_offset = 0;
1490 enum spdk_nvme_tcp_term_req_fes fes;
1491
1492 /* Only PFV 0 is defined currently */
1493 if (ic_req->pfv != 0) {
1494 SPDK_ERRLOG("Expected ICReq PFV %u, got %u\n", 0u, ic_req->pfv);
1495 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1496 error_offset = offsetof(struct spdk_nvme_tcp_ic_req, pfv);
1497 goto end;
1498 }
1499
1500	/* MAXR2T is 0-based */
1501 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "maxr2t =%u\n", (ic_req->maxr2t + 1u));
1502
1503 tqpair->host_hdgst_enable = ic_req->dgst.bits.hdgst_enable ? true : false;
f67539c2
TL
1504 if (!tqpair->host_hdgst_enable) {
1505 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1506 }
1507
9f95a23c 1508 tqpair->host_ddgst_enable = ic_req->dgst.bits.ddgst_enable ? true : false;
f67539c2
TL
1509 if (!tqpair->host_ddgst_enable) {
1510 tqpair->recv_buf_size -= SPDK_NVME_TCP_DIGEST_LEN * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
1511 }
1512
1513 /* Now that we know whether digests are enabled, properly size the receive buffer */
1514 if (spdk_sock_set_recvbuf(tqpair->sock, tqpair->recv_buf_size) < 0) {
1515 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
1516 tqpair,
1517 tqpair->recv_buf_size);
1518 /* Not fatal. */
1519 }
9f95a23c
TL
1520
1521 tqpair->cpda = spdk_min(ic_req->hpda, SPDK_NVME_TCP_CPDA_MAX);
1522 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "cpda of tqpair=(%p) is : %u\n", tqpair, tqpair->cpda);
1523
f67539c2 1524 rsp_pdu = &tqpair->mgmt_pdu;
9f95a23c
TL
1525
1526 ic_resp = &rsp_pdu->hdr.ic_resp;
1527 ic_resp->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_RESP;
1528 ic_resp->common.hlen = ic_resp->common.plen = sizeof(*ic_resp);
1529 ic_resp->pfv = 0;
1530 ic_resp->cpda = tqpair->cpda;
f67539c2 1531 ic_resp->maxh2cdata = ttransport->transport.opts.max_io_size;
9f95a23c
TL
1532 ic_resp->dgst.bits.hdgst_enable = tqpair->host_hdgst_enable ? 1 : 0;
1533 ic_resp->dgst.bits.ddgst_enable = tqpair->host_ddgst_enable ? 1 : 0;
1534
1535 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_hdgst_enable: %u\n", tqpair->host_hdgst_enable);
1536 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "host_ddgst_enable: %u\n", tqpair->host_ddgst_enable);
1537
f67539c2
TL
1538 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
1539 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_send_icresp_complete, tqpair);
1540 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
9f95a23c
TL
1541 return;
1542end:
f67539c2 1543 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1544}
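/*
 * Negotiation sketch: header/data digests are enabled only if the host
 * asked for them in the ICReq and the ICResp echoes that choice; the
 * receive buffer space reserved for a disabled digest is trimmed, and
 * maxh2cdata is advertised as the transport's max_io_size.
 */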
1545
1546static void
f67539c2
TL
1547nvmf_tcp_pdu_psh_handle(struct spdk_nvmf_tcp_qpair *tqpair,
1548 struct spdk_nvmf_tcp_transport *ttransport)
9f95a23c
TL
1549{
1550 struct nvme_tcp_pdu *pdu;
1551 int rc;
1552 uint32_t crc32c, error_offset = 0;
1553 enum spdk_nvme_tcp_term_req_fes fes;
9f95a23c
TL
1554
1555 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1556 pdu = &tqpair->pdu_in_progress;
1557
1558 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "pdu type of tqpair(%p) is %d\n", tqpair,
1559 pdu->hdr.common.pdu_type);
1560 /* check header digest if needed */
1561 if (pdu->has_hdgst) {
1562 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Compare the header of pdu=%p on tqpair=%p\n", pdu, tqpair);
1563 crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1564 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1565 if (rc == 0) {
1566 SPDK_ERRLOG("Header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1567 fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
f67539c2 1568 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1569 return;
1570
1571 }
1572 }
1573
9f95a23c
TL
1574 switch (pdu->hdr.common.pdu_type) {
1575 case SPDK_NVME_TCP_PDU_TYPE_IC_REQ:
f67539c2 1576 nvmf_tcp_icreq_handle(ttransport, tqpair, pdu);
9f95a23c
TL
1577 break;
1578 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
f67539c2 1579 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_REQ);
9f95a23c
TL
1580 break;
1581 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
f67539c2 1582 nvmf_tcp_h2c_data_hdr_handle(ttransport, tqpair, pdu);
9f95a23c
TL
1583 break;
1584
1585 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
f67539c2 1586 nvmf_tcp_h2c_term_req_hdr_handle(tqpair, pdu);
9f95a23c
TL
1587 break;
1588
1589 default:
1590 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->pdu_in_progress.hdr.common.pdu_type);
1591 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1592 error_offset = 1;
f67539c2 1593 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1594 break;
1595 }
1596}
1597
1598static void
f67539c2 1599nvmf_tcp_pdu_ch_handle(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
1600{
1601 struct nvme_tcp_pdu *pdu;
1602 uint32_t error_offset = 0;
1603 enum spdk_nvme_tcp_term_req_fes fes;
1604 uint8_t expected_hlen, pdo;
1605 bool plen_error = false, pdo_error = false;
1606
1607 assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
1608 pdu = &tqpair->pdu_in_progress;
1609
1610 if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_REQ) {
1611 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1612 SPDK_ERRLOG("Already received ICreq PDU, and reject this pdu=%p\n", pdu);
1613 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1614 goto err;
1615 }
1616 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_req);
1617 if (pdu->hdr.common.plen != expected_hlen) {
1618 plen_error = true;
1619 }
1620 } else {
1621 if (tqpair->state != NVME_TCP_QPAIR_STATE_RUNNING) {
1622 SPDK_ERRLOG("The TCP/IP connection is not negotitated\n");
1623 fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1624 goto err;
1625 }
1626
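/* For data-bearing PDU types, a non-zero negotiated cpda requires the PDU data offset (PDO) to
 * equal (cpda + 1) * 4 bytes; any mismatch is reported as an invalid header field below. */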
1627 switch (pdu->hdr.common.pdu_type) {
1628 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1629 expected_hlen = sizeof(struct spdk_nvme_tcp_cmd);
1630 pdo = pdu->hdr.common.pdo;
1631 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1632 pdo_error = true;
1633 break;
1634 }
1635
1636 if (pdu->hdr.common.plen < expected_hlen) {
1637 plen_error = true;
1638 }
1639 break;
1640 case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
1641 expected_hlen = sizeof(struct spdk_nvme_tcp_h2c_data_hdr);
1642 pdo = pdu->hdr.common.pdo;
1643 if ((tqpair->cpda != 0) && (pdo != ((tqpair->cpda + 1) << 2))) {
1644 pdo_error = true;
1645 break;
1646 }
1647 if (pdu->hdr.common.plen < expected_hlen) {
1648 plen_error = true;
1649 }
1650 break;
1651
1652 case SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1653 expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1654 if ((pdu->hdr.common.plen <= expected_hlen) ||
1655 (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1656 plen_error = true;
1657 }
1658 break;
1659
1660 default:
1661 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", pdu->hdr.common.pdu_type);
1662 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1663 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1664 goto err;
1665 }
1666 }
1667
1668 if (pdu->hdr.common.hlen != expected_hlen) {
1669 SPDK_ERRLOG("PDU type=0x%02x, Expected ICReq header length %u, got %u on tqpair=%p\n",
1670 pdu->hdr.common.pdu_type,
1671 expected_hlen, pdu->hdr.common.hlen, tqpair);
1672 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1673 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1674 goto err;
1675 } else if (pdo_error) {
1676 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1677 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdo);
1678 } else if (plen_error) {
1679 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1680 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1681 goto err;
1682 } else {
f67539c2
TL
1683 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1684 nvme_tcp_pdu_calc_psh_len(&tqpair->pdu_in_progress, tqpair->host_hdgst_enable);
9f95a23c
TL
1685 return;
1686 }
1687err:
f67539c2 1688 nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
9f95a23c
TL
1689}
1690
1691static int
f67539c2
TL
1692nvmf_tcp_pdu_payload_insert_dif(struct nvme_tcp_pdu *pdu, uint32_t read_offset,
1693 int read_len)
1694{
1695 int rc;
1696
1697 rc = spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt,
1698 read_offset, read_len, pdu->dif_ctx);
1699 if (rc != 0) {
1700 SPDK_ERRLOG("DIF generate failed\n");
1701 }
1702
1703 return rc;
1704}
1705
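/* Receive-side state machine for a single qpair: read the common header, then the PDU-specific
 * header, then any payload, advancing tqpair->recv_state with whatever bytes the socket has
 * without blocking. */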
1706static int
1707nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
9f95a23c
TL
1708{
1709 int rc = 0;
1710 struct nvme_tcp_pdu *pdu;
1711 enum nvme_tcp_pdu_recv_state prev_state;
1712 uint32_t data_len;
f67539c2
TL
1713 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
1714 struct spdk_nvmf_tcp_transport, transport);
9f95a23c
TL
1715
1716 /* The loop here is to allow for several back-to-back state changes. */
1717 do {
1718 prev_state = tqpair->recv_state;
9f95a23c
TL
1719 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
1720
f67539c2 1721 pdu = &tqpair->pdu_in_progress;
9f95a23c
TL
1722 switch (tqpair->recv_state) {
1723 /* Wait for the common header */
1724 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
1725 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
f67539c2
TL
1726 if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
1727 return rc;
1728 }
9f95a23c
TL
1729
1730 rc = nvme_tcp_read_data(tqpair->sock,
1731 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
1732 (void *)&pdu->hdr.common + pdu->ch_valid_bytes);
1733 if (rc < 0) {
1734 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "will disconnect tqpair=%p\n", tqpair);
1735 return NVME_TCP_PDU_FATAL;
1736 } else if (rc > 0) {
1737 pdu->ch_valid_bytes += rc;
1738 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, 0, rc, 0, 0);
1739 if (spdk_likely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY)) {
f67539c2 1740 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
9f95a23c
TL
1741 }
1742 }
1743
1744 if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
1745 return NVME_TCP_PDU_IN_PROGRESS;
1746 }
1747
1748 /* The command header of this PDU has now been read from the socket. */
f67539c2 1749 nvmf_tcp_pdu_ch_handle(tqpair);
9f95a23c
TL
1750 break;
1751 /* Wait for the pdu specific header */
1752 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
f67539c2
TL
1753 rc = nvme_tcp_read_data(tqpair->sock,
1754 pdu->psh_len - pdu->psh_valid_bytes,
1755 (void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
1756 if (rc < 0) {
1757 return NVME_TCP_PDU_FATAL;
1758 } else if (rc > 0) {
1759 spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE,
1760 0, rc, 0, 0);
1761 pdu->psh_valid_bytes += rc;
9f95a23c
TL
1762 }
1763
f67539c2
TL
1764 if (pdu->psh_valid_bytes < pdu->psh_len) {
1765 return NVME_TCP_PDU_IN_PROGRESS;
9f95a23c
TL
1766 }
1767
1768 /* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
f67539c2
TL
1769 nvmf_tcp_pdu_psh_handle(tqpair, ttransport);
1770 break;
1771 /* Wait for the req slot */
1772 case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
1773 nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
9f95a23c
TL
1774 break;
1775 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
9f95a23c
TL
1776 /* If no payload length has been set for this PDU there is nothing to read yet, so just return. */
1777 if (!pdu->data_len) {
1778 return NVME_TCP_PDU_IN_PROGRESS;
1779 }
1780
1781 data_len = pdu->data_len;
1782 /* data digest */
1783 if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
1784 tqpair->host_ddgst_enable)) {
1785 data_len += SPDK_NVME_TCP_DIGEST_LEN;
1786 pdu->ddgst_enable = true;
1787 }
1788
1789 rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
1790 if (rc < 0) {
f67539c2 1791 return NVME_TCP_PDU_FATAL;
9f95a23c 1792 }
9f95a23c 1793 pdu->readv_offset += rc;
f67539c2
TL
1794
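/* When DIF insert/strip is enabled for this PDU, generate protection information over the bytes
 * that were just read before they are counted toward the payload. */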
1795 if (spdk_unlikely(pdu->dif_ctx != NULL)) {
1796 rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->readv_offset - rc, rc);
1797 if (rc != 0) {
1798 return NVME_TCP_PDU_FATAL;
1799 }
1800 }
1801
9f95a23c
TL
1802 if (pdu->readv_offset < data_len) {
1803 return NVME_TCP_PDU_IN_PROGRESS;
1804 }
1805
1806 /* All of this PDU has now been read from the socket. */
f67539c2 1807 nvmf_tcp_pdu_payload_handle(tqpair, ttransport);
9f95a23c
TL
1808 break;
1809 case NVME_TCP_PDU_RECV_STATE_ERROR:
f67539c2 1810 if (!spdk_sock_is_connected(tqpair->sock)) {
9f95a23c
TL
1811 return NVME_TCP_PDU_FATAL;
1812 }
1813 break;
1814 default:
1815 assert(0);
1816 SPDK_ERRLOG("code should not come to here");
1817 break;
1818 }
1819 } while (tqpair->recv_state != prev_state);
1820
1821 return rc;
1822}
1823
9f95a23c 1824static int
f67539c2
TL
1825nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
1826 struct spdk_nvmf_transport *transport,
1827 struct spdk_nvmf_transport_poll_group *group)
9f95a23c 1828{
f67539c2 1829 struct spdk_nvmf_request *req = &tcp_req->req;
9f95a23c
TL
1830 struct spdk_nvme_cmd *cmd;
1831 struct spdk_nvme_cpl *rsp;
1832 struct spdk_nvme_sgl_descriptor *sgl;
f67539c2 1833 uint32_t length;
9f95a23c 1834
f67539c2
TL
1835 cmd = &req->cmd->nvme_cmd;
1836 rsp = &req->rsp->nvme_cpl;
9f95a23c
TL
1837 sgl = &cmd->dptr.sgl1;
1838
f67539c2
TL
1839 length = sgl->unkeyed.length;
1840
9f95a23c
TL
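/* Two SGL forms are accepted: a transport data block, whose data moves in separate data PDUs and
 * is staged in pooled buffers, and a data block with offset, whose data is already present in the
 * capsule. */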
1841 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
1842 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
f67539c2 1843 if (length > transport->opts.max_io_size) {
9f95a23c 1844 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
f67539c2 1845 length, transport->opts.max_io_size);
9f95a23c
TL
1846 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
1847 return -1;
1848 }
1849
1850 /* fill request length and populate iovs */
f67539c2 1851 req->length = length;
9f95a23c 1852
f67539c2 1853 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Data requested length= 0x%x\n", length);
9f95a23c 1854
f67539c2
TL
1855 if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
1856 req->dif.orig_length = length;
1857 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
1858 req->dif.elba_length = length;
1859 }
1860
1861 if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {
9f95a23c 1862 /* No available buffers. Queue this request up. */
f67539c2
TL
1863 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No available large data buffers. Queueing request %p\n",
1864 tcp_req);
9f95a23c
TL
1865 return 0;
1866 }
1867
1868 /* kept for backward compatibility */
f67539c2 1869 req->data = req->iov[0].iov_base;
9f95a23c
TL
1870
1871 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p took %d buffer(s) from central pool, and data=%p\n",
f67539c2 1872 tcp_req, req->iovcnt, req->data);
9f95a23c
TL
1873
1874 return 0;
1875 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
1876 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
1877 uint64_t offset = sgl->address;
f67539c2 1878 uint32_t max_len = transport->opts.in_capsule_data_size;
9f95a23c
TL
1879
1880 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
f67539c2 1881 offset, length);
9f95a23c
TL
1882
1883 if (offset > max_len) {
1884 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
1885 offset, max_len);
1886 rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
1887 return -1;
1888 }
1889 max_len -= (uint32_t)offset;
1890
f67539c2 1891 if (length > max_len) {
9f95a23c 1892 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
f67539c2 1893 length, max_len);
9f95a23c
TL
1894 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
1895 return -1;
1896 }
1897
f67539c2
TL
1898 req->data = tcp_req->buf + offset;
1899 req->data_from_pool = false;
1900 req->length = length;
1901
1902 if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
1903 length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
1904 req->dif.elba_length = length;
1905 }
9f95a23c 1906
f67539c2
TL
1907 req->iov[0].iov_base = req->data;
1908 req->iov[0].iov_len = length;
1909 req->iovcnt = 1;
9f95a23c
TL
1910
1911 return 0;
1912 }
1913
1914 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
1915 sgl->generic.type, sgl->generic.subtype);
1916 rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
1917 return -1;
1918}
1919
f67539c2
TL
1920static inline enum spdk_nvme_media_error_status_code
1921nvmf_tcp_dif_error_to_compl_status(uint8_t err_type) {
1922 enum spdk_nvme_media_error_status_code result;
1923
1924 switch (err_type)
1925 {
1926 case SPDK_DIF_REFTAG_ERROR:
1927 result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
1928 break;
1929 case SPDK_DIF_APPTAG_ERROR:
1930 result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
1931 break;
1932 case SPDK_DIF_GUARD_ERROR:
1933 result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
1934 break;
1935 default:
1936 SPDK_UNREACHABLE();
1937 break;
1938 }
1939
1940 return result;
1941}
1942
9f95a23c 1943static void
f67539c2
TL
1944nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
1945 struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c
TL
1946{
1947 struct nvme_tcp_pdu *rsp_pdu;
1948 struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
f67539c2
TL
1949 uint32_t plen, pdo, alignment;
1950 int rc;
9f95a23c
TL
1951
1952 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
1953
f67539c2 1954 rsp_pdu = nvmf_tcp_req_pdu_init(tcp_req);
9f95a23c
TL
1955 assert(rsp_pdu != NULL);
1956
1957 c2h_data = &rsp_pdu->hdr.c2h_data;
1958 c2h_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_C2H_DATA;
1959 plen = c2h_data->common.hlen = sizeof(*c2h_data);
1960
1961 if (tqpair->host_hdgst_enable) {
1962 plen += SPDK_NVME_TCP_DIGEST_LEN;
1963 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1964 }
1965
1966 /* set the psh */
1967 c2h_data->cccid = tcp_req->req.cmd->nvme_cmd.cid;
f67539c2
TL
1968 c2h_data->datal = tcp_req->req.length;
1969 c2h_data->datao = 0;
9f95a23c
TL
1970
1971 /* set the padding */
1972 rsp_pdu->padding_len = 0;
1973 pdo = plen;
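/* A non-zero cpda means the C2H data must start at a multiple of (cpda + 1) * 4 bytes, so pad the
 * header region out to that alignment when needed. */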
1974 if (tqpair->cpda) {
1975 alignment = (tqpair->cpda + 1) << 2;
1976 if (alignment > plen) {
1977 rsp_pdu->padding_len = alignment - plen;
1978 pdo = plen = alignment;
1979 }
1980 }
1981
1982 c2h_data->common.pdo = pdo;
1983 plen += c2h_data->datal;
1984 if (tqpair->host_ddgst_enable) {
1985 c2h_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
1986 plen += SPDK_NVME_TCP_DIGEST_LEN;
1987 }
1988
1989 c2h_data->common.plen = plen;
1990
f67539c2
TL
1991 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1992 rsp_pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
9f95a23c
TL
1993 }
1994
f67539c2
TL
1995 nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
1996 c2h_data->datao, c2h_data->datal);
9f95a23c 1997
f67539c2
TL
1998 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
1999 struct spdk_nvme_cpl *rsp = &tcp_req->req.rsp->nvme_cpl;
2000 struct spdk_dif_error err_blk = {};
9f95a23c 2001
f67539c2
TL
2002 rc = spdk_dif_verify_stream(rsp_pdu->data_iov, rsp_pdu->data_iovcnt,
2003 0, rsp_pdu->data_len, rsp_pdu->dif_ctx, &err_blk);
2004 if (rc != 0) {
2005 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
2006 err_blk.err_type, err_blk.err_offset);
2007 rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
2008 rsp->status.sc = nvmf_tcp_dif_error_to_compl_status(err_blk.err_type);
2009 nvmf_tcp_req_pdu_fini(tcp_req);
2010 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
2011 return;
2012 }
9f95a23c
TL
2013 }
2014
f67539c2
TL
2015 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
2016 if (tqpair->qpair.transport->opts.c2h_success) {
2017 c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
9f95a23c 2018 }
9f95a23c 2019
f67539c2 2020 nvmf_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvmf_tcp_pdu_c2h_data_complete, tcp_req);
9f95a23c
TL
2021}
2022
2023static int
2024request_transfer_out(struct spdk_nvmf_request *req)
2025{
2026 struct spdk_nvmf_tcp_req *tcp_req;
2027 struct spdk_nvmf_qpair *qpair;
2028 struct spdk_nvmf_tcp_qpair *tqpair;
2029 struct spdk_nvme_cpl *rsp;
2030
2031 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "enter\n");
2032
2033 qpair = req->qpair;
2034 rsp = &req->rsp->nvme_cpl;
2035 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2036
2037 /* Advance our sq_head pointer */
2038 if (qpair->sq_head == qpair->sq_head_max) {
2039 qpair->sq_head = 0;
2040 } else {
2041 qpair->sq_head++;
2042 }
2043 rsp->sqhd = qpair->sq_head;
2044
2045 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
f67539c2
TL
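/* A successful read is answered with a C2H data PDU (optionally carrying the SUCCESS flag instead
 * of a separate response); every other outcome gets a capsule response PDU. */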
2046 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
2047 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
2048 nvmf_tcp_send_c2h_data(tqpair, tcp_req);
9f95a23c 2049 } else {
f67539c2 2050 nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
9f95a23c
TL
2051 }
2052
2053 return 0;
2054}
2055
2056static void
f67539c2
TL
2057nvmf_tcp_set_incapsule_data(struct spdk_nvmf_tcp_qpair *tqpair,
2058 struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c
TL
2059{
2060 struct nvme_tcp_pdu *pdu;
2061 uint32_t plen = 0;
2062
2063 pdu = &tqpair->pdu_in_progress;
2064 plen = pdu->hdr.common.hlen;
2065
2066 if (tqpair->host_hdgst_enable) {
2067 plen += SPDK_NVME_TCP_DIGEST_LEN;
2068 }
2069
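/* If the capsule PDU's total length exceeds its header (plus header digest, when enabled), the
 * command carries in-capsule data. */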
2070 if (pdu->hdr.common.plen != plen) {
2071 tcp_req->has_incapsule_data = true;
2072 }
2073}
2074
2075static bool
f67539c2
TL
2076nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
2077 struct spdk_nvmf_tcp_req *tcp_req)
9f95a23c
TL
2078{
2079 struct spdk_nvmf_tcp_qpair *tqpair;
9f95a23c
TL
2080 int rc;
2081 enum spdk_nvmf_tcp_req_state prev_state;
2082 bool progress = false;
f67539c2 2083 struct spdk_nvmf_transport *transport = &ttransport->transport;
9f95a23c
TL
2084 struct spdk_nvmf_transport_poll_group *group;
2085
2086 tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
2087 group = &tqpair->group->group;
2088 assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
2089
f67539c2
TL
2090 /* If the qpair is not active, we need to abort the outstanding requests. */
2091 if (tqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
2092 if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
2093 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
2094 }
2095 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
2096 }
2097
9f95a23c
TL
2098 /* The loop here is to allow for several back-to-back state changes. */
2099 do {
2100 prev_state = tcp_req->state;
2101
2102 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Request %p entering state %d on tqpair=%p\n", tcp_req, prev_state,
2103 tqpair);
2104
2105 switch (tcp_req->state) {
2106 case TCP_REQUEST_STATE_FREE:
2107 /* Some external code must kick a request into TCP_REQUEST_STATE_NEW
2108 * to escape this state. */
2109 break;
2110 case TCP_REQUEST_STATE_NEW:
2111 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEW, 0, 0, (uintptr_t)tcp_req, 0);
2112
2113 /* copy the cmd from the receive pdu */
2114 tcp_req->cmd = tqpair->pdu_in_progress.hdr.capsule_cmd.ccsqe;
2115
f67539c2
TL
2116 if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&tcp_req->req, &tcp_req->req.dif.dif_ctx))) {
2117 tcp_req->req.dif.dif_insert_or_strip = true;
2118 tqpair->pdu_in_progress.dif_ctx = &tcp_req->req.dif.dif_ctx;
2119 }
2120
9f95a23c 2121 /* The next state transition depends on the data transfer needs of this request. */
f67539c2 2122 tcp_req->req.xfer = spdk_nvmf_req_get_xfer(&tcp_req->req);
9f95a23c
TL
2123
2124 /* If no data to transfer, ready to execute. */
2125 if (tcp_req->req.xfer == SPDK_NVME_DATA_NONE) {
2126 /* Reset the tqpair's receiving PDU state */
f67539c2
TL
2127 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2128 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
9f95a23c
TL
2129 break;
2130 }
2131
f67539c2 2132 nvmf_tcp_set_incapsule_data(tqpair, tcp_req);
9f95a23c
TL
2133
2134 if (!tcp_req->has_incapsule_data) {
f67539c2 2135 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
9f95a23c
TL
2136 }
2137
f67539c2
TL
2138 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_NEED_BUFFER);
2139 STAILQ_INSERT_TAIL(&group->pending_buf_queue, &tcp_req->req, buf_link);
9f95a23c
TL
2140 break;
2141 case TCP_REQUEST_STATE_NEED_BUFFER:
2142 spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, 0, 0, (uintptr_t)tcp_req, 0);
2143
2144 assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
2145
f67539c2 2146 if (!tcp_req->has_incapsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
9f95a23c
TL
2147 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP,
2148 "Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
2149 tcp_req, tqpair);
2150 /* This request needs to wait in line to obtain a buffer */
2151 break;
2152 }
2153
2154 /* Try to get a data buffer */
f67539c2 2155 rc = nvmf_tcp_req_parse_sgl(tcp_req, transport, group);
9f95a23c 2156 if (rc < 0) {
f67539c2 2157 STAILQ_REMOVE_HEAD(&group->pending_buf_queue, buf_link);
9f95a23c 2158 /* Reset the tqpair's receiving PDU state */
f67539c2
TL
2159 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2160 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
9f95a23c
TL
2161 break;
2162 }
2163
2164 if (!tcp_req->req.data) {
2165 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No buffer allocated for tcp_req(%p) on tqpair(%p)\n",
2166 tcp_req, tqpair);
2167 /* No buffers available. */
2168 break;
2169 }
2170
f67539c2 2171 STAILQ_REMOVE(&group->pending_buf_queue, &tcp_req->req, spdk_nvmf_request, buf_link);
9f95a23c
TL
2172
2173 /* If this command transfers data from host to controller, fetch that data from the host now. */
2174 if (tcp_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
f67539c2
TL
2175 if (tcp_req->req.data_from_pool) {
2176 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Sending R2T for tcp_req(%p) on tqpair=%p\n", tcp_req, tqpair);
2177 nvmf_tcp_send_r2t_pdu(tqpair, tcp_req);
2178 } else {
2179 struct nvme_tcp_pdu *pdu;
2180
2181 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
2182
2183 pdu = &tqpair->pdu_in_progress;
2184 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "No need to send R2T for tcp_req(%p) on tqpair=%p\n", tcp_req,
2185 tqpair);
2186 /* No need to send an R2T; the data is contained in the capsule. */
2187 nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
2188 0, tcp_req->req.length);
2189 nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
2190 }
9f95a23c
TL
2191 break;
2192 }
2193
f67539c2
TL
2194 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_EXECUTE);
2195 break;
2196 case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
2197 spdk_trace_record(TRACE_TCP_REQUEST_STATE_AWAIT_R2T_ACK, 0, 0, (uintptr_t)tcp_req, 0);
2198 /* The R2T completion or the h2c data incoming will kick it out of this state. */
9f95a23c
TL
2199 break;
2200 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
f67539c2 2201
9f95a23c
TL
2202 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
2203 (uintptr_t)tcp_req, 0);
2204 /* Some external code must kick a request into TCP_REQUEST_STATE_READY_TO_EXECUTE
2205 * to escape this state. */
2206 break;
2207 case TCP_REQUEST_STATE_READY_TO_EXECUTE:
2208 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, (uintptr_t)tcp_req, 0);
f67539c2
TL
2209
2210 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2211 assert(tcp_req->req.dif.elba_length >= tcp_req->req.length);
2212 tcp_req->req.length = tcp_req->req.dif.elba_length;
2213 }
2214
2215 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTING);
9f95a23c
TL
2216 spdk_nvmf_request_exec(&tcp_req->req);
2217 break;
2218 case TCP_REQUEST_STATE_EXECUTING:
2219 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTING, 0, 0, (uintptr_t)tcp_req, 0);
2220 /* Some external code must kick a request into TCP_REQUEST_STATE_EXECUTED
2221 * to escape this state. */
2222 break;
2223 case TCP_REQUEST_STATE_EXECUTED:
2224 spdk_trace_record(TRACE_TCP_REQUEST_STATE_EXECUTED, 0, 0, (uintptr_t)tcp_req, 0);
f67539c2
TL
2225
2226 if (spdk_unlikely(tcp_req->req.dif.dif_insert_or_strip)) {
2227 tcp_req->req.length = tcp_req->req.dif.orig_length;
2228 }
2229
2230 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_READY_TO_COMPLETE);
9f95a23c
TL
2231 break;
2232 case TCP_REQUEST_STATE_READY_TO_COMPLETE:
2233 spdk_trace_record(TRACE_TCP_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, (uintptr_t)tcp_req, 0);
2234 rc = request_transfer_out(&tcp_req->req);
2235 assert(rc == 0); /* No good way to handle this currently */
2236 break;
2237 case TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2238 spdk_trace_record(TRACE_TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
2239 (uintptr_t)tcp_req,
2240 0);
2241 /* Some external code must kick a request into TCP_REQUEST_STATE_COMPLETED
2242 * to escape this state. */
2243 break;
2244 case TCP_REQUEST_STATE_COMPLETED:
2245 spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
f67539c2
TL
2246 if (tcp_req->req.data_from_pool) {
2247 spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
9f95a23c
TL
2248 }
2249 tcp_req->req.length = 0;
2250 tcp_req->req.iovcnt = 0;
2251 tcp_req->req.data = NULL;
f67539c2
TL
2252
2253 nvmf_tcp_req_pdu_fini(tcp_req);
2254
2255 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_FREE);
9f95a23c
TL
2256 break;
2257 case TCP_REQUEST_NUM_STATES:
2258 default:
2259 assert(0);
2260 break;
2261 }
2262
2263 if (tcp_req->state != prev_state) {
2264 progress = true;
2265 }
2266 } while (tcp_req->state != prev_state);
2267
2268 return progress;
2269}
2270
2271static void
f67539c2 2272nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
9f95a23c
TL
2273{
2274 struct spdk_nvmf_tcp_qpair *tqpair = arg;
9f95a23c
TL
2275 int rc;
2276
2277 assert(tqpair != NULL);
f67539c2 2278 rc = nvmf_tcp_sock_process(tqpair);
9f95a23c 2279
f67539c2
TL
2280 /* If there was a new socket error, disconnect */
2281 if (rc < 0) {
2282 nvmf_tcp_qpair_disconnect(tqpair);
9f95a23c
TL
2283 }
2284}
2285
2286static int
f67539c2
TL
2287nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2288 struct spdk_nvmf_qpair *qpair)
9f95a23c
TL
2289{
2290 struct spdk_nvmf_tcp_poll_group *tgroup;
2291 struct spdk_nvmf_tcp_qpair *tqpair;
2292 int rc;
2293
2294 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2295 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2296
2297 rc = spdk_sock_group_add_sock(tgroup->sock_group, tqpair->sock,
f67539c2 2298 nvmf_tcp_sock_cb, tqpair);
9f95a23c
TL
2299 if (rc != 0) {
2300 SPDK_ERRLOG("Could not add sock to sock_group: %s (%d)\n",
2301 spdk_strerror(errno), errno);
9f95a23c
TL
2302 return -1;
2303 }
2304
f67539c2 2305 rc = nvmf_tcp_qpair_sock_init(tqpair);
9f95a23c
TL
2306 if (rc != 0) {
2307 SPDK_ERRLOG("Cannot set sock opt for tqpair=%p\n", tqpair);
9f95a23c
TL
2308 return -1;
2309 }
2310
f67539c2 2311 rc = nvmf_tcp_qpair_init(&tqpair->qpair);
9f95a23c
TL
2312 if (rc < 0) {
2313 SPDK_ERRLOG("Cannot init tqpair=%p\n", tqpair);
9f95a23c
TL
2314 return -1;
2315 }
2316
f67539c2 2317 rc = nvmf_tcp_qpair_init_mem_resource(tqpair);
9f95a23c
TL
2318 if (rc < 0) {
2319 SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
9f95a23c
TL
2320 return -1;
2321 }
2322
2323 tqpair->group = tgroup;
2324 tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2325 TAILQ_INSERT_TAIL(&tgroup->qpairs, tqpair, link);
2326
2327 return 0;
2328}
2329
2330static int
f67539c2
TL
2331nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2332 struct spdk_nvmf_qpair *qpair)
9f95a23c
TL
2333{
2334 struct spdk_nvmf_tcp_poll_group *tgroup;
2335 struct spdk_nvmf_tcp_qpair *tqpair;
2336 int rc;
2337
2338 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2339 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2340
2341 assert(tqpair->group == tgroup);
2342
2343 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "remove tqpair=%p from the tgroup=%p\n", tqpair, tgroup);
f67539c2
TL
2344 if (tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_REQ) {
2345 TAILQ_REMOVE(&tgroup->await_req, tqpair, link);
2346 } else {
2347 TAILQ_REMOVE(&tgroup->qpairs, tqpair, link);
2348 }
2349
9f95a23c
TL
2350 rc = spdk_sock_group_remove_sock(tgroup->sock_group, tqpair->sock);
2351 if (rc != 0) {
2352 SPDK_ERRLOG("Could not remove sock from sock_group: %s (%d)\n",
2353 spdk_strerror(errno), errno);
2354 }
2355
2356 return rc;
2357}
2358
2359static int
f67539c2 2360nvmf_tcp_req_complete(struct spdk_nvmf_request *req)
9f95a23c
TL
2361{
2362 struct spdk_nvmf_tcp_transport *ttransport;
2363 struct spdk_nvmf_tcp_req *tcp_req;
2364
2365 ttransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2366 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2367
f67539c2
TL
2368 nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_EXECUTED);
2369 nvmf_tcp_req_process(ttransport, tcp_req);
9f95a23c
TL
2370
2371 return 0;
2372}
2373
2374static void
f67539c2 2375nvmf_tcp_close_qpair(struct spdk_nvmf_qpair *qpair)
9f95a23c 2376{
f67539c2
TL
2377 struct spdk_nvmf_tcp_qpair *tqpair;
2378
2379 SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Qpair: %p\n", qpair);
9f95a23c 2380
f67539c2
TL
2381 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
2382 tqpair->state = NVME_TCP_QPAIR_STATE_EXITED;
2383 nvmf_tcp_qpair_destroy(tqpair);
9f95a23c
TL
2384}
2385
2386static int
f67539c2 2387nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
9f95a23c
TL
2388{
2389 struct spdk_nvmf_tcp_poll_group *tgroup;
2390 int rc;
f67539c2
TL
2391 struct spdk_nvmf_request *req, *req_tmp;
2392 struct spdk_nvmf_tcp_req *tcp_req;
2393 struct spdk_nvmf_tcp_qpair *tqpair, *tqpair_tmp;
2394 struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
2395 struct spdk_nvmf_tcp_transport, transport);
9f95a23c
TL
2396
2397 tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
2398
f67539c2 2399 if (spdk_unlikely(TAILQ_EMPTY(&tgroup->qpairs) && TAILQ_EMPTY(&tgroup->await_req))) {
9f95a23c
TL
2400 return 0;
2401 }
2402
f67539c2
TL
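/* First retry requests still waiting for data buffers, in FIFO order, stopping at the first one
 * that cannot make progress so ordering is preserved. */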
2403 STAILQ_FOREACH_SAFE(req, &group->pending_buf_queue, buf_link, req_tmp) {
2404 tcp_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_tcp_req, req);
2405 if (nvmf_tcp_req_process(ttransport, tcp_req) == false) {
2406 break;
2407 }
2408 }
2409
9f95a23c
TL
2410 rc = spdk_sock_group_poll(tgroup->sock_group);
2411 if (rc < 0) {
2412 SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
9f95a23c
TL
2413 }
2414
f67539c2
TL
2415 TAILQ_FOREACH_SAFE(tqpair, &tgroup->await_req, link, tqpair_tmp) {
2416 nvmf_tcp_sock_process(tqpair);
2417 }
2418
2419 return rc;
9f95a23c
TL
2420}
2421
2422static int
f67539c2
TL
2423nvmf_tcp_qpair_get_trid(struct spdk_nvmf_qpair *qpair,
2424 struct spdk_nvme_transport_id *trid, bool peer)
9f95a23c
TL
2425{
2426 struct spdk_nvmf_tcp_qpair *tqpair;
2427 uint16_t port;
2428
2429 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
f67539c2 2430 spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_TCP);
9f95a23c
TL
2431
2432 if (peer) {
2433 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->initiator_addr);
2434 port = tqpair->initiator_port;
2435 } else {
2436 snprintf(trid->traddr, sizeof(trid->traddr), "%s", tqpair->target_addr);
2437 port = tqpair->target_port;
2438 }
2439
2440 if (spdk_sock_is_ipv4(tqpair->sock)) {
2441 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
f67539c2 2442 } else if (spdk_sock_is_ipv6(tqpair->sock)) {
9f95a23c
TL
2443 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
2444 } else {
2445 return -1;
2446 }
2447
2448 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%d", port);
2449 return 0;
2450}
2451
2452static int
f67539c2
TL
2453nvmf_tcp_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2454 struct spdk_nvme_transport_id *trid)
9f95a23c 2455{
f67539c2 2456 return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
9f95a23c
TL
2457}
2458
2459static int
f67539c2
TL
2460nvmf_tcp_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2461 struct spdk_nvme_transport_id *trid)
9f95a23c 2462{
f67539c2 2463 return nvmf_tcp_qpair_get_trid(qpair, trid, 1);
9f95a23c
TL
2464}
2465
2466static int
f67539c2
TL
2467nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2468 struct spdk_nvme_transport_id *trid)
9f95a23c 2469{
f67539c2
TL
2470 return nvmf_tcp_qpair_get_trid(qpair, trid, 0);
2471}
2472
2473static void
2474nvmf_tcp_req_set_abort_status(struct spdk_nvmf_request *req,
2475 struct spdk_nvmf_tcp_req *tcp_req_to_abort)
2476{
2477 tcp_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2478 tcp_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
2479
2480 nvmf_tcp_req_set_state(tcp_req_to_abort, TCP_REQUEST_STATE_READY_TO_COMPLETE);
2481
2482 req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
9f95a23c
TL
2483}
2484
2485static int
f67539c2 2486_nvmf_tcp_qpair_abort_request(void *ctx)
9f95a23c 2487{
f67539c2
TL
2488 struct spdk_nvmf_request *req = ctx;
2489 struct spdk_nvmf_tcp_req *tcp_req_to_abort = SPDK_CONTAINEROF(req->req_to_abort,
2490 struct spdk_nvmf_tcp_req, req);
2491 struct spdk_nvmf_tcp_qpair *tqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
2492 struct spdk_nvmf_tcp_qpair, qpair);
9f95a23c 2493 int rc;
f67539c2
TL
2494
2495 spdk_poller_unregister(&req->poller);
2496
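/* Handle the abort according to the victim request's current state: an executing request is
 * aborted through the controller, queued requests are completed with an abort status, and a
 * host-to-controller transfer is re-polled until it finishes or the abort times out. */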
2497 switch (tcp_req_to_abort->state) {
2498 case TCP_REQUEST_STATE_EXECUTING:
2499 rc = nvmf_ctrlr_abort_request(req);
2500 if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
2501 return SPDK_POLLER_BUSY;
2502 }
2503 break;
2504
2505 case TCP_REQUEST_STATE_NEED_BUFFER:
2506 STAILQ_REMOVE(&tqpair->group->group.pending_buf_queue,
2507 &tcp_req_to_abort->req, spdk_nvmf_request, buf_link);
2508
2509 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
2510 break;
2511
2512 case TCP_REQUEST_STATE_AWAITING_R2T_ACK:
2513 nvmf_tcp_req_set_abort_status(req, tcp_req_to_abort);
2514 break;
2515
2516 case TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2517 if (spdk_get_ticks() < req->timeout_tsc) {
2518 req->poller = SPDK_POLLER_REGISTER(_nvmf_tcp_qpair_abort_request, req, 0);
2519 return SPDK_POLLER_BUSY;
2520 }
2521 break;
2522
2523 default:
2524 break;
2525 }
2526
2527 spdk_nvmf_request_complete(req);
2528 return SPDK_POLLER_BUSY;
2529}
2530
2531static void
2532nvmf_tcp_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
2533 struct spdk_nvmf_request *req)
2534{
2535 struct spdk_nvmf_tcp_qpair *tqpair;
2536 struct spdk_nvmf_tcp_transport *ttransport;
2537 struct spdk_nvmf_transport *transport;
2538 uint16_t cid;
2539 uint32_t i;
2540 struct spdk_nvmf_tcp_req *tcp_req_to_abort = NULL;
2541
9f95a23c 2542 tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
f67539c2
TL
2543 ttransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_tcp_transport, transport);
2544 transport = &ttransport->transport;
9f95a23c 2545
f67539c2
TL
2546 cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
2547
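/* Scan this qpair's outstanding requests for one whose CID matches the CID given in the Abort
 * command (cdw10). */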
2548 for (i = 0; i < tqpair->resource_count; i++) {
2549 tcp_req_to_abort = &tqpair->reqs[i];
2550
2551 if (tcp_req_to_abort->state != TCP_REQUEST_STATE_FREE &&
2552 tcp_req_to_abort->req.cmd->nvme_cmd.cid == cid) {
2553 break;
2554 }
9f95a23c
TL
2555 }
2556
f67539c2
TL
2557 if (i == tqpair->resource_count) { /* no outstanding request matched the CID to abort */
2558 spdk_nvmf_request_complete(req);
2559 return;
2560 }
2561
2562 req->req_to_abort = &tcp_req_to_abort->req;
2563 req->timeout_tsc = spdk_get_ticks() +
2564 transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
2565 req->poller = NULL;
9f95a23c 2566
f67539c2 2567 _nvmf_tcp_qpair_abort_request(req);
9f95a23c
TL
2568}
2569
2570#define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
2571#define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
2572#define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2573#define SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2574#define SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE 131072
2575#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
2576#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
2577#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
f67539c2
TL
2578#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
2579#define SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP false
2580#define SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY 0
2581#define SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC 1
9f95a23c
TL
2582
2583static void
f67539c2 2584nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
9f95a23c
TL
2585{
2586 opts->max_queue_depth = SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH;
2587 opts->max_qpairs_per_ctrlr = SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2588 opts->in_capsule_data_size = SPDK_NVMF_TCP_DEFAULT_IN_CAPSULE_DATA_SIZE;
2589 opts->max_io_size = SPDK_NVMF_TCP_DEFAULT_MAX_IO_SIZE;
2590 opts->io_unit_size = SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE;
2591 opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
2592 opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
2593 opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
f67539c2
TL
2594 opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
2595 opts->dif_insert_or_strip = SPDK_NVMF_TCP_DEFAULT_DIF_INSERT_OR_STRIP;
2596 opts->sock_priority = SPDK_NVMF_TCP_DEFAULT_SOCK_PRIORITY;
2597 opts->abort_timeout_sec = SPDK_NVMF_TCP_DEFAULT_ABORT_TIMEOUT_SEC;
9f95a23c
TL
2598}
2599
2600const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
f67539c2 2601 .name = "TCP",
9f95a23c 2602 .type = SPDK_NVME_TRANSPORT_TCP,
f67539c2
TL
2603 .opts_init = nvmf_tcp_opts_init,
2604 .create = nvmf_tcp_create,
2605 .destroy = nvmf_tcp_destroy,
2606
2607 .listen = nvmf_tcp_listen,
2608 .stop_listen = nvmf_tcp_stop_listen,
2609 .accept = nvmf_tcp_accept,
2610
2611 .listener_discover = nvmf_tcp_discover,
2612
2613 .poll_group_create = nvmf_tcp_poll_group_create,
2614 .get_optimal_poll_group = nvmf_tcp_get_optimal_poll_group,
2615 .poll_group_destroy = nvmf_tcp_poll_group_destroy,
2616 .poll_group_add = nvmf_tcp_poll_group_add,
2617 .poll_group_remove = nvmf_tcp_poll_group_remove,
2618 .poll_group_poll = nvmf_tcp_poll_group_poll,
2619
2620 .req_free = nvmf_tcp_req_free,
2621 .req_complete = nvmf_tcp_req_complete,
2622
2623 .qpair_fini = nvmf_tcp_close_qpair,
2624 .qpair_get_local_trid = nvmf_tcp_qpair_get_local_trid,
2625 .qpair_get_peer_trid = nvmf_tcp_qpair_get_peer_trid,
2626 .qpair_get_listen_trid = nvmf_tcp_qpair_get_listen_trid,
2627 .qpair_abort_request = nvmf_tcp_qpair_abort_request,
9f95a23c
TL
2628};
2629
f67539c2 2630SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);
9f95a23c 2631SPDK_LOG_REGISTER_COMPONENT("nvmf_tcp", SPDK_LOG_NVMF_TCP)