ceph/src/spdk/lib/nvme/nvme_rdma.c
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34/*
35 * NVMe over RDMA transport
36 */
37
11fdf7f2
TL
38#include "spdk/stdinc.h"
39
7c673cae
FG
40#include <infiniband/verbs.h>
41#include <rdma/rdma_cma.h>
42#include <rdma/rdma_verbs.h>
7c673cae
FG
43
44#include "spdk/assert.h"
45#include "spdk/log.h"
46#include "spdk/trace.h"
47#include "spdk/event.h"
48#include "spdk/queue.h"
49#include "spdk/nvme.h"
50#include "spdk/nvmf_spec.h"
51#include "spdk/string.h"
11fdf7f2
TL
52#include "spdk/endian.h"
53#include "spdk/likely.h"
7c673cae
FG
54
55#include "nvme_internal.h"
56
57#define NVME_RDMA_TIME_OUT_IN_MS 2000
58#define NVME_RDMA_RW_BUFFER_SIZE 131072
7c673cae
FG
59
60/*
11fdf7f2 61 * NVMe RDMA qpair resource defaults
7c673cae
FG
62 */
63#define NVME_RDMA_DEFAULT_TX_SGE 2
64#define NVME_RDMA_DEFAULT_RX_SGE 1
65
11fdf7f2
TL
66
67/* Max number of NVMe-oF SGL descriptors supported by the host */
68#define NVME_RDMA_MAX_SGL_DESCRIPTORS 16
69struct spdk_nvmf_cmd {
70 struct spdk_nvme_cmd cmd;
71 struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
72};
73
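/* Optional application-supplied callbacks, installed via spdk_nvme_rdma_init_hooks() at the
 * bottom of this file. When set, they supply the protection domain and memory keys to use
 * instead of the default ibv_reg_mr()-based registration path. */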
9f95a23c
TL
74struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
75
11fdf7f2
TL
76/* Mapping from virtual address to ibv_mr pointer for a protection domain */
77struct spdk_nvme_rdma_mr_map {
78 struct ibv_pd *pd;
79 struct spdk_mem_map *map;
80 uint64_t ref;
81 LIST_ENTRY(spdk_nvme_rdma_mr_map) link;
82};
83
7c673cae
FG
84/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
85struct nvme_rdma_ctrlr {
86 struct spdk_nvme_ctrlr ctrlr;
9f95a23c
TL
87
88 struct ibv_pd *pd;
7c673cae
FG
89};
90
91/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
92struct nvme_rdma_qpair {
93 struct spdk_nvme_qpair qpair;
94
7c673cae
FG
95 struct rdma_cm_id *cm_id;
96
97 struct ibv_cq *cq;
98
99 struct spdk_nvme_rdma_req *rdma_reqs;
100
9f95a23c
TL
101 uint32_t max_send_sge;
102
103 uint32_t max_recv_sge;
104
7c673cae
FG
105 uint16_t num_entries;
106
107 /* Parallel arrays of response buffers + response SGLs of size num_entries */
108 struct ibv_sge *rsp_sgls;
109 struct spdk_nvme_cpl *rsps;
110
111 struct ibv_recv_wr *rsp_recv_wrs;
112
113 /* Memory region describing all rsps for this qpair */
114 struct ibv_mr *rsp_mr;
115
116 /*
117 * Array of num_entries NVMe commands registered as RDMA message buffers.
118 * Indexed by rdma_req->id.
119 */
11fdf7f2 120 struct spdk_nvmf_cmd *cmds;
7c673cae
FG
121
122 /* Memory region describing all cmds for this qpair */
123 struct ibv_mr *cmd_mr;
124
11fdf7f2 125 struct spdk_nvme_rdma_mr_map *mr_map;
7c673cae 126
11fdf7f2
TL
127 TAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs;
128 TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs;
9f95a23c
TL
129
130 /* Placed at the end of the struct since it is not used frequently */
131 struct rdma_event_channel *cm_channel;
7c673cae
FG
132};
133
134struct spdk_nvme_rdma_req {
135 int id;
136
137 struct ibv_send_wr send_wr;
138
11fdf7f2
TL
139 struct nvme_request *req;
140
141 struct ibv_sge send_sgl[NVME_RDMA_DEFAULT_TX_SGE];
7c673cae 142
11fdf7f2 143 TAILQ_ENTRY(spdk_nvme_rdma_req) link;
9f95a23c
TL
144
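	/* Each request produces two completions on the shared CQ: the IBV_WC_SEND for the
	 * command capsule and the IBV_WC_RECV carrying the NVMe completion. Whichever
	 * arrives first sets this flag; the second one returns the rdma_req to free_reqs. */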
145 bool request_ready_to_put;
11fdf7f2 146};
7c673cae 147
11fdf7f2
TL
148static const char *rdma_cm_event_str[] = {
149 "RDMA_CM_EVENT_ADDR_RESOLVED",
150 "RDMA_CM_EVENT_ADDR_ERROR",
151 "RDMA_CM_EVENT_ROUTE_RESOLVED",
152 "RDMA_CM_EVENT_ROUTE_ERROR",
153 "RDMA_CM_EVENT_CONNECT_REQUEST",
154 "RDMA_CM_EVENT_CONNECT_RESPONSE",
155 "RDMA_CM_EVENT_CONNECT_ERROR",
156 "RDMA_CM_EVENT_UNREACHABLE",
157 "RDMA_CM_EVENT_REJECTED",
158 "RDMA_CM_EVENT_ESTABLISHED",
159 "RDMA_CM_EVENT_DISCONNECTED",
160 "RDMA_CM_EVENT_DEVICE_REMOVAL",
161 "RDMA_CM_EVENT_MULTICAST_JOIN",
162 "RDMA_CM_EVENT_MULTICAST_ERROR",
163 "RDMA_CM_EVENT_ADDR_CHANGE",
164 "RDMA_CM_EVENT_TIMEWAIT_EXIT"
7c673cae
FG
165};
166
11fdf7f2
TL
167static LIST_HEAD(, spdk_nvme_rdma_mr_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps);
168static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;
169
7c673cae
FG
170static int nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair);
171
172static inline struct nvme_rdma_qpair *
173nvme_rdma_qpair(struct spdk_nvme_qpair *qpair)
174{
175 assert(qpair->trtype == SPDK_NVME_TRANSPORT_RDMA);
11fdf7f2 176 return SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
7c673cae
FG
177}
178
179static inline struct nvme_rdma_ctrlr *
180nvme_rdma_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
181{
182 assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA);
11fdf7f2 183 return SPDK_CONTAINEROF(ctrlr, struct nvme_rdma_ctrlr, ctrlr);
7c673cae
FG
184}
185
186static struct spdk_nvme_rdma_req *
187nvme_rdma_req_get(struct nvme_rdma_qpair *rqpair)
188{
189 struct spdk_nvme_rdma_req *rdma_req;
190
11fdf7f2 191 rdma_req = TAILQ_FIRST(&rqpair->free_reqs);
7c673cae 192 if (rdma_req) {
11fdf7f2
TL
193 TAILQ_REMOVE(&rqpair->free_reqs, rdma_req, link);
194 TAILQ_INSERT_TAIL(&rqpair->outstanding_reqs, rdma_req, link);
7c673cae
FG
195 }
196
197 return rdma_req;
198}
199
200static void
201nvme_rdma_req_put(struct nvme_rdma_qpair *rqpair, struct spdk_nvme_rdma_req *rdma_req)
202{
9f95a23c 203 rdma_req->request_ready_to_put = false;
11fdf7f2
TL
204 TAILQ_REMOVE(&rqpair->outstanding_reqs, rdma_req, link);
205 TAILQ_INSERT_HEAD(&rqpair->free_reqs, rdma_req, link);
7c673cae
FG
206}
207
208static void
209nvme_rdma_req_complete(struct nvme_request *req,
210 struct spdk_nvme_cpl *rsp)
211{
9f95a23c 212 nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp);
7c673cae
FG
213 nvme_free_request(req);
214}
215
11fdf7f2
TL
216static const char *
217nvme_rdma_cm_event_str_get(uint32_t event)
218{
219 if (event < SPDK_COUNTOF(rdma_cm_event_str)) {
220 return rdma_cm_event_str[event];
221 } else {
222 return "Undefined";
223 }
224}
225
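/* Block on the CM event channel until the next event arrives and verify it is the expected
 * type. On a mismatch the event is acked here and NULL is returned; on success the caller
 * is responsible for calling rdma_ack_cm_event(). */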
7c673cae
FG
226static struct rdma_cm_event *
227nvme_rdma_get_event(struct rdma_event_channel *channel,
228 enum rdma_cm_event_type evt)
229{
230 struct rdma_cm_event *event;
231 int rc;
232
233 rc = rdma_get_cm_event(channel, &event);
234 if (rc < 0) {
235 SPDK_ERRLOG("Failed to get event from CM event channel. Error %d (%s)\n",
11fdf7f2 236 errno, spdk_strerror(errno));
7c673cae
FG
237 return NULL;
238 }
239
240 if (event->event != evt) {
11fdf7f2
TL
241 SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n",
242 nvme_rdma_cm_event_str_get(evt),
243 nvme_rdma_cm_event_str_get(event->event), event->event, event->status);
7c673cae
FG
244 rdma_ack_cm_event(event);
245 return NULL;
246 }
247
248 return event;
249}
250
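/* Create the completion queue and RC queue pair for this qpair. The CQ is sized at
 * num_entries * 2 so it can hold a send and a receive completion for every slot. The
 * protection domain comes from the get_ibv_pd hook when one is registered; otherwise
 * rdma_create_qp() allocates one, which is then read back from cm_id->qp->pd. */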
251static int
252nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
253{
254 int rc;
255 struct ibv_qp_init_attr attr;
9f95a23c
TL
256 struct ibv_device_attr dev_attr;
257 struct nvme_rdma_ctrlr *rctrlr;
258
259 rc = ibv_query_device(rqpair->cm_id->verbs, &dev_attr);
260 if (rc != 0) {
261 SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
262 return -1;
263 }
7c673cae
FG
264
265 rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
266 if (!rqpair->cq) {
11fdf7f2 267 SPDK_ERRLOG("Unable to create completion queue: errno %d: %s\n", errno, spdk_strerror(errno));
7c673cae
FG
268 return -1;
269 }
270
9f95a23c
TL
271 rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
272 if (g_nvme_hooks.get_ibv_pd) {
273 rctrlr->pd = g_nvme_hooks.get_ibv_pd(&rctrlr->ctrlr.trid, rqpair->cm_id->verbs);
274 } else {
275 rctrlr->pd = NULL;
276 }
277
7c673cae
FG
278 memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
279 attr.qp_type = IBV_QPT_RC;
280 attr.send_cq = rqpair->cq;
281 attr.recv_cq = rqpair->cq;
282 attr.cap.max_send_wr = rqpair->num_entries; /* SEND operations */
283 attr.cap.max_recv_wr = rqpair->num_entries; /* RECV operations */
9f95a23c
TL
284 attr.cap.max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, dev_attr.max_sge);
285 attr.cap.max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, dev_attr.max_sge);
286
287 rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
7c673cae 288
7c673cae
FG
289 if (rc) {
290 SPDK_ERRLOG("rdma_create_qp failed\n");
291 return -1;
292 }
293
9f95a23c
TL
294 /* ibv_create_qp will change the values in attr.cap. Make sure we store the proper value. */
295 rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
296 rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);
297
298 rctrlr->pd = rqpair->cm_id->qp->pd;
299
7c673cae
FG
300 rqpair->cm_id->context = &rqpair->qpair;
301
302 return 0;
303}
304
305#define nvme_rdma_trace_ibv_sge(sg_list) \
306 if (sg_list) { \
11fdf7f2 307 SPDK_DEBUGLOG(SPDK_LOG_NVME, "local addr %p length 0x%x lkey 0x%x\n", \
7c673cae
FG
308 (void *)(sg_list)->addr, (sg_list)->length, (sg_list)->lkey); \
309 }
310
311static int
312nvme_rdma_post_recv(struct nvme_rdma_qpair *rqpair, uint16_t rsp_idx)
313{
314 struct ibv_recv_wr *wr, *bad_wr = NULL;
315 int rc;
316
317 wr = &rqpair->rsp_recv_wrs[rsp_idx];
318 nvme_rdma_trace_ibv_sge(wr->sg_list);
319
320 rc = ibv_post_recv(rqpair->cm_id->qp, wr, &bad_wr);
321 if (rc) {
322 SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc);
323 }
324
325 return rc;
326}
327
328static void
9f95a23c 329nvme_rdma_unregister_rsps(struct nvme_rdma_qpair *rqpair)
7c673cae
FG
330{
331 if (rqpair->rsp_mr && rdma_dereg_mr(rqpair->rsp_mr)) {
332 SPDK_ERRLOG("Unable to de-register rsp_mr\n");
333 }
334 rqpair->rsp_mr = NULL;
9f95a23c 335}
7c673cae 336
9f95a23c
TL
337static void
338nvme_rdma_free_rsps(struct nvme_rdma_qpair *rqpair)
339{
7c673cae
FG
340 free(rqpair->rsps);
341 rqpair->rsps = NULL;
342 free(rqpair->rsp_sgls);
343 rqpair->rsp_sgls = NULL;
344 free(rqpair->rsp_recv_wrs);
345 rqpair->rsp_recv_wrs = NULL;
346}
347
348static int
349nvme_rdma_alloc_rsps(struct nvme_rdma_qpair *rqpair)
350{
7c673cae
FG
351 rqpair->rsps = NULL;
352 rqpair->rsp_recv_wrs = NULL;
353
354 rqpair->rsp_sgls = calloc(rqpair->num_entries, sizeof(*rqpair->rsp_sgls));
355 if (!rqpair->rsp_sgls) {
356 SPDK_ERRLOG("Failed to allocate rsp_sgls\n");
357 goto fail;
358 }
359
360 rqpair->rsp_recv_wrs = calloc(rqpair->num_entries,
361 sizeof(*rqpair->rsp_recv_wrs));
362 if (!rqpair->rsp_recv_wrs) {
363 SPDK_ERRLOG("Failed to allocate rsp_recv_wrs\n");
364 goto fail;
365 }
366
367 rqpair->rsps = calloc(rqpair->num_entries, sizeof(*rqpair->rsps));
368 if (!rqpair->rsps) {
369 SPDK_ERRLOG("can not allocate rdma rsps\n");
370 goto fail;
371 }
372
9f95a23c
TL
373 return 0;
374fail:
375 nvme_rdma_free_rsps(rqpair);
376 return -ENOMEM;
377}
378
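/* Register the response array as a single MR and pre-post one receive work request per
 * entry, so the target's RDMA SEND of each NVMe completion lands directly in rsps[]. */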
379static int
380nvme_rdma_register_rsps(struct nvme_rdma_qpair *rqpair)
381{
382 int i;
383
7c673cae
FG
384 rqpair->rsp_mr = rdma_reg_msgs(rqpair->cm_id, rqpair->rsps,
385 rqpair->num_entries * sizeof(*rqpair->rsps));
386 if (rqpair->rsp_mr == NULL) {
387 SPDK_ERRLOG("Unable to register rsp_mr\n");
388 goto fail;
389 }
390
391 for (i = 0; i < rqpair->num_entries; i++) {
392 struct ibv_sge *rsp_sgl = &rqpair->rsp_sgls[i];
393
394 rsp_sgl->addr = (uint64_t)&rqpair->rsps[i];
395 rsp_sgl->length = sizeof(rqpair->rsps[i]);
396 rsp_sgl->lkey = rqpair->rsp_mr->lkey;
397
398 rqpair->rsp_recv_wrs[i].wr_id = i;
399 rqpair->rsp_recv_wrs[i].next = NULL;
400 rqpair->rsp_recv_wrs[i].sg_list = rsp_sgl;
401 rqpair->rsp_recv_wrs[i].num_sge = 1;
402
403 if (nvme_rdma_post_recv(rqpair, i)) {
404 SPDK_ERRLOG("Unable to post connection rx desc\n");
405 goto fail;
406 }
407 }
408
409 return 0;
410
411fail:
9f95a23c 412 nvme_rdma_unregister_rsps(rqpair);
7c673cae
FG
413 return -ENOMEM;
414}
415
416static void
9f95a23c 417nvme_rdma_unregister_reqs(struct nvme_rdma_qpair *rqpair)
7c673cae 418{
7c673cae
FG
419 if (rqpair->cmd_mr && rdma_dereg_mr(rqpair->cmd_mr)) {
420 SPDK_ERRLOG("Unable to de-register cmd_mr\n");
421 }
422 rqpair->cmd_mr = NULL;
9f95a23c
TL
423}
424
425static void
426nvme_rdma_free_reqs(struct nvme_rdma_qpair *rqpair)
427{
428 if (!rqpair->rdma_reqs) {
429 return;
430 }
7c673cae
FG
431
432 free(rqpair->cmds);
433 rqpair->cmds = NULL;
434
435 free(rqpair->rdma_reqs);
436 rqpair->rdma_reqs = NULL;
437}
438
439static int
440nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair)
441{
7c673cae
FG
442 rqpair->rdma_reqs = calloc(rqpair->num_entries, sizeof(struct spdk_nvme_rdma_req));
443 if (rqpair->rdma_reqs == NULL) {
444 SPDK_ERRLOG("Failed to allocate rdma_reqs\n");
445 goto fail;
446 }
447
448 rqpair->cmds = calloc(rqpair->num_entries, sizeof(*rqpair->cmds));
449 if (!rqpair->cmds) {
450 SPDK_ERRLOG("Failed to allocate RDMA cmds\n");
451 goto fail;
452 }
453
9f95a23c
TL
454 return 0;
455fail:
456 nvme_rdma_free_reqs(rqpair);
457 return -ENOMEM;
458}
459
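/* Register the command array as a single MR and initialize every rdma_req: send_sgl[0]
 * always points at the request's spdk_nvmf_cmd slot, and a second SGE may be appended
 * later when inline (in-capsule) data is used. */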
460static int
461nvme_rdma_register_reqs(struct nvme_rdma_qpair *rqpair)
462{
463 int i;
464
7c673cae
FG
465 rqpair->cmd_mr = rdma_reg_msgs(rqpair->cm_id, rqpair->cmds,
466 rqpair->num_entries * sizeof(*rqpair->cmds));
467 if (!rqpair->cmd_mr) {
468 SPDK_ERRLOG("Unable to register cmd_mr\n");
469 goto fail;
470 }
471
11fdf7f2
TL
472 TAILQ_INIT(&rqpair->free_reqs);
473 TAILQ_INIT(&rqpair->outstanding_reqs);
7c673cae
FG
474 for (i = 0; i < rqpair->num_entries; i++) {
475 struct spdk_nvme_rdma_req *rdma_req;
11fdf7f2 476 struct spdk_nvmf_cmd *cmd;
7c673cae
FG
477
478 rdma_req = &rqpair->rdma_reqs[i];
479 cmd = &rqpair->cmds[i];
480
481 rdma_req->id = i;
482
11fdf7f2
TL
483 /* The first RDMA sgl element will always point
484 * at this data structure. Depending on whether
485 * an NVMe-oF SGL is required, the length of
486 * this element may change. */
487 rdma_req->send_sgl[0].addr = (uint64_t)cmd;
488 rdma_req->send_sgl[0].lkey = rqpair->cmd_mr->lkey;
7c673cae
FG
489
490 rdma_req->send_wr.wr_id = (uint64_t)rdma_req;
491 rdma_req->send_wr.next = NULL;
492 rdma_req->send_wr.opcode = IBV_WR_SEND;
493 rdma_req->send_wr.send_flags = IBV_SEND_SIGNALED;
11fdf7f2 494 rdma_req->send_wr.sg_list = rdma_req->send_sgl;
7c673cae
FG
495 rdma_req->send_wr.imm_data = 0;
496
11fdf7f2 497 TAILQ_INSERT_TAIL(&rqpair->free_reqs, rdma_req, link);
7c673cae
FG
498 }
499
500 return 0;
501
502fail:
9f95a23c 503 nvme_rdma_unregister_reqs(rqpair);
7c673cae
FG
504 return -ENOMEM;
505}
506
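/* Handle one received NVMe completion: look up the outstanding request by the CID echoed
 * in the completion, complete it, re-post the receive buffer, and resubmit one queued
 * request if any are waiting for a free slot. */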
507static int
508nvme_rdma_recv(struct nvme_rdma_qpair *rqpair, uint64_t rsp_idx)
509{
510 struct spdk_nvme_qpair *qpair = &rqpair->qpair;
511 struct spdk_nvme_rdma_req *rdma_req;
512 struct spdk_nvme_cpl *rsp;
513 struct nvme_request *req;
514
515 assert(rsp_idx < rqpair->num_entries);
516 rsp = &rqpair->rsps[rsp_idx];
517 rdma_req = &rqpair->rdma_reqs[rsp->cid];
518
519 req = rdma_req->req;
520 nvme_rdma_req_complete(req, rsp);
521
9f95a23c
TL
522 if (rdma_req->request_ready_to_put) {
523 nvme_rdma_req_put(rqpair, rdma_req);
524 } else {
525 rdma_req->request_ready_to_put = true;
526 }
527
7c673cae
FG
528 if (nvme_rdma_post_recv(rqpair, rsp_idx)) {
529 SPDK_ERRLOG("Unable to re-post rx descriptor\n");
530 return -1;
531 }
532
533 if (!STAILQ_EMPTY(&qpair->queued_req) && !qpair->ctrlr->is_resetting) {
534 req = STAILQ_FIRST(&qpair->queued_req);
535 STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
536 nvme_qpair_submit_request(qpair, req);
537 }
538
539 return 0;
540}
541
542static int
543nvme_rdma_resolve_addr(struct nvme_rdma_qpair *rqpair,
11fdf7f2
TL
544 struct sockaddr *src_addr,
545 struct sockaddr *dst_addr,
7c673cae
FG
546 struct rdma_event_channel *cm_channel)
547{
548 int ret;
549 struct rdma_cm_event *event;
550
11fdf7f2 551 ret = rdma_resolve_addr(rqpair->cm_id, src_addr, dst_addr,
7c673cae
FG
552 NVME_RDMA_TIME_OUT_IN_MS);
553 if (ret) {
554 SPDK_ERRLOG("rdma_resolve_addr, %d\n", errno);
555 return ret;
556 }
557
558 event = nvme_rdma_get_event(cm_channel, RDMA_CM_EVENT_ADDR_RESOLVED);
559 if (event == NULL) {
560 SPDK_ERRLOG("RDMA address resolution error\n");
561 return -1;
562 }
563 rdma_ack_cm_event(event);
564
565 ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS);
566 if (ret) {
567 SPDK_ERRLOG("rdma_resolve_route\n");
568 return ret;
569 }
570
571 event = nvme_rdma_get_event(cm_channel, RDMA_CM_EVENT_ROUTE_RESOLVED);
572 if (event == NULL) {
573 SPDK_ERRLOG("RDMA route resolution error\n");
574 return -1;
575 }
576 rdma_ack_cm_event(event);
577
578 return 0;
579}
580
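/* Issue the RDMA CM connect. The private data carries the NVMe-oF RDMA connect parameters
 * (queue ID, host receive/send queue sizes, controller ID), and the accept private data
 * returned by the target caps num_entries at the queue depth the target actually granted. */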
581static int
582nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
583{
584 struct rdma_conn_param param = {};
11fdf7f2 585 struct spdk_nvmf_rdma_request_private_data request_data = {};
7c673cae 586 struct spdk_nvmf_rdma_accept_private_data *accept_data;
11fdf7f2
TL
587 struct ibv_device_attr attr;
588 int ret;
589 struct rdma_cm_event *event;
590 struct spdk_nvme_ctrlr *ctrlr;
7c673cae
FG
591
592 ret = ibv_query_device(rqpair->cm_id->verbs, &attr);
593 if (ret != 0) {
594 SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
595 return ret;
596 }
597
598 param.responder_resources = spdk_min(rqpair->num_entries, attr.max_qp_rd_atom);
599
600 ctrlr = rqpair->qpair.ctrlr;
601 if (!ctrlr) {
602 return -1;
603 }
604
7c673cae
FG
605 request_data.qid = rqpair->qpair.id;
606 request_data.hrqsize = rqpair->num_entries;
607 request_data.hsqsize = rqpair->num_entries - 1;
11fdf7f2 608 request_data.cntlid = ctrlr->cntlid;
7c673cae
FG
609
610 param.private_data = &request_data;
611 param.private_data_len = sizeof(request_data);
11fdf7f2
TL
612 param.retry_count = 7;
613 param.rnr_retry_count = 7;
7c673cae
FG
614
615 ret = rdma_connect(rqpair->cm_id, &param);
616 if (ret) {
617 SPDK_ERRLOG("nvme rdma connect error\n");
618 return ret;
619 }
620
621 event = nvme_rdma_get_event(rqpair->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
622 if (event == NULL) {
623 SPDK_ERRLOG("RDMA connect error\n");
624 return -1;
625 }
626
627 accept_data = (struct spdk_nvmf_rdma_accept_private_data *)event->param.conn.private_data;
628 if (accept_data == NULL) {
629 rdma_ack_cm_event(event);
630 SPDK_ERRLOG("NVMe-oF target did not return accept data\n");
631 return -1;
632 }
633
11fdf7f2 634 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested queue depth %d. Actually got queue depth %d.\n",
7c673cae
FG
635 rqpair->num_entries, accept_data->crqsize);
636
637 rqpair->num_entries = spdk_min(rqpair->num_entries, accept_data->crqsize);
638
639 rdma_ack_cm_event(event);
640
641 return 0;
642}
643
644static int
645nvme_rdma_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service)
646{
647 struct addrinfo *res;
648 struct addrinfo hints;
649 int ret;
650
651 memset(&hints, 0, sizeof(hints));
652 hints.ai_family = family;
653 hints.ai_socktype = SOCK_STREAM;
654 hints.ai_protocol = 0;
655
656 ret = getaddrinfo(addr, service, &hints, &res);
657 if (ret) {
11fdf7f2 658 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
7c673cae
FG
659 return ret;
660 }
661
662 if (res->ai_addrlen > sizeof(*sa)) {
663 SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
664 ret = EINVAL;
665 } else {
666 memcpy(sa, res->ai_addr, res->ai_addrlen);
667 }
668
669 freeaddrinfo(res);
670 return ret;
671}
672
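/* spdk_mem_map notify callback: register an ibv_mr (or record the hook-provided key) when
 * memory is added to the SPDK memory map, and drop the translation when it is removed.
 * The stored translation is what the SGL-building routines below look up per payload buffer. */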
673static int
7c673cae
FG
674nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
675 enum spdk_mem_map_notify_action action,
676 void *vaddr, size_t size)
677{
678 struct ibv_pd *pd = cb_ctx;
679 struct ibv_mr *mr;
11fdf7f2 680 int rc;
7c673cae
FG
681
682 switch (action) {
683 case SPDK_MEM_MAP_NOTIFY_REGISTER:
9f95a23c
TL
684 if (!g_nvme_hooks.get_rkey) {
685 mr = ibv_reg_mr(pd, vaddr, size,
686 IBV_ACCESS_LOCAL_WRITE |
687 IBV_ACCESS_REMOTE_READ |
688 IBV_ACCESS_REMOTE_WRITE);
689 if (mr == NULL) {
690 SPDK_ERRLOG("ibv_reg_mr() failed\n");
691 return -EFAULT;
692 } else {
693 rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
694 }
7c673cae 695 } else {
9f95a23c
TL
696 rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
697 g_nvme_hooks.get_rkey(pd, vaddr, size));
7c673cae
FG
698 }
699 break;
700 case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
9f95a23c
TL
701 if (!g_nvme_hooks.get_rkey) {
702 mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
703 if (mr) {
704 ibv_dereg_mr(mr);
705 }
7c673cae 706 }
9f95a23c 707 rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
7c673cae 708 break;
11fdf7f2
TL
709 default:
710 SPDK_UNREACHABLE();
7c673cae 711 }
7c673cae 712
11fdf7f2
TL
713 return rc;
714}
7c673cae 715
9f95a23c
TL
716static int
717nvme_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
718{
719 /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
720 return addr_1 == addr_2;
721}
722
7c673cae
FG
723static int
724nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
725{
726 struct ibv_pd *pd = rqpair->cm_id->qp->pd;
11fdf7f2
TL
727 struct spdk_nvme_rdma_mr_map *mr_map;
728 const struct spdk_mem_map_ops nvme_rdma_map_ops = {
729 .notify_cb = nvme_rdma_mr_map_notify,
9f95a23c 730 .are_contiguous = nvme_rdma_check_contiguous_entries
11fdf7f2
TL
731 };
732
733 pthread_mutex_lock(&g_rdma_mr_maps_mutex);
734
735 /* Look up existing mem map registration for this pd */
736 LIST_FOREACH(mr_map, &g_rdma_mr_maps, link) {
737 if (mr_map->pd == pd) {
738 mr_map->ref++;
739 rqpair->mr_map = mr_map;
740 pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
741 return 0;
742 }
743 }
7c673cae 744
11fdf7f2 745 mr_map = calloc(1, sizeof(*mr_map));
7c673cae 746 if (mr_map == NULL) {
11fdf7f2
TL
747 SPDK_ERRLOG("calloc() failed\n");
748 pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
749 return -1;
750 }
751
752 mr_map->ref = 1;
753 mr_map->pd = pd;
754 mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
755 if (mr_map->map == NULL) {
7c673cae 756 SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
11fdf7f2
TL
757 free(mr_map);
758 pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
7c673cae
FG
759 return -1;
760 }
761
762 rqpair->mr_map = mr_map;
11fdf7f2
TL
763 LIST_INSERT_HEAD(&g_rdma_mr_maps, mr_map, link);
764
765 pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
7c673cae
FG
766
767 return 0;
768}
769
770static void
771nvme_rdma_unregister_mem(struct nvme_rdma_qpair *rqpair)
772{
11fdf7f2
TL
773 struct spdk_nvme_rdma_mr_map *mr_map;
774
775 mr_map = rqpair->mr_map;
776 rqpair->mr_map = NULL;
777
778 if (mr_map == NULL) {
779 return;
780 }
781
782 pthread_mutex_lock(&g_rdma_mr_maps_mutex);
783
784 assert(mr_map->ref > 0);
785 mr_map->ref--;
786 if (mr_map->ref == 0) {
787 LIST_REMOVE(mr_map, link);
788 spdk_mem_map_free(&mr_map->map);
789 free(mr_map);
790 }
791
792 pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
7c673cae
FG
793}
794
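/* Full connection sequence for a qpair: create the CM event channel and ID, resolve the
 * address and route, create the CQ/QP, perform the RDMA CM connect handshake, register the
 * command/response buffers and the memory map, and finally send the NVMe-oF Fabrics CONNECT
 * command over the new queue. */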
795static int
796nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
797{
11fdf7f2
TL
798 struct sockaddr_storage dst_addr;
799 struct sockaddr_storage src_addr;
800 bool src_addr_specified;
7c673cae
FG
801 int rc;
802 struct spdk_nvme_ctrlr *ctrlr;
803 int family;
804
805 rqpair->cm_channel = rdma_create_event_channel();
806 if (rqpair->cm_channel == NULL) {
807 SPDK_ERRLOG("rdma_create_event_channel() failed\n");
808 return -1;
809 }
810
811 ctrlr = rqpair->qpair.ctrlr;
812
813 switch (ctrlr->trid.adrfam) {
814 case SPDK_NVMF_ADRFAM_IPV4:
815 family = AF_INET;
816 break;
817 case SPDK_NVMF_ADRFAM_IPV6:
818 family = AF_INET6;
819 break;
820 default:
821 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
822 return -1;
823 }
824
11fdf7f2 825 SPDK_DEBUGLOG(SPDK_LOG_NVME, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);
7c673cae 826
11fdf7f2 827 memset(&dst_addr, 0, sizeof(dst_addr));
7c673cae 828
11fdf7f2
TL
829 SPDK_DEBUGLOG(SPDK_LOG_NVME, "trsvcid is %s\n", ctrlr->trid.trsvcid);
830 rc = nvme_rdma_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
7c673cae 831 if (rc != 0) {
11fdf7f2 832 SPDK_ERRLOG("dst_addr nvme_rdma_parse_addr() failed\n");
7c673cae
FG
833 return -1;
834 }
835
11fdf7f2
TL
836 if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
837 memset(&src_addr, 0, sizeof(src_addr));
838 rc = nvme_rdma_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
839 if (rc != 0) {
840 SPDK_ERRLOG("src_addr nvme_rdma_parse_addr() failed\n");
841 return -1;
842 }
843 src_addr_specified = true;
844 } else {
845 src_addr_specified = false;
846 }
847
7c673cae
FG
848 rc = rdma_create_id(rqpair->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
849 if (rc < 0) {
850 SPDK_ERRLOG("rdma_create_id() failed\n");
851 return -1;
852 }
853
11fdf7f2
TL
854 rc = nvme_rdma_resolve_addr(rqpair,
855 src_addr_specified ? (struct sockaddr *)&src_addr : NULL,
856 (struct sockaddr *)&dst_addr, rqpair->cm_channel);
7c673cae
FG
857 if (rc < 0) {
858 SPDK_ERRLOG("nvme_rdma_resolve_addr() failed\n");
859 return -1;
860 }
861
862 rc = nvme_rdma_qpair_init(rqpair);
863 if (rc < 0) {
864 SPDK_ERRLOG("nvme_rdma_qpair_init() failed\n");
865 return -1;
866 }
867
868 rc = nvme_rdma_connect(rqpair);
869 if (rc != 0) {
870 SPDK_ERRLOG("Unable to connect the rqpair\n");
871 return -1;
872 }
873
9f95a23c 874 rc = nvme_rdma_register_reqs(rqpair);
11fdf7f2 875 SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
7c673cae 876 if (rc) {
9f95a23c 877 SPDK_ERRLOG("Unable to register rqpair RDMA requests\n");
7c673cae
FG
878 return -1;
879 }
9f95a23c 880 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA requests registered\n");
7c673cae 881
9f95a23c 882 rc = nvme_rdma_register_rsps(rqpair);
11fdf7f2 883 SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
7c673cae 884 if (rc < 0) {
9f95a23c 885 SPDK_ERRLOG("Unable to register rqpair RDMA responses\n");
7c673cae
FG
886 return -1;
887 }
9f95a23c 888 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA responses registered\n");
7c673cae
FG
889
890 rc = nvme_rdma_register_mem(rqpair);
891 if (rc < 0) {
892 SPDK_ERRLOG("Unable to register memory for RDMA\n");
893 return -1;
894 }
895
11fdf7f2 896 rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
7c673cae
FG
897 if (rc < 0) {
898 SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
899 return -1;
900 }
901
902 return 0;
903}
904
905/*
906 * Build SGL describing empty payload.
907 */
908static int
11fdf7f2 909nvme_rdma_build_null_request(struct spdk_nvme_rdma_req *rdma_req)
7c673cae 910{
11fdf7f2 911 struct nvme_request *req = rdma_req->req;
7c673cae 912
11fdf7f2 913 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
7c673cae 914
11fdf7f2
TL
915 /* The first element of this SGL is pointing at an
916 * spdk_nvmf_cmd object. For this particular command,
917 * we only need the first 64 bytes corresponding to
918 * the NVMe command. */
919 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
920
921 /* The RDMA SGL needs one element describing the NVMe command. */
922 rdma_req->send_wr.num_sge = 1;
923
924 req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
925 req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
926 req->cmd.dptr.sgl1.keyed.length = 0;
927 req->cmd.dptr.sgl1.keyed.key = 0;
928 req->cmd.dptr.sgl1.address = 0;
7c673cae
FG
929
930 return 0;
931}
932
933/*
11fdf7f2 934 * Build inline SGL describing contiguous payload buffer.
7c673cae
FG
935 */
936static int
11fdf7f2
TL
937nvme_rdma_build_contig_inline_request(struct nvme_rdma_qpair *rqpair,
938 struct spdk_nvme_rdma_req *rdma_req)
7c673cae 939{
11fdf7f2 940 struct nvme_request *req = rdma_req->req;
7c673cae 941 struct ibv_mr *mr;
11fdf7f2
TL
942 void *payload;
943 uint64_t requested_size;
7c673cae 944
11fdf7f2 945 payload = req->payload.contig_or_cb_arg + req->payload_offset;
7c673cae 946 assert(req->payload_size != 0);
11fdf7f2 947 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
7c673cae 948
11fdf7f2 949 requested_size = req->payload_size;
11fdf7f2 950
9f95a23c
TL
951 if (!g_nvme_hooks.get_rkey) {
952 mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
953 (uint64_t)payload, &requested_size);
954
955 if (mr == NULL || requested_size < req->payload_size) {
956 if (mr) {
957 SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n");
958 }
959 return -EINVAL;
960 }
961 rdma_req->send_sgl[1].lkey = mr->lkey;
962 } else {
963 rdma_req->send_sgl[1].lkey = spdk_mem_map_translate(rqpair->mr_map->map,
964 (uint64_t)payload,
965 &requested_size);
966
7c673cae
FG
967 }
968
11fdf7f2
TL
969 /* The first element of this SGL is pointing at an
970 * spdk_nvmf_cmd object. For this particular command,
971 * we only need the first 64 bytes corresponding to
972 * the NVMe command. */
973 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
974
975 rdma_req->send_sgl[1].addr = (uint64_t)payload;
976 rdma_req->send_sgl[1].length = (uint32_t)req->payload_size;
11fdf7f2
TL
977
978 /* The RDMA SGL contains two elements. The first describes
979 * the NVMe command and the second describes the data
980 * payload. */
981 rdma_req->send_wr.num_sge = 2;
982
983 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
984 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
985 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
986 req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size;
987 /* Inline only supported for icdoff == 0 currently. This function will
988 * not get called for controllers with other values. */
989 req->cmd.dptr.sgl1.address = (uint64_t)0;
7c673cae
FG
990
991 return 0;
992}
993
994/*
11fdf7f2 995 * Build SGL describing contiguous payload buffer.
7c673cae
FG
996 */
997static int
11fdf7f2
TL
998nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
999 struct spdk_nvme_rdma_req *rdma_req)
7c673cae 1000{
11fdf7f2
TL
1001 struct nvme_request *req = rdma_req->req;
1002 void *payload = req->payload.contig_or_cb_arg + req->payload_offset;
7c673cae 1003 struct ibv_mr *mr;
11fdf7f2 1004 uint64_t requested_size;
7c673cae
FG
1005
1006 assert(req->payload_size != 0);
11fdf7f2 1007 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
7c673cae 1008
11fdf7f2 1009 requested_size = req->payload_size;
9f95a23c
TL
1010 if (!g_nvme_hooks.get_rkey) {
1011
1012 mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
1013 &requested_size);
1014 if (mr == NULL) {
1015 return -1;
1016 }
1017 req->cmd.dptr.sgl1.keyed.key = mr->rkey;
1018 } else {
1019 req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
1020 (uint64_t)payload,
1021 &requested_size);
1022 }
1023
1024 if (requested_size < req->payload_size) {
1025 SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n");
7c673cae
FG
1026 return -1;
1027 }
1028
11fdf7f2
TL
1029 /* The first element of this SGL is pointing at an
1030 * spdk_nvmf_cmd object. For this particular command,
1031 * we only need the first 64 bytes corresponding to
1032 * the NVMe command. */
1033 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
7c673cae 1034
11fdf7f2
TL
1035 /* The RDMA SGL needs one element describing the NVMe command. */
1036 rdma_req->send_wr.num_sge = 1;
7c673cae 1037
11fdf7f2 1038 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
7c673cae
FG
1039 req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
1040 req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
11fdf7f2 1041 req->cmd.dptr.sgl1.keyed.length = req->payload_size;
11fdf7f2 1042 req->cmd.dptr.sgl1.address = (uint64_t)payload;
7c673cae
FG
1043
1044 return 0;
1045}
1046
11fdf7f2
TL
1047/*
1048 * Build SGL describing scattered payload buffer.
1049 */
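/* Walk the caller's scatter list and emit one keyed SGL descriptor per contiguous region,
 * up to the controller's max_sges. A single descriptor is embedded directly in the command's
 * dptr.sgl1; multiple descriptors are sent in-capsule right after the 64-byte command, with
 * sgl1 converted to a last-segment descriptor pointing at that list (offset 0). */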
7c673cae 1050static int
11fdf7f2
TL
1051nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
1052 struct spdk_nvme_rdma_req *rdma_req)
7c673cae 1053{
11fdf7f2
TL
1054 struct nvme_request *req = rdma_req->req;
1055 struct spdk_nvmf_cmd *cmd = &rqpair->cmds[rdma_req->id];
1056 struct ibv_mr *mr = NULL;
1057 void *virt_addr;
1058 uint64_t remaining_size, mr_length;
1059 uint32_t sge_length;
1060 int rc, max_num_sgl, num_sgl_desc;
7c673cae 1061
11fdf7f2
TL
1062 assert(req->payload_size != 0);
1063 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1064 assert(req->payload.reset_sgl_fn != NULL);
1065 assert(req->payload.next_sge_fn != NULL);
1066 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
7c673cae 1067
11fdf7f2 1068 max_num_sgl = req->qpair->ctrlr->max_sges;
7c673cae 1069
11fdf7f2
TL
1070 remaining_size = req->payload_size;
1071 num_sgl_desc = 0;
1072 do {
1073 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &sge_length);
1074 if (rc) {
1075 return -1;
1076 }
7c673cae 1077
11fdf7f2
TL
1078 sge_length = spdk_min(remaining_size, sge_length);
1079 mr_length = sge_length;
7c673cae 1080
9f95a23c
TL
1081 if (!g_nvme_hooks.get_rkey) {
1082 mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
1083 (uint64_t)virt_addr,
1084 &mr_length);
1085 if (mr == NULL) {
1086 return -1;
1087 }
1088 cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
1089 } else {
1090 cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
1091 (uint64_t)virt_addr,
1092 &mr_length);
1093 }
7c673cae 1094
9f95a23c
TL
1095 if (mr_length < sge_length) {
1096 SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n");
11fdf7f2
TL
1097 return -1;
1098 }
7c673cae 1099
11fdf7f2
TL
1100 cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
1101 cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
1102 cmd->sgl[num_sgl_desc].keyed.length = sge_length;
11fdf7f2 1103 cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
7c673cae 1104
11fdf7f2
TL
1105 remaining_size -= sge_length;
1106 num_sgl_desc++;
1107 } while (remaining_size > 0 && num_sgl_desc < max_num_sgl);
1108
1109
1110 /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
1111 if (remaining_size > 0) {
7c673cae
FG
1112 return -1;
1113 }
1114
11fdf7f2 1115 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
7c673cae 1116
11fdf7f2
TL
1117 /* The RDMA SGL needs one element describing some portion
1118 * of the spdk_nvmf_cmd structure. */
1119 rdma_req->send_wr.num_sge = 1;
1120
1121 /*
1122 * If only one SGL descriptor is required, it can be embedded directly in the command
1123 * as a data block descriptor.
1124 */
1125 if (num_sgl_desc == 1) {
1126 /* The first element of this SGL is pointing at an
1127 * spdk_nvmf_cmd object. For this particular command,
1128 * we only need the first 64 bytes corresponding to
1129 * the NVMe command. */
1130 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
1131
9f95a23c
TL
1132 req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
1133 req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
1134 req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
1135 req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
1136 req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
11fdf7f2
TL
1137 } else {
1138 /*
1139	 * Otherwise, the SGL descriptor embedded in the command must point to the list of
1140 * SGL descriptors used to describe the operation. In that case it is a last segment descriptor.
1141 */
1142 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd) + sizeof(struct
1143 spdk_nvme_sgl_descriptor) * num_sgl_desc;
1144
1145 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1146 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
1147 req->cmd.dptr.sgl1.unkeyed.length = num_sgl_desc * sizeof(struct spdk_nvme_sgl_descriptor);
1148 req->cmd.dptr.sgl1.address = (uint64_t)0;
7c673cae
FG
1149 }
1150
1151 return 0;
1152}
1153
11fdf7f2
TL
1154/*
1155 * Build inline SGL describing sgl payload buffer.
1156 */
7c673cae 1157static int
11fdf7f2
TL
1158nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair,
1159 struct spdk_nvme_rdma_req *rdma_req)
7c673cae 1160{
11fdf7f2
TL
1161 struct nvme_request *req = rdma_req->req;
1162 struct ibv_mr *mr;
1163 uint32_t length;
1164 uint64_t requested_size;
1165 void *virt_addr;
9f95a23c 1166 int rc, i;
7c673cae 1167
11fdf7f2
TL
1168 assert(req->payload_size != 0);
1169 assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1170 assert(req->payload.reset_sgl_fn != NULL);
1171 assert(req->payload.next_sge_fn != NULL);
1172 req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
7c673cae 1173
11fdf7f2
TL
1174 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
1175 if (rc) {
7c673cae
FG
1176 return -1;
1177 }
1178
11fdf7f2 1179 if (length < req->payload_size) {
9f95a23c
TL
1180 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Inline SGL request split so sending separately.\n");
1181 return nvme_rdma_build_sgl_request(rqpair, rdma_req);
7c673cae
FG
1182 }
1183
9f95a23c
TL
1184 if (length > req->payload_size) {
1185 length = req->payload_size;
1186 }
1187
1188 requested_size = length;
11fdf7f2
TL
1189 mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
1190 &requested_size);
9f95a23c
TL
1191 if (mr == NULL || requested_size < length) {
1192 for (i = 1; i < rdma_req->send_wr.num_sge; i++) {
1193 rdma_req->send_sgl[i].addr = 0;
1194 rdma_req->send_sgl[i].length = 0;
1195 rdma_req->send_sgl[i].lkey = 0;
1196 }
1197
1198 if (mr) {
1199 SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n");
1200 }
7c673cae
FG
1201 return -1;
1202 }
1203
9f95a23c
TL
1204 rdma_req->send_sgl[1].addr = (uint64_t)virt_addr;
1205 rdma_req->send_sgl[1].length = length;
1206 rdma_req->send_sgl[1].lkey = mr->lkey;
1207
1208 rdma_req->send_wr.num_sge = 2;
1209
11fdf7f2
TL
1210 /* The first element of this SGL is pointing at an
1211 * spdk_nvmf_cmd object. For this particular command,
1212 * we only need the first 64 bytes corresponding to
1213 * the NVMe command. */
1214 rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
1215
11fdf7f2
TL
1216 req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
1217 req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1218 req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
1219 req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size;
1220 /* Inline only supported for icdoff == 0 currently. This function will
1221 * not get called for controllers with other values. */
1222 req->cmd.dptr.sgl1.address = (uint64_t)0;
1223
1224 return 0;
1225}
1226
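/* In-capsule data size available for inline writes: IOCCSZ is reported by the controller in
 * 16-byte units and covers the whole capsule, so subtract the 64-byte command itself. */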
1227static inline unsigned int
1228nvme_rdma_icdsz_bytes(struct spdk_nvme_ctrlr *ctrlr)
1229{
1230 return (ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd));
1231}
1232
1233static int
1234nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req,
1235 struct spdk_nvme_rdma_req *rdma_req)
1236{
1237 struct spdk_nvme_ctrlr *ctrlr = rqpair->qpair.ctrlr;
1238 int rc;
1239
1240 rdma_req->req = req;
1241 req->cmd.cid = rdma_req->id;
7c673cae 1242
11fdf7f2
TL
1243 if (req->payload_size == 0) {
1244 rc = nvme_rdma_build_null_request(rdma_req);
1245 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
1246 /*
1247 * Check if icdoff is non zero, to avoid interop conflicts with
1248 * targets with non-zero icdoff. Both SPDK and the Linux kernel
1249 * targets use icdoff = 0. For targets with non-zero icdoff, we
1250 * will currently just not use inline data for now.
1251 */
1252 if (req->cmd.opc == SPDK_NVME_OPC_WRITE &&
1253 req->payload_size <= nvme_rdma_icdsz_bytes(ctrlr) &&
1254 (ctrlr->cdata.nvmf_specific.icdoff == 0)) {
1255 rc = nvme_rdma_build_contig_inline_request(rqpair, rdma_req);
1256 } else {
1257 rc = nvme_rdma_build_contig_request(rqpair, rdma_req);
1258 }
1259 } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
1260 if (req->cmd.opc == SPDK_NVME_OPC_WRITE &&
1261 req->payload_size <= nvme_rdma_icdsz_bytes(ctrlr) &&
1262 ctrlr->cdata.nvmf_specific.icdoff == 0) {
1263 rc = nvme_rdma_build_sgl_inline_request(rqpair, rdma_req);
1264 } else {
1265 rc = nvme_rdma_build_sgl_request(rqpair, rdma_req);
1266 }
7c673cae 1267 } else {
11fdf7f2 1268 rc = -1;
7c673cae
FG
1269 }
1270
11fdf7f2
TL
1271 if (rc) {
1272 return rc;
1273 }
1274
1275 memcpy(&rqpair->cmds[rdma_req->id], &req->cmd, sizeof(req->cmd));
7c673cae
FG
1276 return 0;
1277}
1278
1279static struct spdk_nvme_qpair *
1280nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
1281 uint16_t qid, uint32_t qsize,
1282 enum spdk_nvme_qprio qprio,
1283 uint32_t num_requests)
1284{
1285 struct nvme_rdma_qpair *rqpair;
1286 struct spdk_nvme_qpair *qpair;
1287 int rc;
1288
1289 rqpair = calloc(1, sizeof(struct nvme_rdma_qpair));
1290 if (!rqpair) {
1291 SPDK_ERRLOG("failed to get create rqpair\n");
1292 return NULL;
1293 }
1294
1295 rqpair->num_entries = qsize;
1296
1297 qpair = &rqpair->qpair;
1298
1299 rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests);
1300 if (rc != 0) {
1301 return NULL;
1302 }
1303
9f95a23c
TL
1304 rc = nvme_rdma_alloc_reqs(rqpair);
1305 SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
1306 if (rc) {
1307 SPDK_ERRLOG("Unable to allocate rqpair RDMA requests\n");
1308 return NULL;
1309 }
1310 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA requests allocated\n");
1311
1312 rc = nvme_rdma_alloc_rsps(rqpair);
1313 SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
1314 if (rc < 0) {
1315 SPDK_ERRLOG("Unable to allocate rqpair RDMA responses\n");
1316 return NULL;
1317 }
1318 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA responses allocated\n");
1319
7c673cae
FG
1320 rc = nvme_rdma_qpair_connect(rqpair);
1321 if (rc < 0) {
1322 nvme_rdma_qpair_destroy(qpair);
1323 return NULL;
1324 }
1325
1326 return qpair;
1327}
1328
9f95a23c
TL
1329static void
1330nvme_rdma_qpair_disconnect(struct spdk_nvme_qpair *qpair)
7c673cae 1331{
9f95a23c 1332 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
7c673cae
FG
1333
1334 nvme_rdma_unregister_mem(rqpair);
9f95a23c
TL
1335 nvme_rdma_unregister_reqs(rqpair);
1336 nvme_rdma_unregister_rsps(rqpair);
7c673cae
FG
1337
1338 if (rqpair->cm_id) {
1339 if (rqpair->cm_id->qp) {
1340 rdma_destroy_qp(rqpair->cm_id);
1341 }
1342 rdma_destroy_id(rqpair->cm_id);
1343 }
1344
1345 if (rqpair->cq) {
1346 ibv_destroy_cq(rqpair->cq);
1347 }
1348
1349 if (rqpair->cm_channel) {
1350 rdma_destroy_event_channel(rqpair->cm_channel);
1351 }
9f95a23c 1352}
7c673cae 1353
9f95a23c
TL
1354static int
1355nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair)
1356{
1357 struct nvme_rdma_qpair *rqpair;
1358
1359 if (!qpair) {
1360 return -1;
1361 }
1362 nvme_rdma_qpair_disconnect(qpair);
1363 nvme_rdma_qpair_abort_reqs(qpair, 1);
1364 nvme_qpair_deinit(qpair);
1365
1366 rqpair = nvme_rdma_qpair(qpair);
1367
1368 nvme_rdma_free_reqs(rqpair);
1369 nvme_rdma_free_rsps(rqpair);
7c673cae
FG
1370 free(rqpair);
1371
1372 return 0;
1373}
1374
1375struct spdk_nvme_qpair *
1376nvme_rdma_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
11fdf7f2 1377 const struct spdk_nvme_io_qpair_opts *opts)
7c673cae 1378{
11fdf7f2
TL
1379 return nvme_rdma_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
1380 opts->io_queue_requests);
7c673cae
FG
1381}
1382
1383int
1384nvme_rdma_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
1385{
1386 /* do nothing here */
1387 return 0;
1388}
1389
7c673cae
FG
1390/* This function must only be called while holding g_spdk_nvme_driver->lock */
1391int
9f95a23c 1392nvme_rdma_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
11fdf7f2 1393 bool direct_connect)
7c673cae
FG
1394{
1395 struct spdk_nvme_ctrlr_opts discovery_opts;
1396 struct spdk_nvme_ctrlr *discovery_ctrlr;
7c673cae 1397 union spdk_nvme_cc_register cc;
7c673cae 1398 int rc;
11fdf7f2 1399 struct nvme_completion_poll_status status;
7c673cae 1400
9f95a23c 1401 if (strcmp(probe_ctx->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN) != 0) {
11fdf7f2 1402		/* Not a discovery controller; try to connect to it directly */
9f95a23c 1403 rc = nvme_ctrlr_probe(&probe_ctx->trid, probe_ctx, NULL);
11fdf7f2
TL
1404 return rc;
1405 }
1406
1407 spdk_nvme_ctrlr_get_default_ctrlr_opts(&discovery_opts, sizeof(discovery_opts));
7c673cae
FG
1408 /* For discovery_ctrlr set the timeout to 0 */
1409 discovery_opts.keep_alive_timeout_ms = 0;
1410
9f95a23c 1411 discovery_ctrlr = nvme_rdma_ctrlr_construct(&probe_ctx->trid, &discovery_opts, NULL);
7c673cae
FG
1412 if (discovery_ctrlr == NULL) {
1413 return -1;
1414 }
1415
1416 /* TODO: this should be using the normal NVMe controller initialization process */
1417 cc.raw = 0;
1418 cc.bits.en = 1;
1419 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
1420 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
1421 rc = nvme_transport_ctrlr_set_reg_4(discovery_ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
1422 cc.raw);
1423 if (rc < 0) {
1424 SPDK_ERRLOG("Failed to set cc\n");
1425 nvme_ctrlr_destruct(discovery_ctrlr);
1426 return -1;
1427 }
1428
11fdf7f2
TL
1429 /* Direct attach through spdk_nvme_connect() API */
1430 if (direct_connect == true) {
9f95a23c
TL
1431 /* get the cdata info */
1432 rc = nvme_ctrlr_cmd_identify(discovery_ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
1433 &discovery_ctrlr->cdata, sizeof(discovery_ctrlr->cdata),
1434 nvme_completion_poll_cb, &status);
1435 if (rc != 0) {
1436 SPDK_ERRLOG("Failed to identify cdata\n");
1437 return rc;
1438 }
1439
1440 if (spdk_nvme_wait_for_completion(discovery_ctrlr->adminq, &status)) {
1441 SPDK_ERRLOG("nvme_identify_controller failed!\n");
1442 return -ENXIO;
1443 }
1444
11fdf7f2
TL
1445 /* Set the ready state to skip the normal init process */
1446 discovery_ctrlr->state = NVME_CTRLR_STATE_READY;
9f95a23c 1447 nvme_ctrlr_connected(probe_ctx, discovery_ctrlr);
11fdf7f2
TL
1448 nvme_ctrlr_add_process(discovery_ctrlr, 0);
1449 return 0;
7c673cae
FG
1450 }
1451
9f95a23c 1452 rc = nvme_fabric_ctrlr_discover(discovery_ctrlr, probe_ctx);
7c673cae 1453 nvme_ctrlr_destruct(discovery_ctrlr);
11fdf7f2 1454 return rc;
7c673cae
FG
1455}
1456
1457struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
1458 const struct spdk_nvme_ctrlr_opts *opts,
1459 void *devhandle)
1460{
1461 struct nvme_rdma_ctrlr *rctrlr;
1462 union spdk_nvme_cap_register cap;
11fdf7f2 1463 union spdk_nvme_vs_register vs;
7c673cae
FG
1464 int rc;
1465
1466 rctrlr = calloc(1, sizeof(struct nvme_rdma_ctrlr));
1467 if (rctrlr == NULL) {
1468 SPDK_ERRLOG("could not allocate ctrlr\n");
1469 return NULL;
1470 }
1471
1472 rctrlr->ctrlr.trid.trtype = SPDK_NVME_TRANSPORT_RDMA;
1473 rctrlr->ctrlr.opts = *opts;
1474 memcpy(&rctrlr->ctrlr.trid, trid, sizeof(rctrlr->ctrlr.trid));
1475
1476 rc = nvme_ctrlr_construct(&rctrlr->ctrlr);
1477 if (rc != 0) {
11fdf7f2 1478 free(rctrlr);
7c673cae
FG
1479 return NULL;
1480 }
1481
1482 rctrlr->ctrlr.adminq = nvme_rdma_ctrlr_create_qpair(&rctrlr->ctrlr, 0,
1483 SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES, 0, SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES);
1484 if (!rctrlr->ctrlr.adminq) {
1485 SPDK_ERRLOG("failed to create admin qpair\n");
11fdf7f2 1486 nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
7c673cae
FG
1487 return NULL;
1488 }
1489
1490 if (nvme_ctrlr_get_cap(&rctrlr->ctrlr, &cap)) {
1491 SPDK_ERRLOG("get_cap() failed\n");
1492 nvme_ctrlr_destruct(&rctrlr->ctrlr);
1493 return NULL;
1494 }
1495
11fdf7f2
TL
1496 if (nvme_ctrlr_get_vs(&rctrlr->ctrlr, &vs)) {
1497 SPDK_ERRLOG("get_vs() failed\n");
1498 nvme_ctrlr_destruct(&rctrlr->ctrlr);
1499 return NULL;
1500 }
1501
1502 if (nvme_ctrlr_add_process(&rctrlr->ctrlr, 0) != 0) {
1503 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
1504 nvme_ctrlr_destruct(&rctrlr->ctrlr);
1505 return NULL;
1506 }
1507
1508 nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
7c673cae 1509
11fdf7f2 1510 SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
7c673cae
FG
1511 return &rctrlr->ctrlr;
1512}
1513
1514int
1515nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
1516{
1517 struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr);
1518
1519 if (ctrlr->adminq) {
1520 nvme_rdma_qpair_destroy(ctrlr->adminq);
1521 }
1522
11fdf7f2
TL
1523 nvme_ctrlr_destruct_finish(ctrlr);
1524
7c673cae
FG
1525 free(rctrlr);
1526
1527 return 0;
1528}
1529
1530int
1531nvme_rdma_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
1532{
11fdf7f2 1533 return nvme_fabric_ctrlr_set_reg_4(ctrlr, offset, value);
7c673cae
FG
1534}
1535
1536int
1537nvme_rdma_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
1538{
11fdf7f2 1539 return nvme_fabric_ctrlr_set_reg_8(ctrlr, offset, value);
7c673cae
FG
1540}
1541
1542int
1543nvme_rdma_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
1544{
11fdf7f2 1545 return nvme_fabric_ctrlr_get_reg_4(ctrlr, offset, value);
7c673cae
FG
1546}
1547
1548int
1549nvme_rdma_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
1550{
11fdf7f2 1551 return nvme_fabric_ctrlr_get_reg_8(ctrlr, offset, value);
7c673cae
FG
1552}
1553
1554int
1555nvme_rdma_qpair_submit_request(struct spdk_nvme_qpair *qpair,
1556 struct nvme_request *req)
1557{
1558 struct nvme_rdma_qpair *rqpair;
1559 struct spdk_nvme_rdma_req *rdma_req;
1560 struct ibv_send_wr *wr, *bad_wr = NULL;
1561 int rc;
1562
1563 rqpair = nvme_rdma_qpair(qpair);
1564 assert(rqpair != NULL);
1565 assert(req != NULL);
1566
1567 rdma_req = nvme_rdma_req_get(rqpair);
1568 if (!rdma_req) {
1569 /*
9f95a23c
TL
1570 * No rdma_req is available, so queue the request to be
1571 * processed later.
7c673cae
FG
1572 */
1573 STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
1574 return 0;
1575 }
1576
1577 if (nvme_rdma_req_init(rqpair, req, rdma_req)) {
1578 SPDK_ERRLOG("nvme_rdma_req_init() failed\n");
1579 nvme_rdma_req_put(rqpair, rdma_req);
1580 return -1;
1581 }
1582
1583 wr = &rdma_req->send_wr;
1584
1585 nvme_rdma_trace_ibv_sge(wr->sg_list);
1586
1587 rc = ibv_post_send(rqpair->cm_id->qp, wr, &bad_wr);
1588 if (rc) {
11fdf7f2 1589 SPDK_ERRLOG("Failure posting rdma send for NVMf completion: %d (%s)\n", rc, spdk_strerror(rc));
7c673cae
FG
1590 }
1591
1592 return rc;
1593}
1594
1595int
1596nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
1597{
1598 return nvme_rdma_qpair_destroy(qpair);
1599}
1600
1601int
9f95a23c 1602nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
7c673cae
FG
1603{
1604 return nvme_rdma_qpair_connect(nvme_rdma_qpair(qpair));
1605}
1606
9f95a23c
TL
1607void
1608nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
7c673cae 1609{
9f95a23c 1610 nvme_rdma_qpair_disconnect(qpair);
7c673cae
FG
1611}
1612
1613int
1614nvme_rdma_qpair_reset(struct spdk_nvme_qpair *qpair)
1615{
1616 /* Currently, doing nothing here */
1617 return 0;
1618}
1619
9f95a23c
TL
1620void
1621nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
7c673cae 1622{
9f95a23c
TL
1623 struct spdk_nvme_rdma_req *rdma_req, *tmp;
1624 struct nvme_request *req;
1625 struct spdk_nvme_cpl cpl;
1626 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
1627
1628 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1629 cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1630 cpl.status.dnr = dnr;
1631
1632 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) {
1633 assert(rdma_req->req != NULL);
1634 req = rdma_req->req;
1635
1636 nvme_rdma_req_complete(req, &cpl);
1637 nvme_rdma_req_put(rqpair, rdma_req);
1638 }
7c673cae
FG
1639}
1640
11fdf7f2
TL
1641static void
1642nvme_rdma_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
1643{
1644 uint64_t t02;
1645 struct spdk_nvme_rdma_req *rdma_req, *tmp;
1646 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
1647 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
1648 struct spdk_nvme_ctrlr_process *active_proc;
1649
1650 /* Don't check timeouts during controller initialization. */
1651 if (ctrlr->state != NVME_CTRLR_STATE_READY) {
1652 return;
1653 }
1654
1655 if (nvme_qpair_is_admin_queue(qpair)) {
1656 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1657 } else {
1658 active_proc = qpair->active_proc;
1659 }
1660
1661 /* Only check timeouts if the current process has a timeout callback. */
1662 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
1663 return;
1664 }
1665
1666 t02 = spdk_get_ticks();
1667 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) {
1668 assert(rdma_req->req != NULL);
1669
1670 if (nvme_request_check_timeout(rdma_req->req, rdma_req->id, active_proc, t02)) {
1671 /*
1672 * The requests are in order, so as soon as one has not timed out,
1673 * stop iterating.
1674 */
1675 break;
1676 }
1677 }
1678}
1679
7c673cae
FG
1680#define MAX_COMPLETIONS_PER_POLL 128
1681
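/* Poll the qpair's CQ in batches of up to MAX_COMPLETIONS_PER_POLL. Only IBV_WC_RECV
 * completions (i.e. NVMe completions) count toward max_completions; IBV_WC_SEND completions
 * just release or arm the corresponding rdma_req via request_ready_to_put. */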
1682int
1683nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
1684 uint32_t max_completions)
1685{
9f95a23c
TL
1686 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
1687 struct ibv_wc wc[MAX_COMPLETIONS_PER_POLL];
1688 int i, rc, batch_size;
1689 uint32_t reaped;
1690 struct ibv_cq *cq;
1691 struct spdk_nvme_rdma_req *rdma_req;
7c673cae
FG
1692
1693 if (max_completions == 0) {
1694 max_completions = rqpair->num_entries;
1695 } else {
1696 max_completions = spdk_min(max_completions, rqpair->num_entries);
1697 }
1698
1699 cq = rqpair->cq;
1700
1701 reaped = 0;
1702 do {
1703 batch_size = spdk_min((max_completions - reaped),
1704 MAX_COMPLETIONS_PER_POLL);
1705 rc = ibv_poll_cq(cq, batch_size, wc);
1706 if (rc < 0) {
1707 SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
11fdf7f2 1708 errno, spdk_strerror(errno));
7c673cae
FG
1709 return -1;
1710 } else if (rc == 0) {
1711 /* Ran out of completions */
1712 break;
1713 }
1714
1715 for (i = 0; i < rc; i++) {
1716 if (wc[i].status) {
1717 SPDK_ERRLOG("CQ error on Queue Pair %p, Response Index %lu (%d): %s\n",
1718 qpair, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status));
1719 return -1;
1720 }
1721
1722 switch (wc[i].opcode) {
1723 case IBV_WC_RECV:
11fdf7f2 1724 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CQ recv completion\n");
7c673cae
FG
1725
1726 reaped++;
1727
1728 if (wc[i].byte_len < sizeof(struct spdk_nvme_cpl)) {
1729 SPDK_ERRLOG("recv length %u less than expected response size\n", wc[i].byte_len);
1730 return -1;
1731 }
1732
1733 if (nvme_rdma_recv(rqpair, wc[i].wr_id)) {
1734 SPDK_ERRLOG("nvme_rdma_recv processing failure\n");
1735 return -1;
1736 }
1737 break;
1738
1739 case IBV_WC_SEND:
9f95a23c
TL
1740 rdma_req = (struct spdk_nvme_rdma_req *)wc[i].wr_id;
1741
1742 if (rdma_req->request_ready_to_put) {
1743 nvme_rdma_req_put(rqpair, rdma_req);
1744 } else {
1745 rdma_req->request_ready_to_put = true;
1746 }
7c673cae
FG
1747 break;
1748
1749 default:
1750 SPDK_ERRLOG("Received an unexpected opcode on the CQ: %d\n", wc[i].opcode);
1751 return -1;
1752 }
1753 }
1754 } while (reaped < max_completions);
1755
11fdf7f2
TL
1756 if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) {
1757 nvme_rdma_qpair_check_timeout(qpair);
1758 }
1759
7c673cae
FG
1760 return reaped;
1761}
1762
1763uint32_t
1764nvme_rdma_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
1765{
1766	/* TODO: this value should be obtained from the NVMe-oF target */
1767 return NVME_RDMA_RW_BUFFER_SIZE;
1768}
1769
11fdf7f2
TL
1770uint16_t
1771nvme_rdma_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
7c673cae 1772{
11fdf7f2
TL
1773 return spdk_min(ctrlr->cdata.nvmf_specific.msdbd, NVME_RDMA_MAX_SGL_DESCRIPTORS);
1774}
1775
9f95a23c
TL
1776volatile struct spdk_nvme_registers *
1777nvme_rdma_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr)
1778{
1779 return NULL;
1780}
1781
11fdf7f2
TL
1782void *
1783nvme_rdma_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
1784{
1785 return NULL;
1786}
1787
1788int
1789nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
1790{
1791 return 0;
7c673cae 1792}
9f95a23c
TL
1793
1794void
1795nvme_rdma_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
1796{
1797 struct spdk_nvme_rdma_req *rdma_req, *tmp;
1798 struct nvme_request *req;
1799 struct spdk_nvme_cpl cpl;
1800 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
1801
1802 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1803 cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1804
1805 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) {
1806 if (rdma_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
1807 continue;
1808 }
1809 assert(rdma_req->req != NULL);
1810 req = rdma_req->req;
1811
1812 nvme_rdma_req_complete(req, &cpl);
1813 nvme_rdma_req_put(rqpair, rdma_req);
1814 }
1815}
1816
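/* Public entry point for installing the g_nvme_hooks callbacks declared near the top of
 * this file. A minimal usage sketch (hypothetical application code; callback names invented
 * here for illustration):
 *
 *     struct spdk_nvme_rdma_hooks my_hooks = {
 *             .get_ibv_pd = my_get_pd,   // supply the ibv_pd to use for a transport ID
 *             .get_rkey   = my_get_rkey, // supply the memory key for a payload buffer
 *     };
 *     spdk_nvme_rdma_init_hooks(&my_hooks);
 *
 * This needs to happen before RDMA controllers/qpairs are created, since the hooks are
 * read during qpair initialization and memory registration. */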
1817void
1818spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
1819{
1820 g_nvme_hooks = *hooks;
1821}