/* net/sunrpc/xprtrdma/verbs.c */
f58851e6 1/*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
a6b7a407 50#include <linux/interrupt.h>
5a0e3ad6 51#include <linux/slab.h>
eba8ff66 52#include <linux/prefetch.h>
0dd39cae 53#include <linux/sunrpc/addr.h>
65866f82 54#include <asm/bitops.h>
c56c65fb 55
56#include "xprt_rdma.h"
57
58/*
59 * Globals/Macros
60 */
61
f895b252 62#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
63# define RPCDBG_FACILITY RPCDBG_TRANS
64#endif
65
66/*
67 * internal functions
68 */
69
70/*
71 * handle replies in tasklet context, using a single, global list
72 * rdma tasklet function -- just turn around and call the func
73 * for all replies on the list
74 */
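/* Reply dispatch flow, as implemented below: rpcrdma_recvcq_poll() collects
 * completed rpcrdma_reps on a local list, rpcrdma_schedule_tasklet() splices
 * that list onto rpcrdma_tasklets_g under rpcrdma_tk_lock_g, and
 * rpcrdma_run_tasklet() later pops each rep and invokes its rr_func -- or
 * returns the buffer with rpcrdma_recv_buffer_put() when no handler is set.
 */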
75
76static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
77static LIST_HEAD(rpcrdma_tasklets_g);
78
79static void
80rpcrdma_run_tasklet(unsigned long data)
81{
82 struct rpcrdma_rep *rep;
83 void (*func)(struct rpcrdma_rep *);
84 unsigned long flags;
85
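	/* The tasklet "data" argument is unused; the self-assignment below
	 * presumably exists only to quiet unused-parameter warnings. */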
86 data = data;
87 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
88 while (!list_empty(&rpcrdma_tasklets_g)) {
89 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list);
92 func = rep->rr_func;
93 rep->rr_func = NULL;
94 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
95
96 if (func)
97 func(rep);
98 else
99 rpcrdma_recv_buffer_put(rep);
100
101 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
102 }
103 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
104}
105
106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
107
108static const char * const async_event[] = {
109 "CQ error",
110 "QP fatal error",
111 "QP request error",
112 "QP access error",
113 "communication established",
114 "send queue drained",
115 "path migration successful",
116 "path mig error",
117 "device fatal error",
118 "port active",
119 "port error",
120 "LID change",
121 "P_key change",
122 "SM change",
123 "SRQ error",
124 "SRQ limit reached",
125 "last WQE reached",
126 "client reregister",
127 "GID change",
128};
129
130#define ASYNC_MSG(status) \
131 ((status) < ARRAY_SIZE(async_event) ? \
132 async_event[(status)] : "unknown async error")
133
134static void
135rpcrdma_schedule_tasklet(struct list_head *sched_list)
136{
137 unsigned long flags;
138
139 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
140 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
141 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
142 tasklet_schedule(&rpcrdma_tasklet_g);
143}
144
145static void
146rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
147{
148 struct rpcrdma_ep *ep = context;
149
150 pr_err("RPC: %s: %s on device %s ep %p\n",
151 __func__, ASYNC_MSG(event->event),
152 event->device->name, context);
153 if (ep->rep_connected == 1) {
154 ep->rep_connected = -EIO;
afadc468 155 rpcrdma_conn_func(ep);
156 wake_up_all(&ep->rep_connect_wait);
157 }
158}
159
160static void
161rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
162{
163 struct rpcrdma_ep *ep = context;
164
165 pr_err("RPC: %s: %s on device %s ep %p\n",
166 __func__, ASYNC_MSG(event->event),
167 event->device->name, context);
168 if (ep->rep_connected == 1) {
169 ep->rep_connected = -EIO;
afadc468 170 rpcrdma_conn_func(ep);
171 wake_up_all(&ep->rep_connect_wait);
172 }
173}
174
175static const char * const wc_status[] = {
176 "success",
177 "local length error",
178 "local QP operation error",
179 "local EE context operation error",
180 "local protection error",
181 "WR flushed",
182 "memory management operation error",
183 "bad response error",
184 "local access error",
185 "remote invalid request error",
186 "remote access error",
187 "remote operation error",
188 "transport retry counter exceeded",
 189	"RNR retry counter exceeded",
190 "local RDD violation error",
 191	"remote invalid RD request",
192 "operation aborted",
193 "invalid EE context number",
194 "invalid EE context state",
195 "fatal error",
196 "response timeout error",
197 "general error",
198};
199
200#define COMPLETION_MSG(status) \
201 ((status) < ARRAY_SIZE(wc_status) ? \
202 wc_status[(status)] : "unexpected completion error")
203
204static void
205rpcrdma_sendcq_process_wc(struct ib_wc *wc)
c56c65fb 206{
8502427c 207 if (likely(wc->status == IB_WC_SUCCESS))
c56c65fb 208 return;
209
210 /* WARNING: Only wr_id and status are reliable at this point */
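	/* wr_id convention in this file: rpcrdma_ep_post() posts plain SENDs
	 * with wr_id 0, while FRMR work requests (see rpcrdma_retry_local_inv)
	 * carry a pointer to their rpcrdma_mw, so a non-zero wr_id here
	 * identifies the MR to mark stale. */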
211 if (wc->wr_id == 0ULL) {
212 if (wc->status != IB_WC_WR_FLUSH_ERR)
213 pr_err("RPC: %s: SEND: %s\n",
214 __func__, COMPLETION_MSG(wc->status));
215 } else {
216 struct rpcrdma_mw *r;
217
218 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
219 r->r.frmr.fr_state = FRMR_IS_STALE;
220 pr_err("RPC: %s: frmr %p (stale): %s\n",
221 __func__, r, COMPLETION_MSG(wc->status));
222 }
223}
224
fc664485 225static int
1c00dd07 226rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
c56c65fb 227{
1c00dd07 228 struct ib_wc *wcs;
8301a2c0 229 int budget, count, rc;
c56c65fb 230
8301a2c0 231 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
232 do {
233 wcs = ep->rep_send_wcs;
234
235 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
236 if (rc <= 0)
237 return rc;
238
239 count = rc;
240 while (count-- > 0)
241 rpcrdma_sendcq_process_wc(wcs++);
8301a2c0 242 } while (rc == RPCRDMA_POLLSIZE && --budget);
1c00dd07 243 return 0;
fc664485 244}
c56c65fb 245
246/*
247 * Handle send, fast_reg_mr, and local_inv completions.
248 *
249 * Send events are typically suppressed and thus do not result
250 * in an upcall. Occasionally one is signaled, however. This
251 * prevents the provider's completion queue from wrapping and
252 * losing a completion.
253 */
254static void
255rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
256{
1c00dd07 257 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
258 int rc;
259
1c00dd07 260 rc = rpcrdma_sendcq_poll(cq, ep);
261 if (rc) {
262 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
263 __func__, rc);
264 return;
265 }
266
267 rc = ib_req_notify_cq(cq,
268 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
269 if (rc == 0)
270 return;
271 if (rc < 0) {
272 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
273 __func__, rc);
274 return;
275 }
276
1c00dd07 277 rpcrdma_sendcq_poll(cq, ep);
278}
279
280static void
bb96193d 281rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
282{
283 struct rpcrdma_rep *rep =
284 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
285
286 /* WARNING: Only wr_id and status are reliable at this point */
287 if (wc->status != IB_WC_SUCCESS)
288 goto out_fail;
fc664485 289
8502427c 290 /* status == SUCCESS means all fields in wc are trustworthy */
291 if (wc->opcode != IB_WC_RECV)
292 return;
293
294 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
295 __func__, rep, wc->byte_len);
296
297 rep->rr_len = wc->byte_len;
298 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
299 rdmab_addr(rep->rr_rdmabuf),
300 rep->rr_len, DMA_FROM_DEVICE);
301 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
302
303out_schedule:
bb96193d 304 list_add_tail(&rep->rr_list, sched_list);
305 return;
306out_fail:
307 if (wc->status != IB_WC_WR_FLUSH_ERR)
308 pr_err("RPC: %s: rep %p: %s\n",
309 __func__, rep, COMPLETION_MSG(wc->status));
310 rep->rr_len = ~0U;
311 goto out_schedule;
312}
313
314static int
1c00dd07 315rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
fc664485 316{
bb96193d 317 struct list_head sched_list;
1c00dd07 318 struct ib_wc *wcs;
8301a2c0 319 int budget, count, rc;
fc664485 320
bb96193d 321 INIT_LIST_HEAD(&sched_list);
8301a2c0 322 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
323 do {
324 wcs = ep->rep_recv_wcs;
325
326 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
327 if (rc <= 0)
bb96193d 328 goto out_schedule;
329
330 count = rc;
331 while (count-- > 0)
bb96193d 332 rpcrdma_recvcq_process_wc(wcs++, &sched_list);
8301a2c0 333 } while (rc == RPCRDMA_POLLSIZE && --budget);
334 rc = 0;
335
336out_schedule:
f1a03b76 337 rpcrdma_schedule_tasklet(&sched_list);
bb96193d 338 return rc;
339}
340
341/*
fc664485 342 * Handle receive completions.
c56c65fb 343 *
344 * It is reentrant but processes single events in order to maintain
345 * ordering of receives to keep server credits.
346 *
347 * It is the responsibility of the scheduled tasklet to return
348 * recv buffers to the pool. NOTE: this affects synchronization of
349 * connection shutdown. That is, the structures required for
350 * the completion of the reply handler must remain intact until
351 * all memory has been reclaimed.
352 */
353static void
fc664485 354rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
c56c65fb 355{
1c00dd07 356 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
357 int rc;
358
1c00dd07 359 rc = rpcrdma_recvcq_poll(cq, ep);
360 if (rc) {
361 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
362 __func__, rc);
c56c65fb 363 return;
fc664485 364 }
c56c65fb 365
366 rc = ib_req_notify_cq(cq,
367 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
368 if (rc == 0)
369 return;
370 if (rc < 0) {
fc664485 371 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
372 __func__, rc);
373 return;
374 }
375
1c00dd07 376 rpcrdma_recvcq_poll(cq, ep);
377}
378
379static void
380rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
381{
382 struct ib_wc wc;
383 LIST_HEAD(sched_list);
384
385 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
386 rpcrdma_recvcq_process_wc(&wc, &sched_list);
387 if (!list_empty(&sched_list))
388 rpcrdma_schedule_tasklet(&sched_list);
389 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
390 rpcrdma_sendcq_process_wc(&wc);
391}
392
f895b252 393#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
394static const char * const conn[] = {
395 "address resolved",
396 "address error",
397 "route resolved",
398 "route error",
399 "connect request",
400 "connect response",
401 "connect error",
402 "unreachable",
403 "rejected",
404 "established",
405 "disconnected",
406 "device removal",
407 "multicast join",
408 "multicast error",
409 "address change",
410 "timewait exit",
c56c65fb 411};
412
413#define CONNECTION_MSG(status) \
414 ((status) < ARRAY_SIZE(conn) ? \
415 conn[(status)] : "unrecognized connection error")
416#endif
417
418static int
419rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
420{
421 struct rpcrdma_xprt *xprt = id->context;
422 struct rpcrdma_ia *ia = &xprt->rx_ia;
423 struct rpcrdma_ep *ep = &xprt->rx_ep;
f895b252 424#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
0dd39cae 425 struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
ff0db049 426#endif
427 struct ib_qp_attr *attr = &ia->ri_qp_attr;
428 struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
429 int connstate = 0;
430
431 switch (event->event) {
432 case RDMA_CM_EVENT_ADDR_RESOLVED:
433 case RDMA_CM_EVENT_ROUTE_RESOLVED:
5675add3 434 ia->ri_async_rc = 0;
435 complete(&ia->ri_done);
436 break;
437 case RDMA_CM_EVENT_ADDR_ERROR:
438 ia->ri_async_rc = -EHOSTUNREACH;
439 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
440 __func__, ep);
441 complete(&ia->ri_done);
442 break;
443 case RDMA_CM_EVENT_ROUTE_ERROR:
444 ia->ri_async_rc = -ENETUNREACH;
445 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
446 __func__, ep);
447 complete(&ia->ri_done);
448 break;
449 case RDMA_CM_EVENT_ESTABLISHED:
450 connstate = 1;
451 ib_query_qp(ia->ri_id->qp, attr,
452 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
453 iattr);
454 dprintk("RPC: %s: %d responder resources"
455 " (%d initiator)\n",
456 __func__, attr->max_dest_rd_atomic,
457 attr->max_rd_atomic);
458 goto connected;
459 case RDMA_CM_EVENT_CONNECT_ERROR:
460 connstate = -ENOTCONN;
461 goto connected;
462 case RDMA_CM_EVENT_UNREACHABLE:
463 connstate = -ENETDOWN;
464 goto connected;
465 case RDMA_CM_EVENT_REJECTED:
466 connstate = -ECONNREFUSED;
467 goto connected;
468 case RDMA_CM_EVENT_DISCONNECTED:
469 connstate = -ECONNABORTED;
470 goto connected;
471 case RDMA_CM_EVENT_DEVICE_REMOVAL:
472 connstate = -ENODEV;
473connected:
474 dprintk("RPC: %s: %sconnected\n",
475 __func__, connstate > 0 ? "" : "dis");
476 ep->rep_connected = connstate;
afadc468 477 rpcrdma_conn_func(ep);
c56c65fb 478 wake_up_all(&ep->rep_connect_wait);
8079fb78 479 /*FALLTHROUGH*/
c56c65fb 480 default:
481 dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
482 __func__, sap, rpc_get_port(sap), ep,
8079fb78 483 CONNECTION_MSG(event->event));
484 break;
485 }
486
f895b252 487#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
b3cd8d45 488 if (connstate == 1) {
ce1ab9ab 489 int ird = attr->max_dest_rd_atomic;
b3cd8d45 490 int tird = ep->rep_remote_cma.responder_resources;
0dd39cae 491
a0ce85f5 492 pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
0dd39cae 493 sap, rpc_get_port(sap),
b3cd8d45 494 ia->ri_id->device->name,
a0ce85f5 495 ia->ri_ops->ro_displayname,
496 xprt->rx_buf.rb_max_requests,
497 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
498 } else if (connstate < 0) {
499 pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
500 sap, rpc_get_port(sap), connstate);
501 }
502#endif
503
504 return 0;
505}
506
507static struct rdma_cm_id *
508rpcrdma_create_id(struct rpcrdma_xprt *xprt,
509 struct rpcrdma_ia *ia, struct sockaddr *addr)
510{
511 struct rdma_cm_id *id;
512 int rc;
513
514 init_completion(&ia->ri_done);
515
b26f9b99 516 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
517 if (IS_ERR(id)) {
518 rc = PTR_ERR(id);
519 dprintk("RPC: %s: rdma_create_id() failed %i\n",
520 __func__, rc);
521 return id;
522 }
523
5675add3 524 ia->ri_async_rc = -ETIMEDOUT;
525 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
526 if (rc) {
527 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
528 __func__, rc);
529 goto out;
530 }
531 wait_for_completion_interruptible_timeout(&ia->ri_done,
532 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
533 rc = ia->ri_async_rc;
534 if (rc)
535 goto out;
536
5675add3 537 ia->ri_async_rc = -ETIMEDOUT;
538 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
539 if (rc) {
540 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
541 __func__, rc);
542 goto out;
543 }
544 wait_for_completion_interruptible_timeout(&ia->ri_done,
545 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
546 rc = ia->ri_async_rc;
547 if (rc)
548 goto out;
549
550 return id;
551
552out:
553 rdma_destroy_id(id);
554 return ERR_PTR(rc);
555}
556
557/*
558 * Drain any cq, prior to teardown.
559 */
560static void
561rpcrdma_clean_cq(struct ib_cq *cq)
562{
563 struct ib_wc wc;
564 int count = 0;
565
566 while (1 == ib_poll_cq(cq, 1, &wc))
567 ++count;
568
569 if (count)
570 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
571 __func__, count, wc.opcode);
572}
573
574/*
575 * Exported functions.
576 */
577
578/*
579 * Open and initialize an Interface Adapter.
580 * o initializes fields of struct rpcrdma_ia, including
581 * interface and provider attributes and protection zone.
582 */
583int
584rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
585{
bd7ed1d1 586 int rc, mem_priv;
c56c65fb 587 struct rpcrdma_ia *ia = &xprt->rx_ia;
7bc7972c 588 struct ib_device_attr *devattr = &ia->ri_devattr;
c56c65fb 589
590 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
591 if (IS_ERR(ia->ri_id)) {
592 rc = PTR_ERR(ia->ri_id);
593 goto out1;
594 }
595
596 ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
597 if (IS_ERR(ia->ri_pd)) {
598 rc = PTR_ERR(ia->ri_pd);
599 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
600 __func__, rc);
601 goto out2;
602 }
603
7bc7972c 604 rc = ib_query_device(ia->ri_id->device, devattr);
605 if (rc) {
606 dprintk("RPC: %s: ib_query_device failed %d\n",
607 __func__, rc);
5ae711a2 608 goto out3;
609 }
610
7bc7972c 611 if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
612 ia->ri_have_dma_lkey = 1;
613 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
614 }
615
f10eafd3 616 if (memreg == RPCRDMA_FRMR) {
3197d309 617 /* Requires both frmr reg and local dma lkey */
41f97028 618 if (((devattr->device_cap_flags &
3197d309 619 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
620 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
621 (devattr->max_fast_reg_page_list_len == 0)) {
3197d309 622 dprintk("RPC: %s: FRMR registration "
623 "not supported by HCA\n", __func__);
624 memreg = RPCRDMA_MTHCAFMR;
625 } else {
626 /* Mind the ia limit on FRMR page list depth */
627 ia->ri_max_frmr_depth = min_t(unsigned int,
628 RPCRDMA_MAX_DATA_SEGS,
7bc7972c 629 devattr->max_fast_reg_page_list_len);
bd7ed1d1 630 }
631 }
632 if (memreg == RPCRDMA_MTHCAFMR) {
633 if (!ia->ri_id->device->alloc_fmr) {
634 dprintk("RPC: %s: MTHCAFMR registration "
635 "not supported by HCA\n", __func__);
f10eafd3 636 memreg = RPCRDMA_ALLPHYSICAL;
f10eafd3 637 }
638 }
639
640 /*
641 * Optionally obtain an underlying physical identity mapping in
642 * order to do a memory window-based bind. This base registration
643 * is protected from remote access - that is enabled only by binding
644 * for the specific bytes targeted during each RPC operation, and
645 * revoked after the corresponding completion similar to a storage
646 * adapter.
647 */
bd7ed1d1 648 switch (memreg) {
3197d309 649 case RPCRDMA_FRMR:
a0ce85f5 650 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
bd7ed1d1 651 break;
bd7ed1d1 652 case RPCRDMA_ALLPHYSICAL:
a0ce85f5 653 ia->ri_ops = &rpcrdma_physical_memreg_ops;
654 mem_priv = IB_ACCESS_LOCAL_WRITE |
655 IB_ACCESS_REMOTE_WRITE |
656 IB_ACCESS_REMOTE_READ;
657 goto register_setup;
bd7ed1d1 658 case RPCRDMA_MTHCAFMR:
a0ce85f5 659 ia->ri_ops = &rpcrdma_fmr_memreg_ops;
bd7ed1d1 660 if (ia->ri_have_dma_lkey)
c56c65fb 661 break;
662 mem_priv = IB_ACCESS_LOCAL_WRITE;
663 register_setup:
664 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
665 if (IS_ERR(ia->ri_bind_mem)) {
666 printk(KERN_ALERT "%s: ib_get_dma_mr for "
0ac531c1 667 "phys register failed with %lX\n",
c56c65fb 668 __func__, PTR_ERR(ia->ri_bind_mem));
0ac531c1 669 rc = -ENOMEM;
5ae711a2 670 goto out3;
c56c65fb 671 }
672 break;
673 default:
674 printk(KERN_ERR "RPC: Unsupported memory "
675 "registration mode: %d\n", memreg);
676 rc = -ENOMEM;
5ae711a2 677 goto out3;
c56c65fb 678 }
679 dprintk("RPC: %s: memory registration strategy is '%s'\n",
680 __func__, ia->ri_ops->ro_displayname);
681
682 /* Else will do memory reg/dereg for each chunk */
683 ia->ri_memreg_strategy = memreg;
684
73806c88 685 rwlock_init(&ia->ri_qplock);
c56c65fb 686 return 0;
687
688out3:
689 ib_dealloc_pd(ia->ri_pd);
690 ia->ri_pd = NULL;
691out2:
692 rdma_destroy_id(ia->ri_id);
fee08caf 693 ia->ri_id = NULL;
694out1:
695 return rc;
696}
697
698/*
699 * Clean up/close an IA.
700 * o if event handles and PD have been initialized, free them.
701 * o close the IA
702 */
703void
704rpcrdma_ia_close(struct rpcrdma_ia *ia)
705{
706 int rc;
707
708 dprintk("RPC: %s: entering\n", __func__);
709 if (ia->ri_bind_mem != NULL) {
710 rc = ib_dereg_mr(ia->ri_bind_mem);
711 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
712 __func__, rc);
713 }
714 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
715 if (ia->ri_id->qp)
716 rdma_destroy_qp(ia->ri_id);
717 rdma_destroy_id(ia->ri_id);
718 ia->ri_id = NULL;
719 }
720 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
721 rc = ib_dealloc_pd(ia->ri_pd);
722 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
723 __func__, rc);
724 }
725}
726
727/*
728 * Create unconnected endpoint.
729 */
730int
731rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
732 struct rpcrdma_create_data_internal *cdata)
733{
7bc7972c 734 struct ib_device_attr *devattr = &ia->ri_devattr;
fc664485 735 struct ib_cq *sendcq, *recvcq;
5d40a8a5 736 int rc, err;
c56c65fb 737
c56c65fb 738 /* check provider's send/recv wr limits */
739 if (cdata->max_requests > devattr->max_qp_wr)
740 cdata->max_requests = devattr->max_qp_wr;
741
742 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
743 ep->rep_attr.qp_context = ep;
744 /* send_cq and recv_cq initialized below */
745 ep->rep_attr.srq = NULL;
746 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
747 switch (ia->ri_memreg_strategy) {
748 case RPCRDMA_FRMR: {
749 int depth = 7;
750
751 /* Add room for frmr register and invalidate WRs.
752 * 1. FRMR reg WR for head
753 * 2. FRMR invalidate WR for head
754 * 3. N FRMR reg WRs for pagelist
755 * 4. N FRMR invalidate WRs for pagelist
756 * 5. FRMR reg WR for tail
757 * 6. FRMR invalidate WR for tail
758 * 7. The RDMA_SEND WR
759 */
760
761 /* Calculate N if the device max FRMR depth is smaller than
762 * RPCRDMA_MAX_DATA_SEGS.
763 */
764 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
765 int delta = RPCRDMA_MAX_DATA_SEGS -
766 ia->ri_max_frmr_depth;
767
768 do {
769 depth += 2; /* FRMR reg + invalidate */
770 delta -= ia->ri_max_frmr_depth;
771 } while (delta > 0);
772
773 }
774 ep->rep_attr.cap.max_send_wr *= depth;
775 if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
776 cdata->max_requests = devattr->max_qp_wr / depth;
777 if (!cdata->max_requests)
778 return -EINVAL;
779 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
780 depth;
15cdc644 781 }
3197d309 782 break;
0fc6c4e7 783 }
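	/* Worked example (illustrative values only, not taken from this tree):
	 * with the base depth of 7, RPCRDMA_MAX_DATA_SEGS of 64 and a device
	 * ri_max_frmr_depth of 16, delta starts at 48 and the loop above adds
	 * two WRs three times, giving depth = 13; max_send_wr is then scaled
	 * by 13 and, if that exceeds the device's max_qp_wr, max_requests is
	 * reduced to max_qp_wr / 13. */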
784 default:
785 break;
786 }
787 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
788 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
789 ep->rep_attr.cap.max_recv_sge = 1;
790 ep->rep_attr.cap.max_inline_data = 0;
791 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
792 ep->rep_attr.qp_type = IB_QPT_RC;
793 ep->rep_attr.port_num = ~0;
794
795 if (cdata->padding) {
796 ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
797 GFP_KERNEL);
798 if (IS_ERR(ep->rep_padbuf))
799 return PTR_ERR(ep->rep_padbuf);
800 } else
801 ep->rep_padbuf = NULL;
802
803 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
804 "iovs: send %d recv %d\n",
805 __func__,
806 ep->rep_attr.cap.max_send_wr,
807 ep->rep_attr.cap.max_recv_wr,
808 ep->rep_attr.cap.max_send_sge,
809 ep->rep_attr.cap.max_recv_sge);
810
811 /* set trigger for requesting send completion */
fc664485 812 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
813 if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
814 ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
815 else if (ep->rep_cqinit <= 2)
816 ep->rep_cqinit = 0;
817 INIT_CQCOUNT(ep);
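	/* Resulting signalling cadence (illustrative): with max_send_wr of 128,
	 * rep_cqinit is 63 unless capped by RPCRDMA_MAX_UNSIGNALED_SENDS;
	 * rpcrdma_ep_post() then sets IB_SEND_SIGNALED on roughly one SEND per
	 * rep_cqinit posts, which keeps the send CQ from overflowing without
	 * taking a completion for every SEND. */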
c56c65fb 818 init_waitqueue_head(&ep->rep_connect_wait);
254f91e2 819 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
c56c65fb 820
fc664485 821 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
1c00dd07 822 rpcrdma_cq_async_error_upcall, ep,
c56c65fb 823 ep->rep_attr.cap.max_send_wr + 1, 0);
824 if (IS_ERR(sendcq)) {
825 rc = PTR_ERR(sendcq);
826 dprintk("RPC: %s: failed to create send CQ: %i\n",
827 __func__, rc);
828 goto out1;
829 }
830
fc664485 831 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
832 if (rc) {
833 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
834 __func__, rc);
835 goto out2;
836 }
837
fc664485 838 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
1c00dd07 839 rpcrdma_cq_async_error_upcall, ep,
840 ep->rep_attr.cap.max_recv_wr + 1, 0);
841 if (IS_ERR(recvcq)) {
842 rc = PTR_ERR(recvcq);
843 dprintk("RPC: %s: failed to create recv CQ: %i\n",
844 __func__, rc);
845 goto out2;
846 }
847
848 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
849 if (rc) {
850 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
851 __func__, rc);
852 ib_destroy_cq(recvcq);
853 goto out2;
854 }
855
856 ep->rep_attr.send_cq = sendcq;
857 ep->rep_attr.recv_cq = recvcq;
858
859 /* Initialize cma parameters */
860
861 /* RPC/RDMA does not use private data */
862 ep->rep_remote_cma.private_data = NULL;
863 ep->rep_remote_cma.private_data_len = 0;
864
865 /* Client offers RDMA Read but does not initiate */
b334eaab 866 ep->rep_remote_cma.initiator_depth = 0;
7bc7972c 867 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
868 ep->rep_remote_cma.responder_resources = 32;
869 else
870 ep->rep_remote_cma.responder_resources =
871 devattr->max_qp_rd_atom;
872
873 ep->rep_remote_cma.retry_count = 7;
874 ep->rep_remote_cma.flow_control = 0;
875 ep->rep_remote_cma.rnr_retry_count = 0;
876
877 return 0;
878
879out2:
fc664485 880 err = ib_destroy_cq(sendcq);
881 if (err)
882 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
883 __func__, err);
c56c65fb 884out1:
c05fbb5a 885 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
886 return rc;
887}
888
889/*
890 * rpcrdma_ep_destroy
891 *
892 * Disconnect and destroy endpoint. After this, the only
893 * valid operations on the ep are to free it (if dynamically
894 * allocated) or re-create it.
c56c65fb 895 */
7f1d5419 896void
897rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
898{
899 int rc;
900
901 dprintk("RPC: %s: entering, connected is %d\n",
902 __func__, ep->rep_connected);
903
904 cancel_delayed_work_sync(&ep->rep_connect_worker);
905
c56c65fb 906 if (ia->ri_id->qp) {
282191cb 907 rpcrdma_ep_disconnect(ep, ia);
908 rdma_destroy_qp(ia->ri_id);
909 ia->ri_id->qp = NULL;
910 }
911
c05fbb5a 912 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
c56c65fb 913
914 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
915 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
916 if (rc)
917 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
918 __func__, rc);
919
920 rpcrdma_clean_cq(ep->rep_attr.send_cq);
921 rc = ib_destroy_cq(ep->rep_attr.send_cq);
922 if (rc)
923 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
924 __func__, rc);
925}
926
927/*
928 * Connect unconnected endpoint.
929 */
930int
931rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
932{
73806c88 933 struct rdma_cm_id *id, *old;
934 int rc = 0;
935 int retry_count = 0;
c56c65fb 936
c055551e 937 if (ep->rep_connected != 0) {
938 struct rpcrdma_xprt *xprt;
939retry:
ec62f40d 940 dprintk("RPC: %s: reconnecting...\n", __func__);
941
942 rpcrdma_ep_disconnect(ep, ia);
a7bc211a 943 rpcrdma_flush_cqs(ep);
944
945 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
946 ia->ri_ops->ro_reset(xprt);
947
948 id = rpcrdma_create_id(xprt, ia,
949 (struct sockaddr *)&xprt->rx_data.addr);
950 if (IS_ERR(id)) {
ec62f40d 951 rc = -EHOSTUNREACH;
952 goto out;
953 }
954 /* TEMP TEMP TEMP - fail if new device:
955 * Deregister/remarshal *all* requests!
956 * Close and recreate adapter, pd, etc!
957 * Re-determine all attributes still sane!
958 * More stuff I haven't thought of!
959 * Rrrgh!
960 */
961 if (ia->ri_id->device != id->device) {
962 printk("RPC: %s: can't reconnect on "
963 "different device!\n", __func__);
964 rdma_destroy_id(id);
ec62f40d 965 rc = -ENETUNREACH;
966 goto out;
967 }
968 /* END TEMP */
969 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
970 if (rc) {
971 dprintk("RPC: %s: rdma_create_qp failed %i\n",
972 __func__, rc);
973 rdma_destroy_id(id);
974 rc = -ENETUNREACH;
975 goto out;
976 }
977
978 write_lock(&ia->ri_qplock);
979 old = ia->ri_id;
c56c65fb 980 ia->ri_id = id;
981 write_unlock(&ia->ri_qplock);
982
983 rdma_destroy_qp(old);
984 rdma_destroy_id(old);
985 } else {
986 dprintk("RPC: %s: connecting...\n", __func__);
987 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
988 if (rc) {
989 dprintk("RPC: %s: rdma_create_qp failed %i\n",
990 __func__, rc);
991 /* do not update ep->rep_connected */
992 return -ENETUNREACH;
993 }
994 }
995
996 ep->rep_connected = 0;
997
998 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
999 if (rc) {
1000 dprintk("RPC: %s: rdma_connect() failed with %i\n",
1001 __func__, rc);
1002 goto out;
1003 }
1004
1005 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
1006
1007 /*
1008 * Check state. A non-peer reject indicates no listener
1009 * (ECONNREFUSED), which may be a transient state. All
1010 * others indicate a transport condition which has already
1011 * undergone a best-effort.
1012 */
1013 if (ep->rep_connected == -ECONNREFUSED &&
1014 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
1015 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
1016 goto retry;
1017 }
1018 if (ep->rep_connected <= 0) {
1019 /* Sometimes, the only way to reliably connect to remote
1020 * CMs is to use same nonzero values for ORD and IRD. */
1021 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
1022 (ep->rep_remote_cma.responder_resources == 0 ||
1023 ep->rep_remote_cma.initiator_depth !=
1024 ep->rep_remote_cma.responder_resources)) {
1025 if (ep->rep_remote_cma.responder_resources == 0)
1026 ep->rep_remote_cma.responder_resources = 1;
1027 ep->rep_remote_cma.initiator_depth =
1028 ep->rep_remote_cma.responder_resources;
c56c65fb 1029 goto retry;
b334eaab 1030 }
1031 rc = ep->rep_connected;
1032 } else {
1033 dprintk("RPC: %s: connected\n", __func__);
1034 }
1035
1036out:
1037 if (rc)
1038 ep->rep_connected = rc;
1039 return rc;
1040}
1041
1042/*
1043 * rpcrdma_ep_disconnect
1044 *
1045 * This is separate from destroy to facilitate the ability
1046 * to reconnect without recreating the endpoint.
1047 *
1048 * This call is not reentrant, and must not be made in parallel
1049 * on the same endpoint.
1050 */
282191cb 1051void
1052rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
1053{
1054 int rc;
1055
a7bc211a 1056 rpcrdma_flush_cqs(ep);
1057 rc = rdma_disconnect(ia->ri_id);
1058 if (!rc) {
1059 /* returns without wait if not connected */
1060 wait_event_interruptible(ep->rep_connect_wait,
1061 ep->rep_connected != 1);
1062 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
1063 (ep->rep_connected == 1) ? "still " : "dis");
1064 } else {
1065 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
1066 ep->rep_connected = rc;
1067 }
1068}
1069
1070static struct rpcrdma_req *
1071rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1072{
1392402c 1073 struct rpcrdma_req *req;
1392402c 1074
85275c87 1075 req = kzalloc(sizeof(*req), GFP_KERNEL);
1392402c 1076 if (req == NULL)
85275c87 1077 return ERR_PTR(-ENOMEM);
1392402c 1078
1079 req->rl_buffer = &r_xprt->rx_buf;
1080 return req;
1081}
1082
1083static struct rpcrdma_rep *
1084rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1085{
1086 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1087 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1088 struct rpcrdma_rep *rep;
1089 int rc;
1090
1091 rc = -ENOMEM;
6b1184cd 1092 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
1093 if (rep == NULL)
1094 goto out;
1392402c 1095
1096 rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
1097 GFP_KERNEL);
1098 if (IS_ERR(rep->rr_rdmabuf)) {
1099 rc = PTR_ERR(rep->rr_rdmabuf);
1392402c 1100 goto out_free;
6b1184cd 1101 }
1102
1103 rep->rr_buffer = &r_xprt->rx_buf;
1104 return rep;
1105
1106out_free:
1107 kfree(rep);
1108out:
1109 return ERR_PTR(rc);
1110}
1111
c56c65fb 1112int
ac920d04 1113rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
c56c65fb 1114{
1115 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1116 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1117 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
c56c65fb 1118 char *p;
1392402c 1119 size_t len;
1120 int i, rc;
1121
1122 buf->rb_max_requests = cdata->max_requests;
1123 spin_lock_init(&buf->rb_lock);
1124
1125 /* Need to allocate:
1126 * 1. arrays for send and recv pointers
1127 * 2. arrays of struct rpcrdma_req to fill in pointers
1128 * 3. array of struct rpcrdma_rep for replies
1129 * Send/recv buffers in req/rep need to be registered
1130 */
1131 len = buf->rb_max_requests *
1132 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
c56c65fb 1133
1134 p = kzalloc(len, GFP_KERNEL);
1135 if (p == NULL) {
1136 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1137 __func__, len);
1138 rc = -ENOMEM;
1139 goto out;
1140 }
1141 buf->rb_pool = p; /* for freeing it later */
1142
1143 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1144 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1145 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1146 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1147
1148 rc = ia->ri_ops->ro_init(r_xprt);
1149 if (rc)
1150 goto out;
c56c65fb 1151
1152 for (i = 0; i < buf->rb_max_requests; i++) {
1153 struct rpcrdma_req *req;
1154 struct rpcrdma_rep *rep;
1155
1156 req = rpcrdma_create_req(r_xprt);
1157 if (IS_ERR(req)) {
1158 dprintk("RPC: %s: request buffer %d alloc"
1159 " failed\n", __func__, i);
1392402c 1160 rc = PTR_ERR(req);
1161 goto out;
1162 }
c56c65fb 1163 buf->rb_send_bufs[i] = req;
c56c65fb 1164
1165 rep = rpcrdma_create_rep(r_xprt);
1166 if (IS_ERR(rep)) {
1167 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1168 __func__, i);
1392402c 1169 rc = PTR_ERR(rep);
1170 goto out;
1171 }
c56c65fb 1172 buf->rb_recv_bufs[i] = rep;
c56c65fb 1173 }
1392402c 1174
1175 return 0;
1176out:
1177 rpcrdma_buffer_destroy(buf);
1178 return rc;
1179}
1180
1181static void
1182rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
1183{
1184 if (!rep)
1185 return;
1186
6b1184cd 1187 rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
1188 kfree(rep);
1189}
1190
1191static void
1192rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
1193{
1194 if (!req)
1195 return;
1196
0ca77dc3 1197 rpcrdma_free_regbuf(ia, req->rl_sendbuf);
85275c87 1198 rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
1199 kfree(req);
1200}
1201
1202void
1203rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1204{
c56c65fb 1205 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
2e84522c 1206 int i;
1207
1208 /* clean up in reverse order from create
1209 * 1. recv mr memory (mr free, then kfree)
c56c65fb 1210 * 2. send mr memory (mr free, then kfree)
2e84522c 1211 * 3. MWs
1212 */
1213 dprintk("RPC: %s: entering\n", __func__);
1214
1215 for (i = 0; i < buf->rb_max_requests; i++) {
1216 if (buf->rb_recv_bufs)
1217 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1218 if (buf->rb_send_bufs)
1219 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
1220 }
1221
4561f347 1222 ia->ri_ops->ro_destroy(buf);
4034ba04 1223
1224 kfree(buf->rb_pool);
1225}
1226
1227/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1228 * some req segments uninitialized.
1229 */
1230static void
1231rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1232{
1233 if (*mw) {
1234 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1235 *mw = NULL;
1236 }
1237}
1238
1239/* Cycle mw's back in reverse order, and "spin" them.
1240 * This delays and scrambles reuse as much as possible.
1241 */
1242static void
1243rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1244{
1245 struct rpcrdma_mr_seg *seg = req->rl_segments;
1246 struct rpcrdma_mr_seg *seg1 = seg;
1247 int i;
1248
1249 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
1250 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1251 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
1252}
1253
1254static void
1255rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1256{
1257 buf->rb_send_bufs[--buf->rb_send_index] = req;
1258 req->rl_niovs = 0;
1259 if (req->rl_reply) {
1260 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1261 req->rl_reply->rr_func = NULL;
1262 req->rl_reply = NULL;
1263 }
1264}
1265
6814baea 1266/* rpcrdma_unmap_one() was already done during deregistration.
1267 * Redo only the ib_post_send().
1268 */
1269static void
1270rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1271{
1272 struct rpcrdma_xprt *r_xprt =
1273 container_of(ia, struct rpcrdma_xprt, rx_ia);
1274 struct ib_send_wr invalidate_wr, *bad_wr;
1275 int rc;
1276
1277 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1278
1279 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
dab7e3b8 1280 r->r.frmr.fr_state = FRMR_IS_INVALID;
1281
1282 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1283 invalidate_wr.wr_id = (unsigned long)(void *)r;
1284 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1285 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1286 DECR_CQCOUNT(&r_xprt->rx_ep);
1287
1288 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1289 __func__, r, r->r.frmr.fr_mr->rkey);
1290
1291 read_lock(&ia->ri_qplock);
1292 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1293 read_unlock(&ia->ri_qplock);
1294 if (rc) {
1295 /* Force rpcrdma_buffer_get() to retry */
1296 r->r.frmr.fr_state = FRMR_IS_STALE;
1297 dprintk("RPC: %s: ib_post_send failed, %i\n",
1298 __func__, rc);
1299 }
1300}
1301
1302static void
1303rpcrdma_retry_flushed_linv(struct list_head *stale,
1304 struct rpcrdma_buffer *buf)
1305{
1306 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1307 struct list_head *pos;
1308 struct rpcrdma_mw *r;
1309 unsigned long flags;
1310
1311 list_for_each(pos, stale) {
1312 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1313 rpcrdma_retry_local_inv(r, ia);
1314 }
1315
1316 spin_lock_irqsave(&buf->rb_lock, flags);
1317 list_splice_tail(stale, &buf->rb_mws);
1318 spin_unlock_irqrestore(&buf->rb_lock, flags);
1319}
1320
1321static struct rpcrdma_req *
1322rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1323 struct list_head *stale)
1324{
1325 struct rpcrdma_mw *r;
1326 int i;
1327
1328 i = RPCRDMA_MAX_SEGS - 1;
1329 while (!list_empty(&buf->rb_mws)) {
1330 r = list_entry(buf->rb_mws.next,
1331 struct rpcrdma_mw, mw_list);
1332 list_del(&r->mw_list);
1333 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1334 list_add(&r->mw_list, stale);
1335 continue;
1336 }
3eb35810 1337 req->rl_segments[i].rl_mw = r;
1338 if (unlikely(i-- == 0))
1339 return req; /* Success */
1340 }
1341
1342 /* Not enough entries on rb_mws for this req */
1343 rpcrdma_buffer_put_sendbuf(req, buf);
1344 rpcrdma_buffer_put_mrs(req, buf);
1345 return NULL;
1346}
1347
c2922c02 1348static struct rpcrdma_req *
ddb6bebc 1349rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1350{
1351 struct rpcrdma_mw *r;
1352 int i;
1353
1354 i = RPCRDMA_MAX_SEGS - 1;
1355 while (!list_empty(&buf->rb_mws)) {
1356 r = list_entry(buf->rb_mws.next,
1357 struct rpcrdma_mw, mw_list);
1358 list_del(&r->mw_list);
3eb35810 1359 req->rl_segments[i].rl_mw = r;
1360 if (unlikely(i-- == 0))
1361 return req; /* Success */
1362 }
1363
1364 /* Not enough entries on rb_mws for this req */
1365 rpcrdma_buffer_put_sendbuf(req, buf);
1366 rpcrdma_buffer_put_mrs(req, buf);
1367 return NULL;
1368}
1369
1370/*
1371 * Get a set of request/reply buffers.
1372 *
1373 * Reply buffer (if needed) is attached to send buffer upon return.
1374 * Rule:
1375 * rb_send_index and rb_recv_index MUST always be pointing to the
1376 * *next* available buffer (non-NULL). They are incremented after
1377 * removing buffers, and decremented *before* returning them.
1378 */
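/* Under FRMR, rpcrdma_buffer_get_frmrs() below also skips MWs that a flushed
 * completion marked FRMR_IS_STALE; those are re-invalidated and returned to
 * rb_mws by rpcrdma_retry_flushed_linv() after rb_lock is dropped. */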
1379struct rpcrdma_req *
1380rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1381{
c2922c02 1382 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
ddb6bebc 1383 struct list_head stale;
1384 struct rpcrdma_req *req;
1385 unsigned long flags;
1386
1387 spin_lock_irqsave(&buffers->rb_lock, flags);
1388 if (buffers->rb_send_index == buffers->rb_max_requests) {
1389 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1390 dprintk("RPC: %s: out of request buffers\n", __func__);
1391 return ((struct rpcrdma_req *)NULL);
1392 }
1393
1394 req = buffers->rb_send_bufs[buffers->rb_send_index];
1395 if (buffers->rb_send_index < buffers->rb_recv_index) {
1396 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1397 __func__,
1398 buffers->rb_recv_index - buffers->rb_send_index);
1399 req->rl_reply = NULL;
1400 } else {
1401 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1402 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1403 }
1404 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1405
1406 INIT_LIST_HEAD(&stale);
1407 switch (ia->ri_memreg_strategy) {
1408 case RPCRDMA_FRMR:
1409 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1410 break;
c2922c02 1411 case RPCRDMA_MTHCAFMR:
ddb6bebc 1412 req = rpcrdma_buffer_get_fmrs(req, buffers);
1413 break;
1414 default:
1415 break;
1416 }
1417 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1418 if (!list_empty(&stale))
1419 rpcrdma_retry_flushed_linv(&stale, buffers);
1420 return req;
1421}
1422
1423/*
1424 * Put request/reply buffers back into pool.
1425 * Pre-decrement counter/array index.
1426 */
1427void
1428rpcrdma_buffer_put(struct rpcrdma_req *req)
1429{
1430 struct rpcrdma_buffer *buffers = req->rl_buffer;
1431 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1432 unsigned long flags;
1433
c56c65fb 1434 spin_lock_irqsave(&buffers->rb_lock, flags);
c2922c02 1435 rpcrdma_buffer_put_sendbuf(req, buffers);
c56c65fb 1436 switch (ia->ri_memreg_strategy) {
3197d309 1437 case RPCRDMA_FRMR:
c56c65fb 1438 case RPCRDMA_MTHCAFMR:
c2922c02 1439 rpcrdma_buffer_put_mrs(req, buffers);
1440 break;
1441 default:
1442 break;
1443 }
1444 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1445}
1446
1447/*
1448 * Recover reply buffers from pool.
1449 * This happens when recovering from error conditions.
1450 * Post-increment counter/array index.
1451 */
1452void
1453rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1454{
1455 struct rpcrdma_buffer *buffers = req->rl_buffer;
1456 unsigned long flags;
1457
1458 spin_lock_irqsave(&buffers->rb_lock, flags);
1459 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1460 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1461 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1462 }
1463 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1464}
1465
1466/*
1467 * Put reply buffers back into pool when not attached to
b45ccfd2 1468 * request. This happens in error conditions.
1469 */
1470void
1471rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1472{
1473 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1474 unsigned long flags;
1475
1476 rep->rr_func = NULL;
1477 spin_lock_irqsave(&buffers->rb_lock, flags);
1478 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1479 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1480}
1481
1482/*
1483 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1484 */
1485
df515ca7 1486static int
1487rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1488 struct ib_mr **mrp, struct ib_sge *iov)
1489{
1490 struct ib_phys_buf ipb;
1491 struct ib_mr *mr;
1492 int rc;
1493
1494 /*
1495 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1496 */
1497 iov->addr = ib_dma_map_single(ia->ri_id->device,
1498 va, len, DMA_BIDIRECTIONAL);
1499 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1500 return -ENOMEM;
1501
1502 iov->length = len;
1503
1504 if (ia->ri_have_dma_lkey) {
1505 *mrp = NULL;
1506 iov->lkey = ia->ri_dma_lkey;
1507 return 0;
1508 } else if (ia->ri_bind_mem != NULL) {
1509 *mrp = NULL;
1510 iov->lkey = ia->ri_bind_mem->lkey;
1511 return 0;
1512 }
1513
1514 ipb.addr = iov->addr;
1515 ipb.size = iov->length;
1516 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1517 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1518
1519 dprintk("RPC: %s: phys convert: 0x%llx "
1520 "registered 0x%llx length %d\n",
1521 __func__, (unsigned long long)ipb.addr,
1522 (unsigned long long)iov->addr, len);
1523
1524 if (IS_ERR(mr)) {
1525 *mrp = NULL;
1526 rc = PTR_ERR(mr);
1527 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1528 } else {
1529 *mrp = mr;
1530 iov->lkey = mr->lkey;
1531 rc = 0;
1532 }
1533
1534 return rc;
1535}
1536
df515ca7 1537static int
1538rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1539 struct ib_mr *mr, struct ib_sge *iov)
1540{
1541 int rc;
1542
1543 ib_dma_unmap_single(ia->ri_id->device,
1544 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1545
1546 if (NULL == mr)
1547 return 0;
1548
1549 rc = ib_dereg_mr(mr);
1550 if (rc)
1551 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1552 return rc;
1553}
1554
1555/**
1556 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
1557 * @ia: controlling rpcrdma_ia
1558 * @size: size of buffer to be allocated, in bytes
1559 * @flags: GFP flags
1560 *
1561 * Returns pointer to private header of an area of internally
1562 * registered memory, or an ERR_PTR. The registered buffer follows
1563 * the end of the private header.
1564 *
1565 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
1566 * receiving the payload of RDMA RECV operations. regbufs are not
1567 * used for RDMA READ/WRITE operations, thus are registered only for
1568 * LOCAL access.
1569 */
1570struct rpcrdma_regbuf *
1571rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
1572{
1573 struct rpcrdma_regbuf *rb;
1574 int rc;
1575
1576 rc = -ENOMEM;
1577 rb = kmalloc(sizeof(*rb) + size, flags);
1578 if (rb == NULL)
1579 goto out;
1580
1581 rb->rg_size = size;
1582 rb->rg_owner = NULL;
1583 rc = rpcrdma_register_internal(ia, rb->rg_base, size,
1584 &rb->rg_mr, &rb->rg_iov);
1585 if (rc)
1586 goto out_free;
1587
1588 return rb;
1589
1590out_free:
1591 kfree(rb);
1592out:
1593 return ERR_PTR(rc);
1594}
1595
1596/**
1597 * rpcrdma_free_regbuf - deregister and free registered buffer
1598 * @ia: controlling rpcrdma_ia
1599 * @rb: regbuf to be deregistered and freed
1600 */
1601void
1602rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1603{
1604 if (rb) {
1605 rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
1606 kfree(rb);
1607 }
1608}
1609
1610/*
1611 * Wrappers for chunk registration, shared by read/write chunk code.
1612 */
1613
1614void
1615rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
1616{
1617 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1618 seg->mr_dmalen = seg->mr_len;
1619 if (seg->mr_page)
1620 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1621 seg->mr_page, offset_in_page(seg->mr_offset),
1622 seg->mr_dmalen, seg->mr_dir);
1623 else
1624 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1625 seg->mr_offset,
1626 seg->mr_dmalen, seg->mr_dir);
1627 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1628 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1629 __func__,
1630 (unsigned long long)seg->mr_dma,
1631 seg->mr_offset, seg->mr_dmalen);
5c635e09 1632 }
1633}
1634
9c1b4d77 1635void
1636rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1637{
1638 if (seg->mr_page)
1639 ib_dma_unmap_page(ia->ri_id->device,
1640 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1641 else
1642 ib_dma_unmap_single(ia->ri_id->device,
1643 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1644}
1645
1646/*
1647 * Prepost any receive buffer, then post send.
1648 *
1649 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1650 */
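/* Posting the receive first ensures a reply buffer is already on the receive
 * queue before the request it answers can reach the peer. */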
1651int
1652rpcrdma_ep_post(struct rpcrdma_ia *ia,
1653 struct rpcrdma_ep *ep,
1654 struct rpcrdma_req *req)
1655{
1656 struct ib_send_wr send_wr, *send_wr_fail;
1657 struct rpcrdma_rep *rep = req->rl_reply;
1658 int rc;
1659
1660 if (rep) {
1661 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1662 if (rc)
1663 goto out;
1664 req->rl_reply = NULL;
1665 }
1666
1667 send_wr.next = NULL;
1668 send_wr.wr_id = 0ULL; /* no send cookie */
1669 send_wr.sg_list = req->rl_send_iov;
1670 send_wr.num_sge = req->rl_niovs;
1671 send_wr.opcode = IB_WR_SEND;
1672 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1673 ib_dma_sync_single_for_device(ia->ri_id->device,
1674 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1675 DMA_TO_DEVICE);
1676 ib_dma_sync_single_for_device(ia->ri_id->device,
1677 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1678 DMA_TO_DEVICE);
1679 ib_dma_sync_single_for_device(ia->ri_id->device,
1680 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1681 DMA_TO_DEVICE);
1682
1683 if (DECR_CQCOUNT(ep) > 0)
1684 send_wr.send_flags = 0;
1685 else { /* Provider must take a send completion every now and then */
1686 INIT_CQCOUNT(ep);
1687 send_wr.send_flags = IB_SEND_SIGNALED;
1688 }
1689
1690 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1691 if (rc)
1692 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1693 rc);
1694out:
1695 return rc;
1696}
1697
1698/*
1699 * (Re)post a receive buffer.
1700 */
1701int
1702rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1703 struct rpcrdma_ep *ep,
1704 struct rpcrdma_rep *rep)
1705{
1706 struct ib_recv_wr recv_wr, *recv_wr_fail;
1707 int rc;
1708
1709 recv_wr.next = NULL;
1710 recv_wr.wr_id = (u64) (unsigned long) rep;
6b1184cd 1711 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1712 recv_wr.num_sge = 1;
1713
1714 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1715 rdmab_addr(rep->rr_rdmabuf),
1716 rdmab_length(rep->rr_rdmabuf),
1717 DMA_BIDIRECTIONAL);
c56c65fb 1718
1719 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1720
1721 if (rc)
1722 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1723 rc);
1724 return rc;
1725}
43e95988 1726
1c9351ee 1727/* How many chunk list items fit within our inline buffers?
43e95988 1728 */
1729unsigned int
1730rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
1731{
1732 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
1c9351ee 1733 int bytes, segments;
43e95988 1734
1735 bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
1736 bytes -= RPCRDMA_HDRLEN_MIN;
1737 if (bytes < sizeof(struct rpcrdma_segment) * 2) {
1738 pr_warn("RPC: %s: inline threshold too small\n",
1739 __func__);
1740 return 0;
43e95988 1741 }
1742
1743 segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
1744 dprintk("RPC: %s: max chunk list size = %d segments\n",
1745 __func__, segments);
1746 return segments;
43e95988 1747}
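/* Worked example (illustrative sizes, not taken from this tree): with
 * 1024-byte inline thresholds and a 28-byte RPCRDMA_HDRLEN_MIN, bytes is 996;
 * assuming a 16-byte struct rpcrdma_segment, that leaves room for 62 segments,
 * and the power-of-two round-down above (1 << (fls(62) - 1)) yields a maximum
 * chunk list of 32 segments.
 */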