1/*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
50#include <linux/interrupt.h>
51#include <linux/pci.h> /* for Tavor hack below */
52#include <linux/slab.h>
53
54#include "xprt_rdma.h"
55
56/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
68/*
69 * handle replies in tasklet context, using a single, global list
70 * rdma tasklet function -- just turn around and call the func
71 * for all replies on the list
72 */
73
74static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
75static LIST_HEAD(rpcrdma_tasklets_g);
76
77static void
78rpcrdma_run_tasklet(unsigned long data)
79{
80 struct rpcrdma_rep *rep;
81 void (*func)(struct rpcrdma_rep *);
82 unsigned long flags;
83
84 data = data;
85 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
86 while (!list_empty(&rpcrdma_tasklets_g)) {
87 rep = list_entry(rpcrdma_tasklets_g.next,
88 struct rpcrdma_rep, rr_list);
89 list_del(&rep->rr_list);
90 func = rep->rr_func;
91 rep->rr_func = NULL;
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93
94 if (func)
95 func(rep);
96 else
97 rpcrdma_recv_buffer_put(rep);
98
99 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
100 }
101 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
102}
103
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105
106static inline void
107rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
112 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
113 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
114 tasklet_schedule(&rpcrdma_tasklet_g);
115}
116
117static void
118rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
119{
120 struct rpcrdma_ep *ep = context;
121
122 dprintk("RPC: %s: QP error %X on device %s ep %p\n",
123 __func__, event->event, event->device->name, context);
124 if (ep->rep_connected == 1) {
125 ep->rep_connected = -EIO;
126 ep->rep_func(ep);
127 wake_up_all(&ep->rep_connect_wait);
128 }
129}
130
131static void
132rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
133{
134 struct rpcrdma_ep *ep = context;
135
136 dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
137 __func__, event->event, event->device->name, context);
138 if (ep->rep_connected == 1) {
139 ep->rep_connected = -EIO;
140 ep->rep_func(ep);
141 wake_up_all(&ep->rep_connect_wait);
142 }
143}
144
145static inline
146void rpcrdma_event_process(struct ib_wc *wc)
147{
148 struct rpcrdma_mw *frmr;
149 struct rpcrdma_rep *rep =
150 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
151
152 dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
153 __func__, rep, wc->status, wc->opcode, wc->byte_len);
154
155 if (!rep) /* send completion that we don't care about */
156 return;
157
158 if (IB_WC_SUCCESS != wc->status) {
159 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
160 __func__, wc->opcode, wc->status);
161 rep->rr_len = ~0U;
162 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
163 rpcrdma_schedule_tasklet(rep);
164 return;
165 }
166
167 switch (wc->opcode) {
168 case IB_WC_FAST_REG_MR:
169 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
170 frmr->r.frmr.state = FRMR_IS_VALID;
171 break;
172 case IB_WC_LOCAL_INV:
173 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
174 frmr->r.frmr.state = FRMR_IS_INVALID;
175 break;
176 case IB_WC_RECV:
177 rep->rr_len = wc->byte_len;
178 ib_dma_sync_single_for_cpu(
179 rdmab_to_ia(rep->rr_buffer)->ri_id->device,
180 rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
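	/* Note: the 16-byte floor below covers the four fixed 32-bit words
	 * of the RPC/RDMA header (xid, version, credits, type in struct
	 * rpcrdma_msg), so rm_credit can be read safely before the reply
	 * is handed to the tasklet. */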
181 /* Keep (only) the most recent credits, after checking their validity */
182 if (rep->rr_len >= 16) {
183 struct rpcrdma_msg *p =
184 (struct rpcrdma_msg *) rep->rr_base;
185 unsigned int credits = ntohl(p->rm_credit);
186 if (credits == 0) {
187 dprintk("RPC: %s: server"
188 " dropped credits to 0!\n", __func__);
189 /* don't deadlock */
190 credits = 1;
191 } else if (credits > rep->rr_buffer->rb_max_requests) {
192 dprintk("RPC: %s: server"
193 " over-crediting: %d (%d)\n",
194 __func__, credits,
195 rep->rr_buffer->rb_max_requests);
196 credits = rep->rr_buffer->rb_max_requests;
197 }
198 atomic_set(&rep->rr_buffer->rb_credits, credits);
199 }
200 rpcrdma_schedule_tasklet(rep);
201 break;
202 default:
203 dprintk("RPC: %s: unexpected WC event %X\n",
204 __func__, wc->opcode);
205 break;
206 }
207}
208
209static inline int
210rpcrdma_cq_poll(struct ib_cq *cq)
211{
212 struct ib_wc wc;
213 int rc;
214
215 for (;;) {
216 rc = ib_poll_cq(cq, 1, &wc);
217 if (rc < 0) {
218 dprintk("RPC: %s: ib_poll_cq failed %i\n",
219 __func__, rc);
220 return rc;
221 }
222 if (rc == 0)
223 break;
224
225 rpcrdma_event_process(&wc);
226 }
227
228 return 0;
229}
230
231/*
232 * rpcrdma_cq_event_upcall
233 *
234 * This upcall handles recv and send events.
235 * It is reentrant but processes single events in order to maintain
236 * ordering of receives to keep server credits.
237 *
238 * It is the responsibility of the scheduled tasklet to return
239 * recv buffers to the pool. NOTE: this affects synchronization of
240 * connection shutdown. That is, the structures required for
241 * the completion of the reply handler must remain intact until
242 * all memory has been reclaimed.
243 *
244 * Note that send events are suppressed and do not result in an upcall.
245 */
246static void
247rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
248{
249 int rc;
250
251 rc = rpcrdma_cq_poll(cq);
252 if (rc)
253 return;
254
255 rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
256 if (rc) {
257 dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
258 __func__, rc);
259 return;
260 }
261
262 rpcrdma_cq_poll(cq);
263}
264
265#ifdef RPC_DEBUG
266static const char * const conn[] = {
267 "address resolved",
268 "address error",
269 "route resolved",
270 "route error",
271 "connect request",
272 "connect response",
273 "connect error",
274 "unreachable",
275 "rejected",
276 "established",
277 "disconnected",
278 "device removal"
279};
280#endif
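/* The strings above are indexed by the RDMA_CM_EVENT_* codes 0..11; the
 * "event->event <= 11" guard in rpcrdma_conn_upcall() below keeps lookups
 * inside this table. */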
281
282static int
283rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
284{
285 struct rpcrdma_xprt *xprt = id->context;
286 struct rpcrdma_ia *ia = &xprt->rx_ia;
287 struct rpcrdma_ep *ep = &xprt->rx_ep;
288#ifdef RPC_DEBUG
289 struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
290#endif
291 struct ib_qp_attr attr;
292 struct ib_qp_init_attr iattr;
293 int connstate = 0;
294
295 switch (event->event) {
296 case RDMA_CM_EVENT_ADDR_RESOLVED:
297 case RDMA_CM_EVENT_ROUTE_RESOLVED:
298 ia->ri_async_rc = 0;
299 complete(&ia->ri_done);
300 break;
301 case RDMA_CM_EVENT_ADDR_ERROR:
302 ia->ri_async_rc = -EHOSTUNREACH;
303 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
304 __func__, ep);
305 complete(&ia->ri_done);
306 break;
307 case RDMA_CM_EVENT_ROUTE_ERROR:
308 ia->ri_async_rc = -ENETUNREACH;
309 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
310 __func__, ep);
311 complete(&ia->ri_done);
312 break;
313 case RDMA_CM_EVENT_ESTABLISHED:
314 connstate = 1;
315 ib_query_qp(ia->ri_id->qp, &attr,
316 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
317 &iattr);
318 dprintk("RPC: %s: %d responder resources"
319 " (%d initiator)\n",
320 __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
321 goto connected;
322 case RDMA_CM_EVENT_CONNECT_ERROR:
323 connstate = -ENOTCONN;
324 goto connected;
325 case RDMA_CM_EVENT_UNREACHABLE:
326 connstate = -ENETDOWN;
327 goto connected;
328 case RDMA_CM_EVENT_REJECTED:
329 connstate = -ECONNREFUSED;
330 goto connected;
331 case RDMA_CM_EVENT_DISCONNECTED:
332 connstate = -ECONNABORTED;
333 goto connected;
334 case RDMA_CM_EVENT_DEVICE_REMOVAL:
335 connstate = -ENODEV;
336connected:
337 dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
338 __func__,
339 (event->event <= 11) ? conn[event->event] :
340 "unknown connection error",
341 &addr->sin_addr.s_addr,
342 ntohs(addr->sin_port),
343 ep, event->event);
344 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
345 dprintk("RPC: %s: %sconnected\n",
346 __func__, connstate > 0 ? "" : "dis");
347 ep->rep_connected = connstate;
348 ep->rep_func(ep);
349 wake_up_all(&ep->rep_connect_wait);
350 break;
351 default:
352 dprintk("RPC: %s: unexpected CM event %d\n",
353 __func__, event->event);
354 break;
355 }
356
357#ifdef RPC_DEBUG
358 if (connstate == 1) {
359 int ird = attr.max_dest_rd_atomic;
360 int tird = ep->rep_remote_cma.responder_resources;
361 printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
362 "on %s, memreg %d slots %d ird %d%s\n",
363 &addr->sin_addr.s_addr,
364 ntohs(addr->sin_port),
365 ia->ri_id->device->name,
366 ia->ri_memreg_strategy,
367 xprt->rx_buf.rb_max_requests,
368 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
369 } else if (connstate < 0) {
370 printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
371 &addr->sin_addr.s_addr,
372 ntohs(addr->sin_port),
373 connstate);
374 }
375#endif
376
377 return 0;
378}
379
380static struct rdma_cm_id *
381rpcrdma_create_id(struct rpcrdma_xprt *xprt,
382 struct rpcrdma_ia *ia, struct sockaddr *addr)
383{
384 struct rdma_cm_id *id;
385 int rc;
386
387 init_completion(&ia->ri_done);
388
389 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
390 if (IS_ERR(id)) {
391 rc = PTR_ERR(id);
392 dprintk("RPC: %s: rdma_create_id() failed %i\n",
393 __func__, rc);
394 return id;
395 }
396
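	/* Address and route resolution are asynchronous: ri_async_rc is
	 * primed with -ETIMEDOUT before each step, and rpcrdma_conn_upcall()
	 * overwrites it and completes ri_done when the CM event arrives.
	 * If the interruptible, timed wait below expires first, the primed
	 * value is what gets returned. */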
397 ia->ri_async_rc = -ETIMEDOUT;
398 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
399 if (rc) {
400 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
401 __func__, rc);
402 goto out;
403 }
404 wait_for_completion_interruptible_timeout(&ia->ri_done,
405 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
406 rc = ia->ri_async_rc;
407 if (rc)
408 goto out;
409
410 ia->ri_async_rc = -ETIMEDOUT;
411 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
412 if (rc) {
413 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
414 __func__, rc);
415 goto out;
416 }
417 wait_for_completion_interruptible_timeout(&ia->ri_done,
418 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
419 rc = ia->ri_async_rc;
420 if (rc)
421 goto out;
422
423 return id;
424
425out:
426 rdma_destroy_id(id);
427 return ERR_PTR(rc);
428}
429
430/*
431 * Drain any cq, prior to teardown.
432 */
433static void
434rpcrdma_clean_cq(struct ib_cq *cq)
435{
436 struct ib_wc wc;
437 int count = 0;
438
439 while (1 == ib_poll_cq(cq, 1, &wc))
440 ++count;
441
442 if (count)
443 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
444 __func__, count, wc.opcode);
445}
446
447/*
448 * Exported functions.
449 */
450
451/*
452 * Open and initialize an Interface Adapter.
453 * o initializes fields of struct rpcrdma_ia, including
454 * interface and provider attributes and protection zone.
455 */
456int
457rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
458{
459 int rc, mem_priv;
460 struct ib_device_attr devattr;
461 struct rpcrdma_ia *ia = &xprt->rx_ia;
462
463 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
464 if (IS_ERR(ia->ri_id)) {
465 rc = PTR_ERR(ia->ri_id);
466 goto out1;
467 }
468
469 ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
470 if (IS_ERR(ia->ri_pd)) {
471 rc = PTR_ERR(ia->ri_pd);
472 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
473 __func__, rc);
474 goto out2;
475 }
476
477 /*
478 * Query the device to determine if the requested memory
479 * registration strategy is supported. If it isn't, set the
480 * strategy to a globally supported model.
481 */
482 rc = ib_query_device(ia->ri_id->device, &devattr);
483 if (rc) {
484 dprintk("RPC: %s: ib_query_device failed %d\n",
485 __func__, rc);
486 goto out2;
487 }
488
489 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
490 ia->ri_have_dma_lkey = 1;
491 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
492 }
493
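	/* Strategy selection below falls back in steps: FRMR requires both
	 * the fast-registration and local DMA lkey device capabilities; if
	 * unavailable the code drops to MTHCAFMR, and if the device has no
	 * alloc_fmr verb it drops to ALLPHYSICAL (only when persistent
	 * registration is compiled in), otherwise the open fails. */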
494 if (memreg == RPCRDMA_FRMR) {
495 /* Requires both frmr reg and local dma lkey */
496 if ((devattr.device_cap_flags &
497 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
498 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
499 dprintk("RPC: %s: FRMR registration "
500 "not supported by HCA\n", __func__);
501 memreg = RPCRDMA_MTHCAFMR;
502 } else {
503 /* Mind the ia limit on FRMR page list depth */
504 ia->ri_max_frmr_depth = min_t(unsigned int,
505 RPCRDMA_MAX_DATA_SEGS,
506 devattr.max_fast_reg_page_list_len);
507 }
508 }
509 if (memreg == RPCRDMA_MTHCAFMR) {
510 if (!ia->ri_id->device->alloc_fmr) {
511 dprintk("RPC: %s: MTHCAFMR registration "
512 "not supported by HCA\n", __func__);
513#if RPCRDMA_PERSISTENT_REGISTRATION
514 memreg = RPCRDMA_ALLPHYSICAL;
515#else
516 rc = -ENOMEM;
517 goto out2;
518#endif
519 }
520 }
521
522 /*
523 * Optionally obtain an underlying physical identity mapping in
524 * order to do a memory window-based bind. This base registration
525 * is protected from remote access - that is enabled only by binding
526 * for the specific bytes targeted during each RPC operation, and
527 * revoked after the corresponding completion similar to a storage
528 * adapter.
529 */
530 switch (memreg) {
531 case RPCRDMA_FRMR:
532 break;
533#if RPCRDMA_PERSISTENT_REGISTRATION
534 case RPCRDMA_ALLPHYSICAL:
535 mem_priv = IB_ACCESS_LOCAL_WRITE |
536 IB_ACCESS_REMOTE_WRITE |
537 IB_ACCESS_REMOTE_READ;
538 goto register_setup;
539#endif
540 case RPCRDMA_MTHCAFMR:
541 if (ia->ri_have_dma_lkey)
542 break;
543 mem_priv = IB_ACCESS_LOCAL_WRITE;
544#if RPCRDMA_PERSISTENT_REGISTRATION
545 register_setup:
546#endif
547 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
548 if (IS_ERR(ia->ri_bind_mem)) {
549 printk(KERN_ALERT "%s: ib_get_dma_mr for "
550 "phys register failed with %lX\n",
551 __func__, PTR_ERR(ia->ri_bind_mem));
552 rc = -ENOMEM;
553 goto out2;
554 }
555 break;
556 default:
557 printk(KERN_ERR "RPC: Unsupported memory "
558 "registration mode: %d\n", memreg);
559 rc = -ENOMEM;
560 goto out2;
561 }
562 dprintk("RPC: %s: memory registration strategy is %d\n",
563 __func__, memreg);
564
565 /* Else will do memory reg/dereg for each chunk */
566 ia->ri_memreg_strategy = memreg;
567
568 return 0;
569out2:
570 rdma_destroy_id(ia->ri_id);
571 ia->ri_id = NULL;
572out1:
573 return rc;
574}
575
576/*
577 * Clean up/close an IA.
578 * o if event handles and PD have been initialized, free them.
579 * o close the IA
580 */
581void
582rpcrdma_ia_close(struct rpcrdma_ia *ia)
583{
584 int rc;
585
586 dprintk("RPC: %s: entering\n", __func__);
587 if (ia->ri_bind_mem != NULL) {
588 rc = ib_dereg_mr(ia->ri_bind_mem);
589 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
590 __func__, rc);
591 }
592 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
593 if (ia->ri_id->qp)
594 rdma_destroy_qp(ia->ri_id);
595 rdma_destroy_id(ia->ri_id);
596 ia->ri_id = NULL;
597 }
598 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
599 rc = ib_dealloc_pd(ia->ri_pd);
600 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
601 __func__, rc);
602 }
603}
604
605/*
606 * Create unconnected endpoint.
607 */
608int
609rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
610 struct rpcrdma_create_data_internal *cdata)
611{
612 struct ib_device_attr devattr;
613 int rc, err;
614
615 rc = ib_query_device(ia->ri_id->device, &devattr);
616 if (rc) {
617 dprintk("RPC: %s: ib_query_device failed %d\n",
618 __func__, rc);
619 return rc;
620 }
621
622 /* check provider's send/recv wr limits */
623 if (cdata->max_requests > devattr.max_qp_wr)
624 cdata->max_requests = devattr.max_qp_wr;
625
626 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
627 ep->rep_attr.qp_context = ep;
628 /* send_cq and recv_cq initialized below */
629 ep->rep_attr.srq = NULL;
630 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
631 switch (ia->ri_memreg_strategy) {
632 case RPCRDMA_FRMR: {
633 int depth = 7;
634
635 /* Add room for frmr register and invalidate WRs.
636 * 1. FRMR reg WR for head
637 * 2. FRMR invalidate WR for head
638 * 3. N FRMR reg WRs for pagelist
639 * 4. N FRMR invalidate WRs for pagelist
640 * 5. FRMR reg WR for tail
641 * 6. FRMR invalidate WR for tail
642 * 7. The RDMA_SEND WR
643 */
644
645 /* Calculate N if the device max FRMR depth is smaller than
646 * RPCRDMA_MAX_DATA_SEGS.
647 */
648 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
649 int delta = RPCRDMA_MAX_DATA_SEGS -
650 ia->ri_max_frmr_depth;
651
652 do {
653 depth += 2; /* FRMR reg + invalidate */
654 delta -= ia->ri_max_frmr_depth;
655 } while (delta > 0);
656
657 }
658 ep->rep_attr.cap.max_send_wr *= depth;
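		/* For example, when ia->ri_max_frmr_depth covers
		 * RPCRDMA_MAX_DATA_SEGS, depth stays at 7, so 32 credits
		 * cost 32 * 7 = 224 send WRs before the clamp against
		 * devattr.max_qp_wr below (numbers illustrative). */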
659 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
660 cdata->max_requests = devattr.max_qp_wr / depth;
661 if (!cdata->max_requests)
662 return -EINVAL;
663 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
664 depth;
665 }
666 break;
667 }
668 default:
669 break;
670 }
671 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
672 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
673 ep->rep_attr.cap.max_recv_sge = 1;
674 ep->rep_attr.cap.max_inline_data = 0;
675 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
676 ep->rep_attr.qp_type = IB_QPT_RC;
677 ep->rep_attr.port_num = ~0;
678
679 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
680 "iovs: send %d recv %d\n",
681 __func__,
682 ep->rep_attr.cap.max_send_wr,
683 ep->rep_attr.cap.max_recv_wr,
684 ep->rep_attr.cap.max_send_sge,
685 ep->rep_attr.cap.max_recv_sge);
686
687 /* set trigger for requesting send completion */
688 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
689 if (ep->rep_cqinit <= 2)
690 ep->rep_cqinit = 0;
691 INIT_CQCOUNT(ep);
692 ep->rep_ia = ia;
693 init_waitqueue_head(&ep->rep_connect_wait);
694 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
695
696 ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
697 rpcrdma_cq_async_error_upcall, NULL,
698 ep->rep_attr.cap.max_recv_wr +
699 ep->rep_attr.cap.max_send_wr + 1, 0);
700 if (IS_ERR(ep->rep_cq)) {
701 rc = PTR_ERR(ep->rep_cq);
702 dprintk("RPC: %s: ib_create_cq failed: %i\n",
703 __func__, rc);
704 goto out1;
705 }
706
707 rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
708 if (rc) {
709 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
710 __func__, rc);
711 goto out2;
712 }
713
714 ep->rep_attr.send_cq = ep->rep_cq;
715 ep->rep_attr.recv_cq = ep->rep_cq;
716
717 /* Initialize cma parameters */
718
719 /* RPC/RDMA does not use private data */
720 ep->rep_remote_cma.private_data = NULL;
721 ep->rep_remote_cma.private_data_len = 0;
722
723 /* Client offers RDMA Read but does not initiate */
724 ep->rep_remote_cma.initiator_depth = 0;
725 if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
726 ep->rep_remote_cma.responder_resources = 32;
727 else
728 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
729
730 ep->rep_remote_cma.retry_count = 7;
731 ep->rep_remote_cma.flow_control = 0;
732 ep->rep_remote_cma.rnr_retry_count = 0;
733
734 return 0;
735
736out2:
737 err = ib_destroy_cq(ep->rep_cq);
738 if (err)
739 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
740 __func__, err);
741out1:
742 return rc;
743}
744
745/*
746 * rpcrdma_ep_destroy
747 *
748 * Disconnect and destroy endpoint. After this, the only
749 * valid operations on the ep are to free it (if dynamically
750 * allocated) or re-create it.
751 */
752void
753rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
754{
755 int rc;
756
757 dprintk("RPC: %s: entering, connected is %d\n",
758 __func__, ep->rep_connected);
759
760 cancel_delayed_work_sync(&ep->rep_connect_worker);
761
762 if (ia->ri_id->qp) {
763 rc = rpcrdma_ep_disconnect(ep, ia);
764 if (rc)
765 dprintk("RPC: %s: rpcrdma_ep_disconnect"
766 " returned %i\n", __func__, rc);
767 rdma_destroy_qp(ia->ri_id);
768 ia->ri_id->qp = NULL;
769 }
770
771 /* padding - could be done in rpcrdma_buffer_destroy... */
772 if (ep->rep_pad_mr) {
773 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
774 ep->rep_pad_mr = NULL;
775 }
776
777 rpcrdma_clean_cq(ep->rep_cq);
778 rc = ib_destroy_cq(ep->rep_cq);
779 if (rc)
780 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
781 __func__, rc);
782}
783
784/*
785 * Connect unconnected endpoint.
786 */
787int
788rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
789{
790 struct rdma_cm_id *id;
791 int rc = 0;
792 int retry_count = 0;
793
794 if (ep->rep_connected != 0) {
795 struct rpcrdma_xprt *xprt;
796retry:
797 rc = rpcrdma_ep_disconnect(ep, ia);
798 if (rc && rc != -ENOTCONN)
799 dprintk("RPC: %s: rpcrdma_ep_disconnect"
800 " status %i\n", __func__, rc);
801 rpcrdma_clean_cq(ep->rep_cq);
802
803 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
804 id = rpcrdma_create_id(xprt, ia,
805 (struct sockaddr *)&xprt->rx_data.addr);
806 if (IS_ERR(id)) {
807 rc = PTR_ERR(id);
808 goto out;
809 }
810 /* TEMP TEMP TEMP - fail if new device:
811 * Deregister/remarshal *all* requests!
812 * Close and recreate adapter, pd, etc!
813 * Re-determine all attributes still sane!
814 * More stuff I haven't thought of!
815 * Rrrgh!
816 */
817 if (ia->ri_id->device != id->device) {
818 printk("RPC: %s: can't reconnect on "
819 "different device!\n", __func__);
820 rdma_destroy_id(id);
821 rc = -ENETDOWN;
822 goto out;
823 }
824 /* END TEMP */
825 rdma_destroy_qp(ia->ri_id);
826 rdma_destroy_id(ia->ri_id);
827 ia->ri_id = id;
828 }
829
830 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
831 if (rc) {
832 dprintk("RPC: %s: rdma_create_qp failed %i\n",
833 __func__, rc);
834 goto out;
835 }
836
837/* XXX Tavor device performs badly with 2K MTU! */
838if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
839 struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
840 if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
841 (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
842 pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
843 struct ib_qp_attr attr = {
844 .path_mtu = IB_MTU_1024
845 };
846 rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
847 }
848}
849
850 ep->rep_connected = 0;
851
852 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
853 if (rc) {
854 dprintk("RPC: %s: rdma_connect() failed with %i\n",
855 __func__, rc);
856 goto out;
857 }
858
859 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
860
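	/* ep->rep_connected is written by rpcrdma_conn_upcall(): it stays 0
	 * while the connect is in flight, becomes 1 on
	 * RDMA_CM_EVENT_ESTABLISHED, and becomes a negative errno on any of
	 * the failure events, so the wait above ends once the CM has
	 * decided the outcome. */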
861 /*
862 * Check state. A non-peer reject indicates no listener
863 * (ECONNREFUSED), which may be a transient state. All
864 * others indicate a transport condition which has already
865 * undergone a best-effort.
866 */
867 if (ep->rep_connected == -ECONNREFUSED &&
868 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
869 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
870 goto retry;
871 }
872 if (ep->rep_connected <= 0) {
873 /* Sometimes, the only way to reliably connect to remote
874 * CMs is to use same nonzero values for ORD and IRD. */
875 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
876 (ep->rep_remote_cma.responder_resources == 0 ||
877 ep->rep_remote_cma.initiator_depth !=
878 ep->rep_remote_cma.responder_resources)) {
879 if (ep->rep_remote_cma.responder_resources == 0)
880 ep->rep_remote_cma.responder_resources = 1;
881 ep->rep_remote_cma.initiator_depth =
882 ep->rep_remote_cma.responder_resources;
883 goto retry;
884 }
885 rc = ep->rep_connected;
886 } else {
887 dprintk("RPC: %s: connected\n", __func__);
888 }
889
890out:
891 if (rc)
892 ep->rep_connected = rc;
893 return rc;
894}
895
896/*
897 * rpcrdma_ep_disconnect
898 *
899 * This is separate from destroy to facilitate the ability
900 * to reconnect without recreating the endpoint.
901 *
902 * This call is not reentrant, and must not be made in parallel
903 * on the same endpoint.
904 */
905int
906rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
907{
908 int rc;
909
910 rpcrdma_clean_cq(ep->rep_cq);
911 rc = rdma_disconnect(ia->ri_id);
912 if (!rc) {
913 /* returns without wait if not connected */
914 wait_event_interruptible(ep->rep_connect_wait,
915 ep->rep_connected != 1);
916 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
917 (ep->rep_connected == 1) ? "still " : "dis");
918 } else {
919 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
920 ep->rep_connected = rc;
921 }
922 return rc;
923}
924
925/*
926 * Initialize buffer memory
927 */
928int
929rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
930 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
931{
932 char *p;
933 size_t len;
934 int i, rc;
935 struct rpcrdma_mw *r;
936
937 buf->rb_max_requests = cdata->max_requests;
938 spin_lock_init(&buf->rb_lock);
939 atomic_set(&buf->rb_credits, 1);
940
941 /* Need to allocate:
942 * 1. arrays for send and recv pointers
943 * 2. arrays of struct rpcrdma_req to fill in pointers
944 * 3. array of struct rpcrdma_rep for replies
945 * 4. padding, if any
946 * 5. mw's, fmr's or frmr's, if any
947 * Send/recv buffers in req/rep need to be registered
948 */
949
950 len = buf->rb_max_requests *
951 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
952 len += cdata->padding;
953 switch (ia->ri_memreg_strategy) {
954 case RPCRDMA_FRMR:
955 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
956 sizeof(struct rpcrdma_mw);
957 break;
958 case RPCRDMA_MTHCAFMR:
959 /* TBD we are perhaps overallocating here */
960 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
961 sizeof(struct rpcrdma_mw);
962 break;
963 default:
964 break;
965 }
966
967 /* allocate 1, 4 and 5 in one shot */
968 p = kzalloc(len, GFP_KERNEL);
969 if (p == NULL) {
970 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
971 __func__, len);
972 rc = -ENOMEM;
973 goto out;
974 }
975 buf->rb_pool = p; /* for freeing it later */
976
977 buf->rb_send_bufs = (struct rpcrdma_req **) p;
978 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
979 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
980 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
981
982 /*
983 * Register the zeroed pad buffer, if any.
984 */
985 if (cdata->padding) {
986 rc = rpcrdma_register_internal(ia, p, cdata->padding,
987 &ep->rep_pad_mr, &ep->rep_pad);
988 if (rc)
989 goto out;
990 }
991 p += cdata->padding;
992
993 INIT_LIST_HEAD(&buf->rb_mws);
994 r = (struct rpcrdma_mw *)p;
995 switch (ia->ri_memreg_strategy) {
996 case RPCRDMA_FRMR:
997 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
998 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
999 ia->ri_max_frmr_depth);
1000 if (IS_ERR(r->r.frmr.fr_mr)) {
1001 rc = PTR_ERR(r->r.frmr.fr_mr);
1002 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1003 " failed %i\n", __func__, rc);
1004 goto out;
1005 }
1006 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1007 ia->ri_id->device,
1008 ia->ri_max_frmr_depth);
1009 if (IS_ERR(r->r.frmr.fr_pgl)) {
1010 rc = PTR_ERR(r->r.frmr.fr_pgl);
1011 dprintk("RPC: %s: "
1012 "ib_alloc_fast_reg_page_list "
1013 "failed %i\n", __func__, rc);
1014
1015 ib_dereg_mr(r->r.frmr.fr_mr);
1016 goto out;
1017 }
1018 list_add(&r->mw_list, &buf->rb_mws);
1019 ++r;
1020 }
1021 break;
1022 case RPCRDMA_MTHCAFMR:
1023 /* TBD we are perhaps overallocating here */
1024 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
1025 static struct ib_fmr_attr fa =
1026 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
1027 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
1028 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
1029 &fa);
1030 if (IS_ERR(r->r.fmr)) {
1031 rc = PTR_ERR(r->r.fmr);
1032 dprintk("RPC: %s: ib_alloc_fmr"
1033 " failed %i\n", __func__, rc);
1034 goto out;
1035 }
1036 list_add(&r->mw_list, &buf->rb_mws);
1037 ++r;
1038 }
1039 break;
1040 default:
1041 break;
1042 }
1043
1044 /*
1045 * Allocate/init the request/reply buffers. Doing this
1046 * using kmalloc for now -- one for each buf.
1047 */
1048 for (i = 0; i < buf->rb_max_requests; i++) {
1049 struct rpcrdma_req *req;
1050 struct rpcrdma_rep *rep;
1051
1052 len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
1053 /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
1054 /* Typical ~2400b, so rounding up saves work later */
1055 if (len < 4096)
1056 len = 4096;
1057 req = kmalloc(len, GFP_KERNEL);
1058 if (req == NULL) {
1059 dprintk("RPC: %s: request buffer %d alloc"
1060 " failed\n", __func__, i);
1061 rc = -ENOMEM;
1062 goto out;
1063 }
1064 memset(req, 0, sizeof(struct rpcrdma_req));
1065 buf->rb_send_bufs[i] = req;
1066 buf->rb_send_bufs[i]->rl_buffer = buf;
1067
1068 rc = rpcrdma_register_internal(ia, req->rl_base,
1069 len - offsetof(struct rpcrdma_req, rl_base),
1070 &buf->rb_send_bufs[i]->rl_handle,
1071 &buf->rb_send_bufs[i]->rl_iov);
1072 if (rc)
1073 goto out;
1074
1075 buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
1076
1077 len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
1078 rep = kmalloc(len, GFP_KERNEL);
1079 if (rep == NULL) {
1080 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1081 __func__, i);
1082 rc = -ENOMEM;
1083 goto out;
1084 }
1085 memset(rep, 0, sizeof(struct rpcrdma_rep));
1086 buf->rb_recv_bufs[i] = rep;
1087 buf->rb_recv_bufs[i]->rr_buffer = buf;
1088
1089 rc = rpcrdma_register_internal(ia, rep->rr_base,
1090 len - offsetof(struct rpcrdma_rep, rr_base),
1091 &buf->rb_recv_bufs[i]->rr_handle,
1092 &buf->rb_recv_bufs[i]->rr_iov);
1093 if (rc)
1094 goto out;
1095
1096 }
1097 dprintk("RPC: %s: max_requests %d\n",
1098 __func__, buf->rb_max_requests);
1099 /* done */
1100 return 0;
1101out:
1102 rpcrdma_buffer_destroy(buf);
1103 return rc;
1104}
1105
1106/*
1107 * Unregister and destroy buffer memory. Need to deal with
1108 * partial initialization, so it's callable from failed create.
1109 * Must be called before destroying endpoint, as registrations
1110 * reference it.
1111 */
1112void
1113rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1114{
1115 int rc, i;
1116 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1117 struct rpcrdma_mw *r;
1118
1119 /* clean up in reverse order from create
1120 * 1. recv mr memory (mr free, then kfree)
1121 * 2. send mr memory (mr free, then kfree)
1122 * 3. padding (if any) [moved to rpcrdma_ep_destroy]
1123 * 4. arrays
1124 */
1125 dprintk("RPC: %s: entering\n", __func__);
1126
1127 for (i = 0; i < buf->rb_max_requests; i++) {
1128 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1129 rpcrdma_deregister_internal(ia,
1130 buf->rb_recv_bufs[i]->rr_handle,
1131 &buf->rb_recv_bufs[i]->rr_iov);
1132 kfree(buf->rb_recv_bufs[i]);
1133 }
1134 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
1135 rpcrdma_deregister_internal(ia,
1136 buf->rb_send_bufs[i]->rl_handle,
1137 &buf->rb_send_bufs[i]->rl_iov);
1138 kfree(buf->rb_send_bufs[i]);
1139 }
1140 }
1141
1142 while (!list_empty(&buf->rb_mws)) {
1143 r = list_entry(buf->rb_mws.next,
1144 struct rpcrdma_mw, mw_list);
1145 list_del(&r->mw_list);
1146 switch (ia->ri_memreg_strategy) {
1147 case RPCRDMA_FRMR:
1148 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1149 if (rc)
1150 dprintk("RPC: %s:"
1151 " ib_dereg_mr"
1152 " failed %i\n",
1153 __func__, rc);
1154 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1155 break;
1156 case RPCRDMA_MTHCAFMR:
1157 rc = ib_dealloc_fmr(r->r.fmr);
1158 if (rc)
1159 dprintk("RPC: %s:"
1160 " ib_dealloc_fmr"
1161 " failed %i\n",
1162 __func__, rc);
1163 break;
1164 default:
1165 break;
1166 }
1167 }
1168
1169 kfree(buf->rb_pool);
1170}
1171
1172/*
1173 * Get a set of request/reply buffers.
1174 *
1175 * Reply buffer (if needed) is attached to send buffer upon return.
1176 * Rule:
1177 * rb_send_index and rb_recv_index MUST always be pointing to the
1178 * *next* available buffer (non-NULL). They are incremented after
1179 * removing buffers, and decremented *before* returning them.
1180 */
1181struct rpcrdma_req *
1182rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1183{
1184 struct rpcrdma_req *req;
1185 unsigned long flags;
1186 int i;
1187 struct rpcrdma_mw *r;
1188
1189 spin_lock_irqsave(&buffers->rb_lock, flags);
1190 if (buffers->rb_send_index == buffers->rb_max_requests) {
1191 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1192 dprintk("RPC: %s: out of request buffers\n", __func__);
1193 return ((struct rpcrdma_req *)NULL);
1194 }
1195
1196 req = buffers->rb_send_bufs[buffers->rb_send_index];
1197 if (buffers->rb_send_index < buffers->rb_recv_index) {
1198 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1199 __func__,
1200 buffers->rb_recv_index - buffers->rb_send_index);
1201 req->rl_reply = NULL;
1202 } else {
1203 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1204 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1205 }
1206 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1207 if (!list_empty(&buffers->rb_mws)) {
1208 i = RPCRDMA_MAX_SEGS - 1;
1209 do {
1210 r = list_entry(buffers->rb_mws.next,
1211 struct rpcrdma_mw, mw_list);
1212 list_del(&r->mw_list);
1213 req->rl_segments[i].mr_chunk.rl_mw = r;
1214 } while (--i >= 0);
1215 }
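	/* Each request leaves here holding a full complement of
	 * RPCRDMA_MAX_SEGS MWs in rl_segments[], so chunk marshaling never
	 * has to allocate registration state at send time; unused MWs are
	 * recycled through rb_mws by rpcrdma_buffer_put(). */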
1216 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1217 return req;
1218}
1219
1220/*
1221 * Put request/reply buffers back into pool.
1222 * Pre-decrement counter/array index.
1223 */
1224void
1225rpcrdma_buffer_put(struct rpcrdma_req *req)
1226{
1227 struct rpcrdma_buffer *buffers = req->rl_buffer;
1228 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1229 int i;
1230 unsigned long flags;
1231
1232 BUG_ON(req->rl_nchunks != 0);
1233 spin_lock_irqsave(&buffers->rb_lock, flags);
1234 buffers->rb_send_bufs[--buffers->rb_send_index] = req;
1235 req->rl_niovs = 0;
1236 if (req->rl_reply) {
1237 buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
1238 req->rl_reply->rr_func = NULL;
1239 req->rl_reply = NULL;
1240 }
1241 switch (ia->ri_memreg_strategy) {
1242 case RPCRDMA_FRMR:
1243 case RPCRDMA_MTHCAFMR:
1244 /*
1245 * Cycle mw's back in reverse order, and "spin" them.
1246 * This delays and scrambles reuse as much as possible.
1247 */
1248 i = 1;
1249 do {
1250 struct rpcrdma_mw **mw;
1251 mw = &req->rl_segments[i].mr_chunk.rl_mw;
1252 list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
1253 *mw = NULL;
1254 } while (++i < RPCRDMA_MAX_SEGS);
1255 list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
1256 &buffers->rb_mws);
1257 req->rl_segments[0].mr_chunk.rl_mw = NULL;
1258 break;
1259 default:
1260 break;
1261 }
1262 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1263}
1264
1265/*
1266 * Recover reply buffers from pool.
1267 * This happens when recovering from error conditions.
1268 * Post-increment counter/array index.
1269 */
1270void
1271rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1272{
1273 struct rpcrdma_buffer *buffers = req->rl_buffer;
1274 unsigned long flags;
1275
1276 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1277 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1278 spin_lock_irqsave(&buffers->rb_lock, flags);
1279 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1280 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1281 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1282 }
1283 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1284}
1285
1286/*
1287 * Put reply buffers back into pool when not attached to
1288 * request. This happens in error conditions.
1289 */
1290void
1291rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1292{
1293 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1294 unsigned long flags;
1295
1296 rep->rr_func = NULL;
1297 spin_lock_irqsave(&buffers->rb_lock, flags);
1298 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1299 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1300}
1301
1302/*
1303 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1304 */
1305
1306int
1307rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1308 struct ib_mr **mrp, struct ib_sge *iov)
1309{
1310 struct ib_phys_buf ipb;
1311 struct ib_mr *mr;
1312 int rc;
1313
1314 /*
1315 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1316 */
1317 iov->addr = ib_dma_map_single(ia->ri_id->device,
1318 va, len, DMA_BIDIRECTIONAL);
1319 iov->length = len;
1320
1321 if (ia->ri_have_dma_lkey) {
1322 *mrp = NULL;
1323 iov->lkey = ia->ri_dma_lkey;
1324 return 0;
1325 } else if (ia->ri_bind_mem != NULL) {
1326 *mrp = NULL;
1327 iov->lkey = ia->ri_bind_mem->lkey;
1328 return 0;
1329 }
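	/* Preference order above: the device-wide local DMA lkey when the
	 * HCA advertises one, then the persistent ri_bind_mem registration,
	 * and only as a last resort a per-buffer ib_reg_phys_mr() below. */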
1330
1331 ipb.addr = iov->addr;
1332 ipb.size = iov->length;
1333 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1334 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1335
1336 dprintk("RPC: %s: phys convert: 0x%llx "
1337 "registered 0x%llx length %d\n",
1338 __func__, (unsigned long long)ipb.addr,
1339 (unsigned long long)iov->addr, len);
1340
1341 if (IS_ERR(mr)) {
1342 *mrp = NULL;
1343 rc = PTR_ERR(mr);
1344 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1345 } else {
1346 *mrp = mr;
1347 iov->lkey = mr->lkey;
1348 rc = 0;
1349 }
1350
1351 return rc;
1352}
1353
1354int
1355rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1356 struct ib_mr *mr, struct ib_sge *iov)
1357{
1358 int rc;
1359
1360 ib_dma_unmap_single(ia->ri_id->device,
1361 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1362
1363 if (NULL == mr)
1364 return 0;
1365
1366 rc = ib_dereg_mr(mr);
1367 if (rc)
1368 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1369 return rc;
1370}
1371
1372/*
1373 * Wrappers for chunk registration, shared by read/write chunk code.
1374 */
1375
1376static void
1377rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1378{
1379 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1380 seg->mr_dmalen = seg->mr_len;
1381 if (seg->mr_page)
1382 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1383 seg->mr_page, offset_in_page(seg->mr_offset),
1384 seg->mr_dmalen, seg->mr_dir);
1385 else
1386 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1387 seg->mr_offset,
1388 seg->mr_dmalen, seg->mr_dir);
1389 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1390 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1391 __func__,
1392 (unsigned long long)seg->mr_dma,
1393 seg->mr_offset, seg->mr_dmalen);
1394 }
1395}
1396
1397static void
1398rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1399{
1400 if (seg->mr_page)
1401 ib_dma_unmap_page(ia->ri_id->device,
1402 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1403 else
1404 ib_dma_unmap_single(ia->ri_id->device,
1405 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1406}
1407
1408static int
1409rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1410 int *nsegs, int writing, struct rpcrdma_ia *ia,
1411 struct rpcrdma_xprt *r_xprt)
1412{
1413 struct rpcrdma_mr_seg *seg1 = seg;
1414 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1415
1416 u8 key;
1417 int len, pageoff;
1418 int i, rc;
1419 int seg_len;
1420 u64 pa;
1421 int page_no;
1422
1423 pageoff = offset_in_page(seg1->mr_offset);
1424 seg1->mr_offset -= pageoff; /* start of page */
1425 seg1->mr_len += pageoff;
1426 len = -pageoff;
1427 if (*nsegs > ia->ri_max_frmr_depth)
1428 *nsegs = ia->ri_max_frmr_depth;
1429 for (page_no = i = 0; i < *nsegs;) {
1430 rpcrdma_map_one(ia, seg, writing);
1431 pa = seg->mr_dma;
1432 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
1433 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
1434 page_list[page_no++] = pa;
1435 pa += PAGE_SIZE;
1436 }
1437 len += seg->mr_len;
1438 ++seg;
1439 ++i;
1440 /* Check for holes */
1441 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1442 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1443 break;
1444 }
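	/* The loop above flattens the segments into the FRMR page list in
	 * PAGE_SIZE chunks and stops early at a "hole" (a boundary that is
	 * not page aligned), so the mapped range stays virtually contiguous
	 * behind a single rkey. */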
1445 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1446 __func__, seg1->mr_chunk.rl_mw, i);
1447
1448 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1449 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1450 __func__,
1451 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1452 /* Invalidate before using. */
1453 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1454 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1455 invalidate_wr.next = &frmr_wr;
1456 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1457 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1458 invalidate_wr.ex.invalidate_rkey =
1459 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1460 DECR_CQCOUNT(&r_xprt->rx_ep);
1461 post_wr = &invalidate_wr;
1462 } else
1463 post_wr = &frmr_wr;
1464
1465 /* Bump the key */
1466 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1467 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
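	/* Bumping the low-order byte of the rkey gives this registration a
	 * key distinct from the MR's previous use, so a stale rkey from an
	 * earlier RPC cannot match the newly registered range. */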
1468
1469 /* Prepare FRMR WR */
1470 memset(&frmr_wr, 0, sizeof frmr_wr);
1471 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1472 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1473 frmr_wr.send_flags = IB_SEND_SIGNALED;
1474 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1475 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1476 frmr_wr.wr.fast_reg.page_list_len = page_no;
1477 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1478 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1479 BUG_ON(frmr_wr.wr.fast_reg.length < len);
1480 frmr_wr.wr.fast_reg.access_flags = (writing ?
1481 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1482 IB_ACCESS_REMOTE_READ);
1483 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1484 DECR_CQCOUNT(&r_xprt->rx_ep);
1485
1486 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1487
1488 if (rc) {
1489 dprintk("RPC: %s: failed ib_post_send for register,"
1490 " status %i\n", __func__, rc);
1491 while (i--)
1492 rpcrdma_unmap_one(ia, --seg);
1493 } else {
1494 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1495 seg1->mr_base = seg1->mr_dma + pageoff;
1496 seg1->mr_nsegs = i;
1497 seg1->mr_len = len;
1498 }
1499 *nsegs = i;
1500 return rc;
1501}
1502
1503static int
1504rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1505 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1506{
1507 struct rpcrdma_mr_seg *seg1 = seg;
1508 struct ib_send_wr invalidate_wr, *bad_wr;
1509 int rc;
1510
1511 while (seg1->mr_nsegs--)
1512 rpcrdma_unmap_one(ia, seg++);
1513
1514 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1515 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1516 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1517 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1518 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1519 DECR_CQCOUNT(&r_xprt->rx_ep);
1520
1521 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1522 if (rc)
1523 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1524 " status %i\n", __func__, rc);
1525 return rc;
1526}
1527
1528static int
1529rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1530 int *nsegs, int writing, struct rpcrdma_ia *ia)
1531{
1532 struct rpcrdma_mr_seg *seg1 = seg;
1533 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1534 int len, pageoff, i, rc;
1535
1536 pageoff = offset_in_page(seg1->mr_offset);
1537 seg1->mr_offset -= pageoff; /* start of page */
1538 seg1->mr_len += pageoff;
1539 len = -pageoff;
1540 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1541 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1542 for (i = 0; i < *nsegs;) {
1543 rpcrdma_map_one(ia, seg, writing);
1544 physaddrs[i] = seg->mr_dma;
1545 len += seg->mr_len;
1546 ++seg;
1547 ++i;
1548 /* Check for holes */
1549 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1550 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1551 break;
1552 }
1553 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1554 physaddrs, i, seg1->mr_dma);
1555 if (rc) {
1556 dprintk("RPC: %s: failed ib_map_phys_fmr "
1557 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1558 len, (unsigned long long)seg1->mr_dma,
1559 pageoff, i, rc);
1560 while (i--)
1561 rpcrdma_unmap_one(ia, --seg);
1562 } else {
1563 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1564 seg1->mr_base = seg1->mr_dma + pageoff;
1565 seg1->mr_nsegs = i;
1566 seg1->mr_len = len;
1567 }
1568 *nsegs = i;
1569 return rc;
1570}
1571
1572static int
1573rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1574 struct rpcrdma_ia *ia)
1575{
1576 struct rpcrdma_mr_seg *seg1 = seg;
1577 LIST_HEAD(l);
1578 int rc;
1579
1580 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1581 rc = ib_unmap_fmr(&l);
1582 while (seg1->mr_nsegs--)
1583 rpcrdma_unmap_one(ia, seg++);
1584 if (rc)
1585 dprintk("RPC: %s: failed ib_unmap_fmr,"
1586 " status %i\n", __func__, rc);
1587 return rc;
1588}
1589
1590int
1591rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1592 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1593{
1594 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1595 int rc = 0;
1596
1597 switch (ia->ri_memreg_strategy) {
1598
1599#if RPCRDMA_PERSISTENT_REGISTRATION
1600 case RPCRDMA_ALLPHYSICAL:
1601 rpcrdma_map_one(ia, seg, writing);
1602 seg->mr_rkey = ia->ri_bind_mem->rkey;
1603 seg->mr_base = seg->mr_dma;
1604 seg->mr_nsegs = 1;
1605 nsegs = 1;
1606 break;
1607#endif
1608
1609 /* Registration using frmr registration */
1610 case RPCRDMA_FRMR:
1611 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1612 break;
1613
1614 /* Registration using fmr memory registration */
1615 case RPCRDMA_MTHCAFMR:
1616 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
1617 break;
1618
1619 default:
1620 return -1;
1621 }
1622 if (rc)
1623 return -1;
1624
1625 return nsegs;
1626}
1627
1628int
1629rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1630 struct rpcrdma_xprt *r_xprt)
1631{
1632 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1633 int nsegs = seg->mr_nsegs, rc;
1634
1635 switch (ia->ri_memreg_strategy) {
1636
1637#if RPCRDMA_PERSISTENT_REGISTRATION
1638 case RPCRDMA_ALLPHYSICAL:
1639 BUG_ON(nsegs != 1);
1640 rpcrdma_unmap_one(ia, seg);
1641 rc = 0;
1642 break;
1643#endif
1644
1645 case RPCRDMA_FRMR:
1646 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1647 break;
1648
1649 case RPCRDMA_MTHCAFMR:
1650 rc = rpcrdma_deregister_fmr_external(seg, ia);
1651 break;
1652
1653 default:
1654 break;
1655 }
1656 return nsegs;
1657}
1658
1659/*
1660 * Prepost any receive buffer, then post send.
1661 *
1662 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1663 */
1664int
1665rpcrdma_ep_post(struct rpcrdma_ia *ia,
1666 struct rpcrdma_ep *ep,
1667 struct rpcrdma_req *req)
1668{
1669 struct ib_send_wr send_wr, *send_wr_fail;
1670 struct rpcrdma_rep *rep = req->rl_reply;
1671 int rc;
1672
1673 if (rep) {
1674 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1675 if (rc)
1676 goto out;
1677 req->rl_reply = NULL;
1678 }
1679
1680 send_wr.next = NULL;
1681 send_wr.wr_id = 0ULL; /* no send cookie */
1682 send_wr.sg_list = req->rl_send_iov;
1683 send_wr.num_sge = req->rl_niovs;
1684 send_wr.opcode = IB_WR_SEND;
1685 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1686 ib_dma_sync_single_for_device(ia->ri_id->device,
1687 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1688 DMA_TO_DEVICE);
1689 ib_dma_sync_single_for_device(ia->ri_id->device,
1690 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1691 DMA_TO_DEVICE);
1692 ib_dma_sync_single_for_device(ia->ri_id->device,
1693 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1694 DMA_TO_DEVICE);
1695
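	/* Sends are normally posted unsignaled; DECR_CQCOUNT counts down the
	 * budget set up in rpcrdma_ep_create() (rep_cqinit, half the send
	 * queue) and each time it runs out one signaled send is posted so
	 * the provider can retire completed send WRs. */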
1696 if (DECR_CQCOUNT(ep) > 0)
1697 send_wr.send_flags = 0;
1698 else { /* Provider must take a send completion every now and then */
1699 INIT_CQCOUNT(ep);
1700 send_wr.send_flags = IB_SEND_SIGNALED;
1701 }
1702
1703 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1704 if (rc)
1705 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1706 rc);
1707out:
1708 return rc;
1709}
1710
1711/*
1712 * (Re)post a receive buffer.
1713 */
1714int
1715rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1716 struct rpcrdma_ep *ep,
1717 struct rpcrdma_rep *rep)
1718{
1719 struct ib_recv_wr recv_wr, *recv_wr_fail;
1720 int rc;
1721
1722 recv_wr.next = NULL;
1723 recv_wr.wr_id = (u64) (unsigned long) rep;
1724 recv_wr.sg_list = &rep->rr_iov;
1725 recv_wr.num_sge = 1;
1726
1727 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1728 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
1729
1730 DECR_CQCOUNT(ep);
1731 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1732
1733 if (rc)
1734 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1735 rc);
1736 return rc;
1737}