// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>

#include <asm-generic/barrier.h>
#include <asm/bitops.h>

#include <rdma/ib_cm.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/* Wait for outstanding transport work to finish.
 */
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	/* Flush Receives, then wait for deferred Reply work
	 * to complete.
	 */
	ib_drain_qp(ia->ri_id->qp);
	drain_workqueue(buf->rb_completion_wq);

	/* Deferred Reply processing might have scheduled
	 * local invalidations.
	 */
	ib_drain_sq(ia->ri_id->qp);
}
/**
 * rpcrdma_qp_event_handler - Handle one QP event (error notification)
 * @event: details of the event
 * @context: ep that owns QP where event occurred
 *
 * Called from the RDMA provider (device driver) possibly in an interrupt
 * context.
 */
static void
rpcrdma_qp_event_handler(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;
	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
						   rx_ep);

	trace_xprtrdma_qp_event(r_xprt, event);
}
/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_sendctx *sc =
		container_of(cqe, struct rpcrdma_sendctx, sc_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_send(sc, wc);
	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);

	rpcrdma_sendctx_put_locked(sc);
}
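/* Most Sends are posted unsignaled (see rpcrdma_ep_post below), so a
 * single Send completion arriving here stands in for a whole batch:
 * rpcrdma_sendctx_put_locked() walks the sendctx queue and retires
 * every unsignaled Send context queued before this one.
 */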
/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_receive(wc);
	--r_xprt->rx_ep.rep_receive_count;
	if (wc->status != IB_WC_SUCCESS)
		goto out_flushed;

	/* status == SUCCESS means all fields in wc are trustworthy */
	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
				   rdmab_addr(rep->rr_rdmabuf),
				   wc->byte_len, DMA_FROM_DEVICE);

	rpcrdma_post_recvs(r_xprt, false);
	rpcrdma_reply_handler(rep);
	return;

out_flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rpcrdma_recv_buffer_put(rep);
}
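/* rep_receive_count tracks Receives that are posted but not yet
 * completed. Decrementing it before calling rpcrdma_post_recvs()
 * lets the Receive Queue be topped back up before the Reply is
 * handed off for processing.
 */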
static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
			       struct rdma_conn_param *param)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		r_xprt->rx_ia.ri_implicit_roundup = true;
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
	}

	if (rsize < cdata->inline_rsize)
		cdata->inline_rsize = rsize;
	if (wsize < cdata->inline_wsize)
		cdata->inline_wsize = wsize;
	dprintk("RPC: %s: max send %u, max recv %u\n",
		__func__, cdata->inline_wsize, cdata->inline_rsize);
	rpcrdma_set_max_header_sizes(r_xprt);
}
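/* Example: if the peer's private message advertises an 8KB receive
 * buffer but cdata->inline_wsize is already 4KB, the smaller value
 * wins. The negotiated inline sizes can only shrink the client's
 * compiled-in defaults, never grow them.
 */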
/**
 * rpcrdma_cm_event_handler - Handle RDMA CM events
 * @id: rdma_cm_id on which an event has occurred
 * @event: details of the event
 *
 * Called with @id's mutex held. Returns 1 if caller should
 * destroy @id, otherwise 0.
 */
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *r_xprt = id->context;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	trace_xprtrdma_cm_event(r_xprt, event);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		return 0;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EPROTO;
		complete(&ia->ri_done);
		return 0;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		complete(&ia->ri_done);
		return 0;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
		pr_info("rpcrdma: removing device %s for %s:%s\n",
			ia->ri_device->name,
			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
		ep->rep_connected = -ENODEV;
		xprt_force_disconnect(xprt);
		wait_for_completion(&ia->ri_remove_done);

		ia->ri_id = NULL;
		ia->ri_device = NULL;
		/* Return 1 to ensure the core destroys the id. */
		return 1;
	case RDMA_CM_EVENT_ESTABLISHED:
		++xprt->connect_cookie;
		ep->rep_connected = 1;
		rpcrdma_update_connect_private(r_xprt, &event->param.conn);
		wake_up_all(&ep->rep_connect_wait);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		ep->rep_connected = -ENOTCONN;
		goto disconnected;
	case RDMA_CM_EVENT_UNREACHABLE:
		ep->rep_connected = -ENETUNREACH;
		goto disconnected;
	case RDMA_CM_EVENT_REJECTED:
		dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
			rdma_reject_msg(id, event->status));
		ep->rep_connected = -ECONNREFUSED;
		if (event->status == IB_CM_REJ_STALE_CONN)
			ep->rep_connected = -EAGAIN;
		goto disconnected;
	case RDMA_CM_EVENT_DISCONNECTED:
		ep->rep_connected = -ECONNABORTED;
disconnected:
		xprt_force_disconnect(xprt);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		break;
	}

	dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
		rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
		ia->ri_device->name, rdma_event_msg(event->event));
	return 0;
}
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	struct rdma_cm_id *id;
	int rc;

	trace_xprtrdma_conn_start(xprt);

	init_completion(&ia->ri_done);
	init_completion(&ia->ri_remove_done);

	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
			    xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return id;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL,
			       (struct sockaddr *)&xprt->rx_xprt.addr,
			       RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
	if (rc < 0) {
		trace_xprtrdma_conn_tout(xprt);
		goto out;
	}

	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
	if (rc < 0) {
		trace_xprtrdma_conn_tout(xprt);
		goto out;
	}
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
/*
 * Exported functions.
 */

/**
 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
 * @xprt: transport with IA to (re)initialize
 *
 * Returns 0 on success, negative errno if an appropriate
 * Interface Adapter could not be found and opened.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	int rc;

	ia->ri_id = rpcrdma_create_id(xprt, ia);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out_err;
	}
	ia->ri_device = ia->ri_id->device;

	ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
		goto out_err;
	}

	switch (xprt_rdma_memreg_strategy) {
	case RPCRDMA_FRWR:
		if (frwr_is_supported(ia))
			break;
		/* fall through */
	default:
		pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
		       ia->ri_device->name, xprt_rdma_memreg_strategy);
		rc = -EINVAL;
		goto out_err;
	}

	return 0;

out_err:
	rpcrdma_ia_close(ia);
	return rc;
}
/**
 * rpcrdma_ia_remove - Handle device driver unload
 * @ia: interface adapter being removed
 *
 * Divest transport H/W resources associated with this adapter,
 * but allow it to be restored later.
 */
void
rpcrdma_ia_remove(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpcrdma_rep *rep;

	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	/* This is similar to rpcrdma_ep_destroy, but:
	 * - Don't cancel the connect worker.
	 * - Don't call rpcrdma_ep_disconnect, which waits
	 *   for another conn upcall, which will deadlock.
	 * - rdma_disconnect is unneeded, the underlying
	 *   connection is already gone.
	 */
	if (ia->ri_id->qp) {
		rpcrdma_xprt_drain(r_xprt);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}
	ib_free_cq(ep->rep_attr.recv_cq);
	ep->rep_attr.recv_cq = NULL;
	ib_free_cq(ep->rep_attr.send_cq);
	ep->rep_attr.send_cq = NULL;

	/* The ULP is responsible for ensuring all DMA
	 * mappings and MRs are gone.
	 */
	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
	}
	rpcrdma_mrs_destroy(buf);
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;

	/* Allow waiters to continue */
	complete(&ia->ri_remove_done);

	trace_xprtrdma_remove(r_xprt);
}
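/* Ordering matters in rpcrdma_ia_remove(): the QP is drained and
 * destroyed before the CQs are freed, and the PD is deallocated only
 * after every regbuf and MR DMA mapping has been torn down.
 */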
/**
 * rpcrdma_ia_close - Clean up/close an IA.
 * @ia: interface adapter to close
 *
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
	}
	ia->ri_id = NULL;
	ia->ri_device = NULL;

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
}
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
{
	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
	struct ib_cq *sendcq, *recvcq;
	unsigned int max_sge;
	int rc;

	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
		return -ENOMEM;
	}
	ia->ri_max_send_sges = max_sge;

	rc = frwr_open(ia, ep, cdata);
	if (rc)
		return rc;

	ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_sge = max_sge;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
				   cdata->max_requests >> 2);
	ep->rep_send_count = ep->rep_send_batch;
	init_waitqueue_head(&ep->rep_connect_wait);
	ep->rep_receive_count = 0;

	sendcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_send_wr + 1,
			     ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
			     IB_POLL_WORKQUEUE);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		goto out1;
	}

	recvcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_recv_wr + 1,
			     0, IB_POLL_WORKQUEUE);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */
	memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
	ep->rep_remote_cma.private_data = pmsg;
	ep->rep_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	ep->rep_remote_cma.responder_resources =
		min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->rep_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	ib_free_cq(sendcq);
out1:
	return rc;
}
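/* rep_send_batch is one quarter of cdata->max_requests, capped at
 * RPCRDMA_MAX_SEND_BATCH. rpcrdma_ep_post() requests a signaled Send
 * completion only when the per-endpoint countdown reaches zero, so
 * Send completion interrupts arrive roughly once per batch.
 */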
/*
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	if (ia->ri_id && ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	if (ep->rep_attr.recv_cq)
		ib_free_cq(ep->rep_attr.recv_cq);
	if (ep->rep_attr.send_cq)
		ib_free_cq(ep->rep_attr.send_cq);
}
/* Re-establish a connection after a device removal event.
 * Unlike a normal reconnection, a fresh PD and a new set
 * of MRs and buffers is needed.
 */
static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc, err;

	trace_xprtrdma_reinsert(r_xprt);

	rc = -EHOSTUNREACH;
	if (rpcrdma_ia_open(r_xprt))
		goto out1;

	rc = -ENOMEM;
	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
	if (err) {
		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
		goto out2;
	}

	rc = -ENETUNREACH;
	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (err) {
		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
		goto out3;
	}

	rpcrdma_mrs_create(r_xprt);
	return 0;

out3:
	rpcrdma_ep_destroy(ep, ia);
out2:
	rpcrdma_ia_close(ia);
out1:
	return rc;
}
static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
		     struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int err, rc;

	trace_xprtrdma_reconnect(r_xprt);

	rpcrdma_ep_disconnect(ep, ia);

	rc = -EHOSTUNREACH;
	id = rpcrdma_create_id(r_xprt, ia);
	if (IS_ERR(id))
		goto out;

	/* As long as the new ID points to the same device as the
	 * old ID, we can reuse the transport's existing PD and all
	 * previously allocated MRs. Also, the same device means
	 * the transport's previous DMA mappings are still valid.
	 *
	 * This is a sanity check only. There should be no way these
	 * point to two different devices here.
	 */
	old = id;
	rc = -ENETUNREACH;
	if (ia->ri_device != id->device) {
		pr_err("rpcrdma: can't reconnect on different device!\n");
		goto out_destroy;
	}

	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
	if (err)
		goto out_destroy;

	/* Atomically replace the transport's ID and QP. */
	rc = 0;
	old = ia->ri_id;
	ia->ri_id = id;
	rdma_destroy_qp(old);

out_destroy:
	rdma_destroy_id(old);
out:
	return rc;
}
/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	int rc;

retry:
	switch (ep->rep_connected) {
	case 0:
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			rc = -ENETUNREACH;
			goto out_noupdate;
		}
		break;
	case -ENODEV:
		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
		if (rc)
			goto out_noupdate;
		break;
	default:
		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
		if (rc)
			goto out;
	}

	ep->rep_connected = 0;
	xprt_clear_connected(xprt);

	rpcrdma_post_recvs(r_xprt, true);

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc)
		goto out;

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
	if (ep->rep_connected <= 0) {
		if (ep->rep_connected == -EAGAIN)
			goto retry;
		rc = ep->rep_connected;
		goto out;
	}

	dprintk("RPC: %s: connected\n", __func__);

out:
	if (rc)
		ep->rep_connected = rc;

out_noupdate:
	return rc;
}
/**
 * rpcrdma_ep_disconnect - Disconnect underlying transport
 * @ep: endpoint to disconnect
 * @ia: associated interface adapter
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
						   rx_ep);
	int rc;

	/* returns without wait if ID is not connected */
	rc = rdma_disconnect(ia->ri_id);
	if (!rc)
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
	else
		ep->rep_connected = rc;
	trace_xprtrdma_disconnect(r_xprt, rc);

	rpcrdma_xprt_drain(r_xprt);
}
/* Fixed-size circular FIFO queue. This implementation is wait-free and
 * lock-free.
 *
 * Consumer is the code path that posts Sends. This path dequeues a
 * sendctx for use by a Send operation. Multiple consumer threads
 * are serialized by the RPC transport lock, which allows only one
 * ->send_request call at a time.
 *
 * Producer is the code path that handles Send completions. This path
 * enqueues a sendctx that has been completed. Multiple producer
 * threads are serialized by the ib_poll_cq() function.
 */
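/* Example: with rb_sc_last == 3 the queue has four slots. A get
 * advances rb_sc_head 0 -> 1 -> 2 -> 3 -> 0, and a put advances
 * rb_sc_tail the same way. The queue is treated as empty when
 * advancing the head would make it equal the tail.
 */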
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
 * queue activity, and ib_drain_qp has flushed all remaining Send
 * requests.
 */
static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
{
	unsigned long i;

	for (i = 0; i <= buf->rb_sc_last; i++)
		kfree(buf->rb_sc_ctxs[i]);
	kfree(buf->rb_sc_ctxs);
}
static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
{
	struct rpcrdma_sendctx *sc;

	sc = kzalloc(sizeof(*sc) +
		     ia->ri_max_send_sges * sizeof(struct ib_sge),
		     GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->sc_wr.wr_cqe = &sc->sc_cqe;
	sc->sc_wr.sg_list = sc->sc_sges;
	sc->sc_wr.opcode = IB_WR_SEND;
	sc->sc_cqe.done = rpcrdma_wc_send;
	return sc;
}
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;
	unsigned long i;

	/* Maximum number of concurrent outstanding Send WRs. Capping
	 * the circular queue size stops Send Queue overflow by causing
	 * the ->send_request call to fail temporarily before too many
	 * Sends are posted.
	 */
	i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
	dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
	buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
	if (!buf->rb_sc_ctxs)
		return -ENOMEM;

	buf->rb_sc_last = i - 1;
	for (i = 0; i <= buf->rb_sc_last; i++) {
		sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
		if (!sc)
			return -ENOMEM;

		sc->sc_xprt = r_xprt;
		buf->rb_sc_ctxs[i] = sc;
	}

	return 0;
}
/* The sendctx queue is not guaranteed to have a size that is a
 * power of two, thus the helpers in circ_buf.h cannot be used.
 * The other option is to use modulus (%), which can be expensive.
 */
static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
					  unsigned long item)
{
	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
}
/**
 * rpcrdma_sendctx_get_locked - Acquire a send context
 * @buf: transport buffers from which to acquire an unused context
 *
 * Returns pointer to a free send completion context; or NULL if
 * the queue is empty.
 *
 * Usage: Called to acquire an SGE array before preparing a Send WR.
 *
 * The caller serializes calls to this function (per rpcrdma_buffer),
 * and provides an effective memory barrier that flushes the new value
 * of rb_sc_head.
 */
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt;
	struct rpcrdma_sendctx *sc;
	unsigned long next_head;

	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);

	if (next_head == READ_ONCE(buf->rb_sc_tail))
		goto out_emptyq;

	/* ORDER: item must be accessed _before_ head is updated */
	sc = buf->rb_sc_ctxs[next_head];

	/* Releasing the lock in the caller acts as a memory
	 * barrier that flushes rb_sc_head.
	 */
	buf->rb_sc_head = next_head;

	return sc;

out_emptyq:
	/* The queue is "empty" if there have not been enough Send
	 * completions recently. This is a sign the Send Queue is
	 * backing up. Cause the caller to pause and try again.
	 */
	set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
	r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
	r_xprt->rx_stats.empty_sendctx_q++;
	return NULL;
}
/**
 * rpcrdma_sendctx_put_locked - Release a send context
 * @sc: send context to release
 *
 * Usage: Called from Send completion to return a sendctx
 * to the queue.
 *
 * The caller serializes calls to this function (per rpcrdma_buffer).
 */
static void
rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
	unsigned long next_tail;

	/* Unmap SGEs of previously completed but unsignaled
	 * Sends by walking up the queue until @sc is found.
	 */
	next_tail = buf->rb_sc_tail;
	do {
		next_tail = rpcrdma_sendctx_next(buf, next_tail);

		/* ORDER: item must be accessed _before_ tail is updated */
		rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);

	} while (buf->rb_sc_ctxs[next_tail] != sc);

	/* Paired with READ_ONCE */
	smp_store_release(&buf->rb_sc_tail, next_tail);

	if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
		smp_mb__after_atomic();
		xprt_write_space(&sc->sc_xprt->rx_xprt);
	}
}
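/* The smp_store_release() above pairs with the READ_ONCE() of
 * rb_sc_tail in rpcrdma_sendctx_get_locked(): the tail update becomes
 * visible only after the just-retired contexts have been unmapped.
 */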
static void
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	unsigned int count;
	LIST_HEAD(free);
	LIST_HEAD(all);

	for (count = 0; count < ia->ri_max_segs; count++) {
		struct rpcrdma_mr *mr;
		int rc;

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr)
			break;

		rc = frwr_init_mr(ia, mr);
		if (rc) {
			kfree(mr);
			break;
		}

		mr->mr_xprt = r_xprt;

		list_add(&mr->mr_list, &free);
		list_add(&mr->mr_all, &all);
	}

	spin_lock(&buf->rb_mrlock);
	list_splice(&free, &buf->rb_mrs);
	list_splice(&all, &buf->rb_all);
	r_xprt->rx_stats.mrs_allocated += count;
	spin_unlock(&buf->rb_mrlock);
	trace_xprtrdma_createmrs(r_xprt, count);

	xprt_write_space(&r_xprt->rx_xprt);
}
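/* Each MR created above is linked onto two lists: mr_list puts it on
 * the rb_mrs free list that rpcrdma_mr_get() allocates from, while
 * mr_all puts it on rb_all so rpcrdma_mrs_destroy() can find every MR
 * at teardown regardless of where it is in its lifecycle.
 */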
static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker.work);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_mrs_create(r_xprt);
}
struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
				  DMA_TO_DEVICE, GFP_KERNEL);
	if (IS_ERR(rb)) {
		kfree(req);
		return ERR_PTR(-ENOMEM);
	}
	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
	req->rl_buffer = buffer;
	INIT_LIST_HEAD(&req->rl_registered);

	spin_lock(&buffer->rb_lock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_lock);
	return req;
}
static int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
					       DMA_FROM_DEVICE, GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}
	xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
		     rdmab_length(rep->rr_rdmabuf));

	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	rep->rr_temp = temp;

	spin_lock(&buf->rb_lock);
	list_add(&rep->rr_list, &buf->rb_recv_bufs);
	spin_unlock(&buf->rb_lock);
	return 0;

out_free:
	kfree(rep);
out:
	return rc;
}
int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int i, rc;

	buf->rb_max_requests = r_xprt->rx_data.max_requests;
	buf->rb_bc_srv_max_requests = 0;
	spin_lock_init(&buf->rb_mrlock);
	spin_lock_init(&buf->rb_lock);
	INIT_LIST_HEAD(&buf->rb_mrs);
	INIT_LIST_HEAD(&buf->rb_all);
	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
			  rpcrdma_mr_refresh_worker);

	rpcrdma_mrs_create(r_xprt);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		list_add(&req->rl_list, &buf->rb_send_bufs);
	}

	buf->rb_credits = 1;
	INIT_LIST_HEAD(&buf->rb_recv_bufs);

	rc = rpcrdma_sendctxs_create(r_xprt);
	if (rc)
		goto out;

	buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
						WQ_MEM_RECLAIM | WQ_HIGHPRI,
						0,
			r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
	if (!buf->rb_completion_wq) {
		rc = -ENOMEM;
		goto out;
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}
static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
	rpcrdma_free_regbuf(rep->rr_rdmabuf);
	kfree(rep);
}
/**
 * rpcrdma_req_destroy - Destroy an rpcrdma_req object
 * @req: unused object to be destroyed
 *
 * This function assumes that the caller prevents concurrent device
 * unload and transport tear-down.
 */
void
rpcrdma_req_destroy(struct rpcrdma_req *req)
{
	list_del(&req->rl_all);

	rpcrdma_free_regbuf(req->rl_recvbuf);
	rpcrdma_free_regbuf(req->rl_sendbuf);
	rpcrdma_free_regbuf(req->rl_rdmabuf);
	kfree(req);
}
static void
rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);
	struct rpcrdma_mr *mr;
	unsigned int count;

	count = 0;
	spin_lock(&buf->rb_mrlock);
	while (!list_empty(&buf->rb_all)) {
		mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
		list_del(&mr->mr_all);

		spin_unlock(&buf->rb_mrlock);

		/* Ensure MW is not on any rl_registered list */
		if (!list_empty(&mr->mr_list))
			list_del(&mr->mr_list);

		frwr_release_mr(mr);
		count++;
		spin_lock(&buf->rb_mrlock);
	}
	spin_unlock(&buf->rb_mrlock);
	r_xprt->rx_stats.mrs_allocated = 0;

	dprintk("RPC: %s: released %u MRs\n", __func__, count);
}
/**
 * rpcrdma_buffer_destroy - Release all hw resources
 * @buf: root control block for resources
 *
 * ORDERING: relies on a prior ib_drain_qp :
 * - No more Send or Receive completions can occur
 * - All MRs, reps, and reqs are returned to their free lists
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	if (buf->rb_completion_wq) {
		destroy_workqueue(buf->rb_completion_wq);
		buf->rb_completion_wq = NULL;
	}

	rpcrdma_sendctxs_destroy(buf);

	while (!list_empty(&buf->rb_recv_bufs)) {
		struct rpcrdma_rep *rep;

		rep = list_first_entry(&buf->rb_recv_bufs,
				       struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		rpcrdma_destroy_rep(rep);
	}

	while (!list_empty(&buf->rb_send_bufs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
		list_del(&req->rl_list);
		rpcrdma_req_destroy(req);
	}

	rpcrdma_mrs_destroy(buf);
}
/**
 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
 * @r_xprt: controlling transport
 *
 * Returns an initialized rpcrdma_mr or NULL if no free
 * rpcrdma_mr objects are available.
 */
struct rpcrdma_mr *
rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr = NULL;

	spin_lock(&buf->rb_mrlock);
	if (!list_empty(&buf->rb_mrs))
		mr = rpcrdma_mr_pop(&buf->rb_mrs);
	spin_unlock(&buf->rb_mrlock);

	if (!mr)
		goto out_nomrs;
	return mr;

out_nomrs:
	trace_xprtrdma_nomrs(r_xprt);
	if (r_xprt->rx_ep.rep_connected != -ENODEV)
		schedule_delayed_work(&buf->rb_refresh_worker, 0);

	/* Allow the reply handler and refresh worker to run */
	cond_resched();

	return NULL;
}
static void
__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
{
	spin_lock(&buf->rb_mrlock);
	rpcrdma_mr_push(mr, &buf->rb_mrs);
	spin_unlock(&buf->rb_mrlock);
}
/**
 * rpcrdma_mr_put - Release an rpcrdma_mr object
 * @mr: object to release
 *
 */
void
rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
	__rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
}
/**
 * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
 * @mr: object to release
 *
 */
void
rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

	if (mr->mr_dir != DMA_NONE) {
		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
		mr->mr_dir = DMA_NONE;
	}
	__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
/**
 * rpcrdma_buffer_get - Get a request buffer
 * @buffers: Buffer pool from which to obtain a buffer
 *
 * Returns a fresh rpcrdma_req, or NULL if none are available.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	req = list_first_entry_or_null(&buffers->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
	if (req)
		list_del_init(&req->rl_list);
	spin_unlock(&buffers->rb_lock);
	return req;
}
/**
 * rpcrdma_buffer_put - Put request/reply buffers back into pool
 * @req: object to return
 *
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_rep *rep = req->rl_reply;

	req->rl_reply = NULL;

	spin_lock(&buffers->rb_lock);
	list_add(&req->rl_list, &buffers->rb_send_bufs);
	if (rep) {
		if (!rep->rr_temp) {
			list_add(&rep->rr_list, &buffers->rb_recv_bufs);
			rep = NULL;
		}
	}
	spin_unlock(&buffers->rb_lock);
	if (rep)
		rpcrdma_destroy_rep(rep);
}
/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;

	if (!rep->rr_temp) {
		spin_lock(&buffers->rb_lock);
		list_add(&rep->rr_list, &buffers->rb_recv_bufs);
		spin_unlock(&buffers->rb_lock);
	} else {
		rpcrdma_destroy_rep(rep);
	}
}
/**
 * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
 * @size: size of buffer to be allocated, in bytes
 * @direction: direction of data movement
 * @flags: GFP flags
 *
 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
 * can be persistently DMA-mapped for I/O.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via frwr_map.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
		     gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		return ERR_PTR(-ENOMEM);

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;

	return rb;
}
/**
 * __rpcrdma_dma_map_regbuf - DMA-map a regbuf
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be mapped
 */
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	struct ib_device *device = ia->ri_device;

	if (rb->rg_direction == DMA_NONE)
		return false;

	rb->rg_iov.addr = ib_dma_map_single(device,
					    (void *)rb->rg_base,
					    rdmab_length(rb),
					    rb->rg_direction);
	if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
		trace_xprtrdma_dma_maperr(rdmab_addr(rb));
		return false;
	}

	rb->rg_device = device;
	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
	return true;
}
static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rb)
		return;

	if (!rpcrdma_regbuf_is_mapped(rb))
		return;

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
			    rdmab_length(rb), rb->rg_direction);
	rb->rg_device = NULL;
}
/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
	rpcrdma_dma_unmap_regbuf(rb);
	kfree(rb);
}
/**
 * rpcrdma_ep_post - Post WRs to a transport's Send Queue
 * @ia: transport's device information
 * @ep: transport's RDMA endpoint information
 * @req: rpcrdma_req containing the Send WR to post
 *
 * Returns 0 if the post was successful, otherwise -ENOTCONN
 * is returned.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
	int rc;

	if (!ep->rep_send_count ||
	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
		send_wr->send_flags |= IB_SEND_SIGNALED;
		ep->rep_send_count = ep->rep_send_batch;
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		--ep->rep_send_count;
	}

	rc = frwr_send(ia, req);
	trace_xprtrdma_post_send(req, rc);
	if (rc)
		return -ENOTCONN;
	return 0;
}
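/* Example: with rep_send_batch == 8, consecutive rpcrdma_ep_post()
 * calls post unsignaled Sends while rep_send_count counts down; the
 * call that finds the counter at zero sets IB_SEND_SIGNALED and
 * reloads the counter. A request flagged RPCRDMA_REQ_F_TX_RESOURCES
 * is always signaled so its resources are released promptly.
 */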
static void
rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct ib_recv_wr *wr, *bad_wr;
	int needed, count, rc;

	rc = 0;
	count = 0;
	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
	if (ep->rep_receive_count > needed)
		goto out;
	needed -= ep->rep_receive_count;

	wr = NULL;
	while (needed) {
		struct rpcrdma_regbuf *rb;
		struct rpcrdma_rep *rep;

		spin_lock(&buf->rb_lock);
		rep = list_first_entry_or_null(&buf->rb_recv_bufs,
					       struct rpcrdma_rep, rr_list);
		if (likely(rep))
			list_del(&rep->rr_list);
		spin_unlock(&buf->rb_lock);
		if (!rep) {
			if (rpcrdma_create_rep(r_xprt, temp))
				break;
			continue;
		}

		rb = rep->rr_rdmabuf;
		if (!rpcrdma_regbuf_is_mapped(rb)) {
			if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
				rpcrdma_recv_buffer_put(rep);
				break;
			}
		}

		trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
		rep->rr_recv_wr.next = wr;
		wr = &rep->rr_recv_wr;
		++count;
		--needed;
	}
	if (!count)
		goto out;

	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
			  (const struct ib_recv_wr **)&bad_wr);
	if (rc) {
		for (wr = bad_wr; wr; wr = wr->next) {
			struct rpcrdma_rep *rep;

			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
			rpcrdma_recv_buffer_put(rep);
			--count;
		}
	}
	ep->rep_receive_count += count;
out:
	trace_xprtrdma_post_recvs(r_xprt, count, rc);
}
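/* Example of the Receive provisioning math above: with rb_credits == 32
 * and no backchannel requests, "needed" starts at 32; if 28 Receives
 * are already posted (rep_receive_count), only 4 more are built and
 * posted here, keeping the Receive Queue topped up to the credit limit.
 */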