/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
 * Copyright (c) 2014-2017 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
#define _LINUX_SUNRPC_XPRT_RDMA_H

#include <linux/wait.h>			/* wait_queue_head_t, etc */
#include <linux/spinlock.h>		/* spinlock_t, etc */
#include <linux/atomic.h>		/* atomic_t, etc */
#include <linux/workqueue.h>		/* struct work_struct */

#include <rdma/rdma_cm.h>		/* RDMA connection api */
#include <rdma/ib_verbs.h>		/* RDMA verbs api */

#include <linux/sunrpc/clnt.h>		/* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h>	/* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h>	/* xprt parameters */

#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */

#define RPCRDMA_BIND_TO		(60U * HZ)
#define RPCRDMA_INIT_REEST_TO	(5U * HZ)
#define RPCRDMA_MAX_REEST_TO	(30U * HZ)
#define RPCRDMA_IDLE_DISC_TO	(5U * 60 * HZ)

/*
 * Interface Adapter -- one per transport instance
 */
struct rpcrdma_ia {
	struct ib_device	*ri_device;
	struct rdma_cm_id	*ri_id;
	struct ib_pd		*ri_pd;
	struct completion	ri_done;
	struct completion	ri_remove_done;
	int			ri_async_rc;
	unsigned int		ri_max_segs;
	unsigned int		ri_max_frwr_depth;
	unsigned int		ri_max_inline_write;
	unsigned int		ri_max_inline_read;
	unsigned int		ri_max_send_sges;
	bool			ri_implicit_roundup;
	enum ib_mr_type		ri_mrtype;
	unsigned long		ri_flags;
	struct ib_qp_attr	ri_qp_attr;
	struct ib_qp_init_attr	ri_qp_init_attr;
};

enum {
	RPCRDMA_IAF_REMOVING = 0,
};

/*
 * RDMA Endpoint -- one per transport instance
 */

struct rpcrdma_ep {
	unsigned int		rep_send_count;
	unsigned int		rep_send_batch;
	int			rep_connected;
	struct ib_qp_init_attr	rep_attr;
	wait_queue_head_t	rep_connect_wait;
	struct rpcrdma_connect_private	rep_cm_private;
	struct rdma_conn_param	rep_remote_cma;
	int			rep_receive_count;
};

/* Pre-allocate extra Work Requests for handling backward receives
 * and sends. This is a fixed value because the Work Queues are
 * allocated when the forward channel is set up.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS	(8)
#else
#define RPCRDMA_BACKWARD_WRS	(0)
#endif

/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
 *
 * The below structure appears at the front of a large region of kmalloc'd
 * memory, which always starts on a good alignment boundary.
 */

struct rpcrdma_regbuf {
	struct ib_sge		rg_iov;
	struct ib_device	*rg_device;
	enum dma_data_direction	rg_direction;
	__be32			rg_base[0] __attribute__ ((aligned(256)));
};

static inline u64
rdmab_addr(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.addr;
}

static inline u32
rdmab_length(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.length;
}

static inline u32
rdmab_lkey(struct rpcrdma_regbuf *rb)
{
	return rb->rg_iov.lkey;
}

static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
	return rb->rg_device;
}

#define RPCRDMA_DEF_GFP		(GFP_NOIO | __GFP_NOWARN)

/* To ensure a transport can always make forward progress,
 * the number of RDMA segments allowed in header chunk lists
 * is capped at 8. This prevents less-capable devices and
 * memory registrations from overrunning the Send buffer
 * while building chunk lists.
 *
 * Elements of the Read list take up more room than the
 * Write list or Reply chunk. 8 read segments means the Read
 * list (or Write list or Reply chunk) cannot consume more
 * than
 *
 * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes.
 *
 * And the fixed part of the header is another 24 bytes.
 *
 * The smallest inline threshold is 1024 bytes, ensuring that
 * at least 750 bytes are available for RPC messages.
 */
enum {
	RPCRDMA_MAX_HDR_SEGS = 8,
	RPCRDMA_HDRBUF_SIZE = 256,
};
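
/* A sketch of that arithmetic, reading each Read list entry as it
 * encodes on the wire (6 XDR words, or 24 bytes):
 *
 *	(8 + 2) * 6 + 1 = 61 XDR words = 244 bytes
 *
 * With the 24-byte fixed header, 1024 - 244 - 24 = 756 bytes remain
 * for the inline RPC message -- hence "at least 750 bytes" above.
 */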

/*
 * struct rpcrdma_rep -- this structure encapsulates state required
 * to receive and complete an RPC Reply, asynchronously. It needs
 * several pieces of state:
 *
 *   o receive buffer and ib_sge (donated to provider)
 *   o status of receive (success or not, length, inv rkey)
 *   o bookkeeping state to get run by reply handler (XDR stream)
 *
 * These structures are allocated during transport initialization.
 * N of these are associated with a transport instance, managed by
 * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
 */

struct rpcrdma_rep {
	struct ib_cqe		rr_cqe;
	__be32			rr_xid;
	__be32			rr_vers;
	__be32			rr_proc;
	int			rr_wc_flags;
	u32			rr_inv_rkey;
	bool			rr_temp;
	struct rpcrdma_regbuf	*rr_rdmabuf;
	struct rpcrdma_xprt	*rr_rxprt;
	struct work_struct	rr_work;
	struct xdr_buf		rr_hdrbuf;
	struct xdr_stream	rr_stream;
	struct rpc_rqst		*rr_rqst;
	struct list_head	rr_list;
	struct ib_recv_wr	rr_recv_wr;
};

/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
 */
struct rpcrdma_req;
struct rpcrdma_xprt;
struct rpcrdma_sendctx {
	struct ib_send_wr	sc_wr;
	struct ib_cqe		sc_cqe;
	struct rpcrdma_xprt	*sc_xprt;
	struct rpcrdma_req	*sc_req;
	unsigned int		sc_unmap_count;
	struct ib_sge		sc_sges[];
};
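
/* sc_sges[] is a flexible array: the entry count is fixed when each
 * sendctx is allocated, presumably bounded by the device's max_sge
 * limit (see ri_max_send_sges in struct rpcrdma_ia).
 */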

/* Limit the number of SGEs that can be unmapped during one
 * Send completion. This caps the amount of work a single
 * completion can do before returning to the provider.
 *
 * Setting this to zero disables Send completion batching.
 */
enum {
	RPCRDMA_MAX_SEND_BATCH = 7,
};

/*
 * struct rpcrdma_mr - external memory region metadata
 *
 * An external memory region is any buffer or page that is registered
 * on the fly (ie, not pre-registered).
 *
 * Each rpcrdma_buffer has a list of free MRs anchored in rb_mrs. During
 * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
 * an rpcrdma_req. Then frwr_map() grabs these to keep track of
 * registration metadata while each RPC is pending.
 * frwr_unmap_sync() uses this metadata to unmap and release these
 * resources when an RPC is complete.
 */
enum rpcrdma_frwr_state {
	FRWR_IS_INVALID,	/* ready to be used */
	FRWR_IS_VALID,		/* in use */
	FRWR_FLUSHED_FR,	/* flushed FASTREG WR */
	FRWR_FLUSHED_LI,	/* flushed LOCALINV WR */
};

struct rpcrdma_frwr {
	struct ib_mr			*fr_mr;
	struct ib_cqe			fr_cqe;
	enum rpcrdma_frwr_state		fr_state;
	struct completion		fr_linv_done;
	union {
		struct ib_reg_wr	fr_regwr;
		struct ib_send_wr	fr_invwr;
	};
};

struct rpcrdma_mr {
	struct list_head	mr_list;
	struct scatterlist	*mr_sg;
	int			mr_nents;
	enum dma_data_direction	mr_dir;
	struct rpcrdma_frwr	frwr;
	struct rpcrdma_xprt	*mr_xprt;
	u32			mr_handle;
	u32			mr_length;
	u64			mr_offset;
	struct work_struct	mr_recycle;
	struct list_head	mr_all;
};

/*
 * struct rpcrdma_req -- structure central to the request/reply sequence.
 *
 * N of these are associated with a transport instance, and stored in
 * struct rpcrdma_buffer. N is the max number of outstanding requests.
 *
 * It includes pre-registered buffer memory for send AND recv.
 * The recv buffer, however, is not owned by this structure, and
 * is "donated" to the hardware when a recv is posted. When a
 * reply is handled, the recv buffer used is given back to the
 * struct rpcrdma_req associated with the request.
 *
 * In addition to the basic memory, this structure includes an array
 * of iovs for send operations. The reason is that the iovs passed to
 * ib_post_{send,recv} must not be modified until the work request
 * completes.
 */

/* Maximum number of page-sized "segments" per chunk list to be
 * registered or invalidated. Must handle a Reply chunk:
 */
enum {
	RPCRDMA_MAX_IOV_SEGS	= 3,
	RPCRDMA_MAX_DATA_SEGS	= ((1 * 1024 * 1024) / PAGE_SIZE) + 1,
	RPCRDMA_MAX_SEGS	= RPCRDMA_MAX_DATA_SEGS +
				  RPCRDMA_MAX_IOV_SEGS,
};
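
/* For example, with 4KB pages, RPCRDMA_MAX_DATA_SEGS works out to
 * (1MB / 4096) + 1 = 257 segments, giving RPCRDMA_MAX_SEGS = 260.
 */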

struct rpcrdma_mr_seg {		/* chunk descriptors */
	u32		mr_len;		/* length of chunk or segment */
	struct page	*mr_page;	/* owning page, if any */
	char		*mr_offset;	/* kva if no page, else offset */
};

/* The Send SGE array is provisioned to send a maximum size
 * inline request:
 * - RPC-over-RDMA header
 * - xdr_buf head iovec
 * - RPCRDMA_MAX_INLINE bytes, in pages
 * - xdr_buf tail iovec
 *
 * The actual number of array elements consumed by each RPC
 * depends on the device's max_sge limit.
 */
enum {
	RPCRDMA_MIN_SEND_SGES = 3,
	RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
	RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
};
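
/* The 1 + 1 + N + 1 terms above correspond to the four items listed
 * in the preceding comment: transport header, xdr_buf head iovec,
 * payload pages, and xdr_buf tail iovec.
 */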

struct rpcrdma_buffer;
struct rpcrdma_req {
	struct list_head	rl_list;
	struct rpc_rqst		rl_slot;
	struct rpcrdma_buffer	*rl_buffer;
	struct rpcrdma_rep	*rl_reply;
	struct xdr_stream	rl_stream;
	struct xdr_buf		rl_hdrbuf;
	struct rpcrdma_sendctx	*rl_sendctx;
	struct rpcrdma_regbuf	*rl_rdmabuf;	/* xprt header */
	struct rpcrdma_regbuf	*rl_sendbuf;	/* rq_snd_buf */
	struct rpcrdma_regbuf	*rl_recvbuf;	/* rq_rcv_buf */

	struct list_head	rl_all;
	unsigned long		rl_flags;

	struct list_head	rl_registered;	/* registered segments */
	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
};

/* rl_flags */
enum {
	RPCRDMA_REQ_F_PENDING = 0,
	RPCRDMA_REQ_F_TX_RESOURCES,
};

static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
	return container_of(rqst, struct rpcrdma_req, rl_slot);
}

static inline void
rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
	list_add_tail(&mr->mr_list, list);
}

static inline struct rpcrdma_mr *
rpcrdma_mr_pop(struct list_head *list)
{
	struct rpcrdma_mr *mr;

	mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
	list_del_init(&mr->mr_list);
	return mr;
}
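
/* Note that rpcrdma_mr_pop() assumes @list is non-empty. Callers
 * operating on the shared rb_mrs list serialize with rb_mrlock
 * (see struct rpcrdma_buffer below).
 */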

/*
 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
 * inline requests/replies, and client/server credits.
 *
 * One of these is associated with a transport instance
 */
struct rpcrdma_buffer {
	spinlock_t		rb_mrlock;	/* protect rb_mrs list */
	struct list_head	rb_mrs;
	struct list_head	rb_all;

	unsigned long		rb_sc_head;
	unsigned long		rb_sc_tail;
	unsigned long		rb_sc_last;
	struct rpcrdma_sendctx	**rb_sc_ctxs;

	spinlock_t		rb_lock;	/* protect buf lists */
	struct list_head	rb_send_bufs;
	struct list_head	rb_recv_bufs;
	struct list_head	rb_allreqs;

	unsigned long		rb_flags;
	u32			rb_max_requests;
	u32			rb_credits;	/* most recent credit grant */

	u32			rb_bc_srv_max_requests;
	u32			rb_bc_max_requests;

	struct workqueue_struct *rb_completion_wq;
	struct delayed_work	rb_refresh_worker;
};

/* rb_flags */
enum {
	RPCRDMA_BUF_F_EMPTY_SCQ = 0,
};

/*
 * Internal structure for transport instance creation. This
 * exists primarily for modularity.
 *
 * This data should be set with mount options
 */
struct rpcrdma_create_data_internal {
	unsigned int	max_requests;	/* max requests (slots) in flight */
	unsigned int	rsize;		/* mount rsize - max read hdr+data */
	unsigned int	wsize;		/* mount wsize - max write hdr+data */
	unsigned int	inline_rsize;	/* max non-rdma read data payload */
	unsigned int	inline_wsize;	/* max non-rdma write data payload */
};

/*
 * Statistics for RPCRDMA
 */
struct rpcrdma_stats {
	/* accessed when sending a call */
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;
	unsigned long long	total_rdma_request;

	/* rarely accessed error counters */
	unsigned long long	pullup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		mrs_recycled;
	unsigned long		mrs_orphaned;
	unsigned long		mrs_allocated;
	unsigned long		empty_sendctx_q;

	/* accessed when receiving a reply */
	unsigned long long	total_rdma_reply;
	unsigned long long	fixup_copy_count;
	unsigned long		reply_waits_for_send;
	unsigned long		local_inv_needed;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;
};

/*
 * RPCRDMA transport -- encapsulates the structures above for
 * integration with RPC.
 *
 * The contained structures are embedded, not pointers,
 * for convenience. This structure need not be visible externally.
 *
 * It is allocated and initialized during mount, and released
 * during unmount.
 */
struct rpcrdma_xprt {
	struct rpc_xprt		rx_xprt;
	struct rpcrdma_ia	rx_ia;
	struct rpcrdma_ep	rx_ep;
	struct rpcrdma_buffer	rx_buf;
	struct rpcrdma_create_data_internal rx_data;
	struct delayed_work	rx_connect_worker;
	struct rpcrdma_stats	rx_stats;
};

#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)

static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
{
	return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
}

static inline const char *
rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
{
	return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
}

/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 enhances certain unaligned read/write performance.
 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs()
 */
extern int xprt_rdma_pad_optimize;

/* This setting controls the hunt for a supported memory
 * registration strategy.
 */
extern unsigned int xprt_rdma_memreg_strategy;

/*
 * Interface Adapter calls - xprtrdma/verbs.c
 */
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);

/*
 * Endpoint calls - xprtrdma/verbs.c
 */
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
				struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);

int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
				struct rpcrdma_req *);

/*
 * Buffer calls - xprtrdma/verbs.c
 */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);

struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);

static inline void
rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
{
	schedule_work(&mr->mr_recycle);
}

struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);

struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
					    gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);

static inline bool
rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
{
	return rb->rg_device != NULL;
}

static inline bool
rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (likely(rpcrdma_regbuf_is_mapped(rb)))
		return true;
	return __rpcrdma_dma_map_regbuf(ia, rb);
}
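
/* The inline wrapper above short-circuits the common already-mapped
 * case; the out-of-line __rpcrdma_dma_map_regbuf() runs only when a
 * regbuf has not yet been DMA mapped (rg_device == NULL).
 */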

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static inline enum dma_data_direction
rpcrdma_data_dir(bool writing)
{
	return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}

/* Memory registration calls xprtrdma/frwr_ops.c
 */
bool frwr_is_supported(struct rpcrdma_ia *);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	      struct rpcrdma_create_data_internal *cdata);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_mr_seg *seg,
				int nsegs, bool writing, u32 xid,
				struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
		     struct list_head *mrs);

/*
 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
 */

enum rpcrdma_chunktype {
	rpcrdma_noch = 0,
	rpcrdma_readch,
	rpcrdma_areadch,
	rpcrdma_writech,
	rpcrdma_replych
};

int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
			      struct rpcrdma_req *req, u32 hdrlen,
			      struct xdr_buf *xdr,
			      enum rpcrdma_chunktype rtype);
void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
			  struct rpcrdma_req *req);
void rpcrdma_deferred_completion(struct work_struct *work);

static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
	xdr->head[0].iov_len = len;
	xdr->len = len;
}

/* RPC/RDMA module init - xprtrdma/transport.c
 */
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void xprt_rdma_close(struct rpc_xprt *xprt);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);

/* Backchannel calls - xprtrdma/backchannel.c
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

extern struct xprt_class xprt_rdma_bc;

#endif				/* _LINUX_SUNRPC_XPRT_RDMA_H */