]>
Commit | Line | Data |
---|---|---|
a2268cfb | 1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
f58851e6 | 2 | /* |
62b56a67 | 3 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
f58851e6 TT |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | * | |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the BSD-type | |
10 | * license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | |
15 | * | |
16 | * Redistributions of source code must retain the above copyright | |
17 | * notice, this list of conditions and the following disclaimer. | |
18 | * | |
19 | * Redistributions in binary form must reproduce the above | |
20 | * copyright notice, this list of conditions and the following | |
21 | * disclaimer in the documentation and/or other materials provided | |
22 | * with the distribution. | |
23 | * | |
24 | * Neither the name of the Network Appliance, Inc. nor the names of | |
25 | * its contributors may be used to endorse or promote products | |
26 | * derived from this software without specific prior written | |
27 | * permission. | |
28 | * | |
29 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
30 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
31 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
32 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
33 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
34 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
35 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
36 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
37 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
38 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
39 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
40 | */ | |
41 | ||
42 | /* | |
43 | * transport.c | |
44 | * | |
45 | * This file contains the top-level implementation of an RPC RDMA | |
46 | * transport. | |
47 | * | |
48 | * Naming convention: functions beginning with xprt_ are part of the | |
49 | * transport switch. All others are RPC RDMA internal. | |
50 | */ | |
51 | ||
52 | #include <linux/module.h> | |
5a0e3ad6 | 53 | #include <linux/slab.h> |
f58851e6 | 54 | #include <linux/seq_file.h> |
bd2abef3 CL |
55 | #include <linux/smp.h> |
56 | ||
5976687a | 57 | #include <linux/sunrpc/addr.h> |
bd2abef3 | 58 | #include <linux/sunrpc/svc_rdma.h> |
f58851e6 TT |
59 | |
60 | #include "xprt_rdma.h" | |
b6e717cb | 61 | #include <trace/events/rpcrdma.h> |
f58851e6 | 62 | |
f895b252 | 63 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
64 | # define RPCDBG_FACILITY RPCDBG_TRANS |
65 | #endif | |
66 | ||
f58851e6 TT |
67 | /* |
68 | * tunables | |
69 | */ | |
70 | ||
86c4ccd9 | 71 | unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; |
5d252f90 | 72 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
94087e97 | 73 | unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
ce5b3717 | 74 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; |
fff09594 | 75 | int xprt_rdma_pad_optimize; |
f58851e6 | 76 | |
f895b252 | 77 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
78 | |
79 | static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; | |
80 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; | |
29c55422 CL |
81 | static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; |
82 | static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; | |
f58851e6 TT |
83 | static unsigned int max_padding = PAGE_SIZE; |
84 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; | |
85 | static unsigned int max_memreg = RPCRDMA_LAST - 1; | |
10492704 | 86 | static unsigned int dummy; |
f58851e6 TT |
87 | |
88 | static struct ctl_table_header *sunrpc_table_header; | |
89 | ||
fe2c6338 | 90 | static struct ctl_table xr_tunables_table[] = { |
f58851e6 | 91 | { |
f58851e6 TT |
92 | .procname = "rdma_slot_table_entries", |
93 | .data = &xprt_rdma_slot_table_entries, | |
94 | .maxlen = sizeof(unsigned int), | |
95 | .mode = 0644, | |
6d456111 | 96 | .proc_handler = proc_dointvec_minmax, |
f58851e6 TT |
97 | .extra1 = &min_slot_table_size, |
98 | .extra2 = &max_slot_table_size | |
99 | }, | |
100 | { | |
f58851e6 TT |
101 | .procname = "rdma_max_inline_read", |
102 | .data = &xprt_rdma_max_inline_read, | |
103 | .maxlen = sizeof(unsigned int), | |
104 | .mode = 0644, | |
44829d02 | 105 | .proc_handler = proc_dointvec_minmax, |
29c55422 CL |
106 | .extra1 = &min_inline_size, |
107 | .extra2 = &max_inline_size, | |
f58851e6 TT |
108 | }, |
109 | { | |
f58851e6 TT |
110 | .procname = "rdma_max_inline_write", |
111 | .data = &xprt_rdma_max_inline_write, | |
112 | .maxlen = sizeof(unsigned int), | |
113 | .mode = 0644, | |
44829d02 | 114 | .proc_handler = proc_dointvec_minmax, |
29c55422 CL |
115 | .extra1 = &min_inline_size, |
116 | .extra2 = &max_inline_size, | |
f58851e6 TT |
117 | }, |
118 | { | |
f58851e6 | 119 | .procname = "rdma_inline_write_padding", |
10492704 | 120 | .data = &dummy, |
f58851e6 TT |
121 | .maxlen = sizeof(unsigned int), |
122 | .mode = 0644, | |
6d456111 | 123 | .proc_handler = proc_dointvec_minmax, |
eec4844f | 124 | .extra1 = SYSCTL_ZERO, |
f58851e6 TT |
125 | .extra2 = &max_padding, |
126 | }, | |
127 | { | |
f58851e6 TT |
128 | .procname = "rdma_memreg_strategy", |
129 | .data = &xprt_rdma_memreg_strategy, | |
130 | .maxlen = sizeof(unsigned int), | |
131 | .mode = 0644, | |
6d456111 | 132 | .proc_handler = proc_dointvec_minmax, |
f58851e6 TT |
133 | .extra1 = &min_memreg, |
134 | .extra2 = &max_memreg, | |
135 | }, | |
9191ca3b | 136 | { |
9191ca3b TT |
137 | .procname = "rdma_pad_optimize", |
138 | .data = &xprt_rdma_pad_optimize, | |
139 | .maxlen = sizeof(unsigned int), | |
140 | .mode = 0644, | |
6d456111 | 141 | .proc_handler = proc_dointvec, |
9191ca3b | 142 | }, |
f8572d8f | 143 | { }, |
f58851e6 TT |
144 | }; |
145 | ||
fe2c6338 | 146 | static struct ctl_table sunrpc_table[] = { |
f58851e6 | 147 | { |
f58851e6 TT |
148 | .procname = "sunrpc", |
149 | .mode = 0555, | |
150 | .child = xr_tunables_table | |
151 | }, | |
f8572d8f | 152 | { }, |
f58851e6 TT |
153 | }; |
154 | ||
155 | #endif | |
156 | ||
d31ae254 | 157 | static const struct rpc_xprt_ops xprt_rdma_procs; |
f58851e6 | 158 | |
0dd39cae CL |
159 | static void |
160 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | |
161 | { | |
162 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | |
163 | char buf[20]; | |
164 | ||
165 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | |
166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | |
167 | ||
168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | |
169 | } | |
170 | ||
171 | static void | |
172 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | |
173 | { | |
174 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | |
175 | char buf[40]; | |
176 | ||
177 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | |
178 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | |
179 | ||
180 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | |
181 | } | |
182 | ||
5d252f90 | 183 | void |
5231eb97 | 184 | xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) |
f58851e6 | 185 | { |
0dd39cae CL |
186 | char buf[128]; |
187 | ||
188 | switch (sap->sa_family) { | |
189 | case AF_INET: | |
190 | xprt_rdma_format_addresses4(xprt, sap); | |
191 | break; | |
192 | case AF_INET6: | |
193 | xprt_rdma_format_addresses6(xprt, sap); | |
194 | break; | |
195 | default: | |
196 | pr_err("rpcrdma: Unrecognized address family\n"); | |
197 | return; | |
198 | } | |
f58851e6 | 199 | |
c877b849 CL |
200 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
201 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | |
f58851e6 | 202 | |
81160e66 | 203 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
c877b849 | 204 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
f58851e6 | 205 | |
81160e66 | 206 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
c877b849 | 207 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
f58851e6 | 208 | |
0dd39cae | 209 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
f58851e6 TT |
210 | } |
211 | ||
5d252f90 | 212 | void |
f58851e6 TT |
213 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) |
214 | { | |
33e01dc7 CL |
215 | unsigned int i; |
216 | ||
217 | for (i = 0; i < RPC_DISPLAY_MAX; i++) | |
218 | switch (i) { | |
219 | case RPC_DISPLAY_PROTO: | |
220 | case RPC_DISPLAY_NETID: | |
221 | continue; | |
222 | default: | |
223 | kfree(xprt->address_strings[i]); | |
224 | } | |
f58851e6 TT |
225 | } |
226 | ||
31e62d25 CL |
227 | /** |
228 | * xprt_rdma_connect_worker - establish connection in the background | |
229 | * @work: worker thread context | |
230 | * | |
231 | * Requester holds the xprt's send lock to prevent activity on this | |
232 | * transport while a fresh connection is being established. RPC tasks | |
233 | * sleep on the xprt's pending queue waiting for connect to complete. | |
234 | */ | |
f58851e6 TT |
235 | static void |
236 | xprt_rdma_connect_worker(struct work_struct *work) | |
237 | { | |
5abefb86 CL |
238 | struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, |
239 | rx_connect_worker.work); | |
240 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | |
31e62d25 | 241 | int rc; |
d19751e7 | 242 | |
d19751e7 | 243 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
f58851e6 | 244 | xprt_clear_connecting(xprt); |
31e62d25 | 245 | if (r_xprt->rx_ep.rep_connected > 0) { |
3968a8a5 CL |
246 | if (!xprt_test_and_set_connected(xprt)) { |
247 | xprt->stat.connect_count++; | |
248 | xprt->stat.connect_time += (long)jiffies - | |
249 | xprt->stat.connect_start; | |
31e62d25 | 250 | xprt_wake_pending_tasks(xprt, -EAGAIN); |
3968a8a5 | 251 | } |
3a72dc77 CL |
252 | } else { |
253 | if (xprt_test_and_clear_connected(xprt)) | |
31e62d25 | 254 | xprt_wake_pending_tasks(xprt, rc); |
3a72dc77 | 255 | } |
f58851e6 TT |
256 | } |
257 | ||
f26c32fa CL |
258 | /** |
259 | * xprt_rdma_inject_disconnect - inject a connection fault | |
260 | * @xprt: transport context | |
261 | * | |
262 | * If @xprt is connected, disconnect it to simulate spurious connection | |
263 | * loss. | |
264 | */ | |
4a068258 CL |
265 | static void |
266 | xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | |
267 | { | |
ad091180 | 268 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
4a068258 | 269 | |
395069fc | 270 | trace_xprtrdma_op_inject_dsc(r_xprt); |
4a068258 CL |
271 | rdma_disconnect(r_xprt->rx_ia.ri_id); |
272 | } | |
273 | ||
f26c32fa CL |
274 | /** |
275 | * xprt_rdma_destroy - Full tear down of transport | |
276 | * @xprt: doomed transport context | |
f58851e6 | 277 | * |
f26c32fa CL |
278 | * Caller guarantees there will be no more calls to us with |
279 | * this @xprt. | |
f58851e6 TT |
280 | */ |
281 | static void | |
282 | xprt_rdma_destroy(struct rpc_xprt *xprt) | |
283 | { | |
284 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
f58851e6 | 285 | |
395069fc | 286 | trace_xprtrdma_op_destroy(r_xprt); |
f58851e6 | 287 | |
5abefb86 | 288 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
f58851e6 | 289 | |
86c4ccd9 | 290 | rpcrdma_ep_destroy(r_xprt); |
72c02173 | 291 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
f58851e6 TT |
292 | rpcrdma_ia_close(&r_xprt->rx_ia); |
293 | ||
294 | xprt_rdma_free_addresses(xprt); | |
e204e621 | 295 | xprt_free(xprt); |
f58851e6 | 296 | |
f58851e6 TT |
297 | module_put(THIS_MODULE); |
298 | } | |
299 | ||
675dd90a | 300 | /* 60 second timeout, no retries */ |
2881ae74 TM |
301 | static const struct rpc_timeout xprt_rdma_default_timeout = { |
302 | .to_initval = 60 * HZ, | |
303 | .to_maxval = 60 * HZ, | |
304 | }; | |
305 | ||
f58851e6 TT |
306 | /** |
307 | * xprt_setup_rdma - Set up transport to use RDMA | |
308 | * | |
309 | * @args: rpc transport arguments | |
310 | */ | |
311 | static struct rpc_xprt * | |
312 | xprt_setup_rdma(struct xprt_create *args) | |
313 | { | |
f58851e6 TT |
314 | struct rpc_xprt *xprt; |
315 | struct rpcrdma_xprt *new_xprt; | |
5231eb97 | 316 | struct sockaddr *sap; |
f58851e6 TT |
317 | int rc; |
318 | ||
ddbb347f | 319 | if (args->addrlen > sizeof(xprt->addr)) |
f58851e6 | 320 | return ERR_PTR(-EBADF); |
f58851e6 | 321 | |
edb41e61 | 322 | xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); |
ddbb347f | 323 | if (!xprt) |
f58851e6 | 324 | return ERR_PTR(-ENOMEM); |
f58851e6 | 325 | |
ba7392bb | 326 | xprt->timeout = &xprt_rdma_default_timeout; |
675dd90a CL |
327 | xprt->connect_timeout = xprt->timeout->to_initval; |
328 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; | |
bfaee096 CL |
329 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
330 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | |
331 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; | |
f58851e6 TT |
332 | |
333 | xprt->resvport = 0; /* privileged port not needed */ | |
f58851e6 TT |
334 | xprt->ops = &xprt_rdma_procs; |
335 | ||
336 | /* | |
337 | * Set up RDMA-specific connect data. | |
338 | */ | |
dd229cee | 339 | sap = args->dstaddr; |
f58851e6 TT |
340 | |
341 | /* Ensure xprt->addr holds valid server TCP (not RDMA) | |
342 | * address, for any side protocols which peek at it */ | |
343 | xprt->prot = IPPROTO_TCP; | |
344 | xprt->addrlen = args->addrlen; | |
5231eb97 | 345 | memcpy(&xprt->addr, sap, xprt->addrlen); |
f58851e6 | 346 | |
5231eb97 | 347 | if (rpc_get_port(sap)) |
f58851e6 | 348 | xprt_set_bound(xprt); |
d461f1f2 | 349 | xprt_rdma_format_addresses(xprt, sap); |
f58851e6 | 350 | |
f58851e6 | 351 | new_xprt = rpcx_to_rdmax(xprt); |
dd229cee | 352 | rc = rpcrdma_ia_open(new_xprt); |
f58851e6 TT |
353 | if (rc) |
354 | goto out1; | |
355 | ||
86c4ccd9 | 356 | rc = rpcrdma_ep_create(new_xprt); |
f58851e6 TT |
357 | if (rc) |
358 | goto out2; | |
359 | ||
ac920d04 | 360 | rc = rpcrdma_buffer_create(new_xprt); |
f58851e6 TT |
361 | if (rc) |
362 | goto out3; | |
363 | ||
5abefb86 CL |
364 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
365 | xprt_rdma_connect_worker); | |
f58851e6 | 366 | |
5f62412b | 367 | xprt->max_payload = frwr_maxpages(new_xprt); |
1c9351ee CL |
368 | if (xprt->max_payload == 0) |
369 | goto out4; | |
370 | xprt->max_payload <<= PAGE_SHIFT; | |
43e95988 CL |
371 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
372 | __func__, xprt->max_payload); | |
f58851e6 TT |
373 | |
374 | if (!try_module_get(THIS_MODULE)) | |
375 | goto out4; | |
376 | ||
5231eb97 CL |
377 | dprintk("RPC: %s: %s:%s\n", __func__, |
378 | xprt->address_strings[RPC_DISPLAY_ADDR], | |
379 | xprt->address_strings[RPC_DISPLAY_PORT]); | |
b4744e00 | 380 | trace_xprtrdma_create(new_xprt); |
f58851e6 TT |
381 | return xprt; |
382 | ||
383 | out4: | |
03ac1a76 | 384 | rpcrdma_buffer_destroy(&new_xprt->rx_buf); |
03ac1a76 | 385 | rc = -ENODEV; |
f58851e6 | 386 | out3: |
86c4ccd9 | 387 | rpcrdma_ep_destroy(new_xprt); |
f58851e6 TT |
388 | out2: |
389 | rpcrdma_ia_close(&new_xprt->rx_ia); | |
390 | out1: | |
395069fc | 391 | trace_xprtrdma_op_destroy(new_xprt); |
d461f1f2 | 392 | xprt_rdma_free_addresses(xprt); |
e204e621 | 393 | xprt_free(xprt); |
f58851e6 TT |
394 | return ERR_PTR(rc); |
395 | } | |
396 | ||
bebd0318 | 397 | /** |
f26c32fa CL |
398 | * xprt_rdma_close - close a transport connection |
399 | * @xprt: transport context | |
bebd0318 | 400 | * |
395069fc CL |
401 | * Called during autoclose or device removal. |
402 | * | |
f26c32fa CL |
403 | * Caller holds @xprt's send lock to prevent activity on this |
404 | * transport while the connection is torn down. | |
f58851e6 | 405 | */ |
0c0829bc | 406 | void xprt_rdma_close(struct rpc_xprt *xprt) |
f58851e6 TT |
407 | { |
408 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
bebd0318 CL |
409 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
410 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | |
411 | ||
6d2d0ee2 CL |
412 | might_sleep(); |
413 | ||
395069fc CL |
414 | trace_xprtrdma_op_close(r_xprt); |
415 | ||
6d2d0ee2 CL |
416 | /* Prevent marshaling and sending of new requests */ |
417 | xprt_clear_connected(xprt); | |
f58851e6 | 418 | |
bebd0318 | 419 | if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { |
bebd0318 | 420 | rpcrdma_ia_remove(ia); |
0c0829bc | 421 | goto out; |
bebd0318 | 422 | } |
0c0829bc | 423 | |
bebd0318 CL |
424 | if (ep->rep_connected == -ENODEV) |
425 | return; | |
426 | if (ep->rep_connected > 0) | |
08ca0dce | 427 | xprt->reestablish_timeout = 0; |
bebd0318 | 428 | rpcrdma_ep_disconnect(ep, ia); |
ef739b21 CL |
429 | |
430 | /* Prepare @xprt for the next connection by reinitializing | |
431 | * its credit grant to one (see RFC 8166, Section 3.3.3). | |
432 | */ | |
433 | r_xprt->rx_buf.rb_credits = 1; | |
434 | xprt->cwnd = RPC_CWNDSHIFT; | |
0c0829bc CL |
435 | |
436 | out: | |
437 | ++xprt->connect_cookie; | |
438 | xprt_disconnect_done(xprt); | |
f58851e6 TT |
439 | } |
440 | ||
20035edf CL |
441 | /** |
442 | * xprt_rdma_set_port - update server port with rpcbind result | |
443 | * @xprt: controlling RPC transport | |
444 | * @port: new port value | |
445 | * | |
446 | * Transport connect status is unchanged. | |
447 | */ | |
f58851e6 TT |
448 | static void |
449 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | |
450 | { | |
20035edf CL |
451 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
452 | char buf[8]; | |
453 | ||
454 | dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", | |
455 | __func__, xprt, | |
456 | xprt->address_strings[RPC_DISPLAY_ADDR], | |
457 | xprt->address_strings[RPC_DISPLAY_PORT], | |
458 | port); | |
459 | ||
460 | rpc_set_port(sap, port); | |
f58851e6 | 461 | |
20035edf CL |
462 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); |
463 | snprintf(buf, sizeof(buf), "%u", port); | |
464 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | |
465 | ||
466 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); | |
467 | snprintf(buf, sizeof(buf), "%4hx", port); | |
468 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | |
f58851e6 TT |
469 | } |
470 | ||
/**
 * xprt_rdma_timer - invoked when an RPC times out
 * @xprt: controlling RPC transport
 * @task: RPC task that timed out (not examined here)
 *
 * Invoked when the transport is still connected, but an RPC
 * retransmit timeout occurs.
 *
 * Since RDMA connections don't have a keep-alive, forcibly
 * disconnect and retry to connect. This drives full
 * detection of the network path, and retransmissions of
 * all pending RPCs.
 */
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
	xprt_force_disconnect(xprt);
}
489 | ||
f26c32fa | 490 | /** |
675dd90a CL |
491 | * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection |
492 | * @xprt: controlling transport instance | |
493 | * @connect_timeout: reconnect timeout after client disconnects | |
494 | * @reconnect_timeout: reconnect timeout after server disconnects | |
495 | * | |
496 | */ | |
497 | static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt, | |
498 | unsigned long connect_timeout, | |
499 | unsigned long reconnect_timeout) | |
500 | { | |
501 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
502 | ||
503 | trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout); | |
504 | ||
505 | spin_lock(&xprt->transport_lock); | |
506 | ||
507 | if (connect_timeout < xprt->connect_timeout) { | |
508 | struct rpc_timeout to; | |
509 | unsigned long initval; | |
510 | ||
511 | to = *xprt->timeout; | |
512 | initval = connect_timeout; | |
513 | if (initval < RPCRDMA_INIT_REEST_TO << 1) | |
514 | initval = RPCRDMA_INIT_REEST_TO << 1; | |
515 | to.to_initval = initval; | |
516 | to.to_maxval = initval; | |
517 | r_xprt->rx_timeout = to; | |
518 | xprt->timeout = &r_xprt->rx_timeout; | |
519 | xprt->connect_timeout = connect_timeout; | |
520 | } | |
521 | ||
522 | if (reconnect_timeout < xprt->max_reconnect_timeout) | |
523 | xprt->max_reconnect_timeout = reconnect_timeout; | |
524 | ||
525 | spin_unlock(&xprt->transport_lock); | |
526 | } | |
527 | ||
528 | /** | |
529 | * xprt_rdma_connect - schedule an attempt to reconnect | |
f26c32fa | 530 | * @xprt: transport state |
675dd90a | 531 | * @task: RPC scheduler context (unused) |
f26c32fa CL |
532 | * |
533 | */ | |
f58851e6 | 534 | static void |
1b092092 | 535 | xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
f58851e6 | 536 | { |
f58851e6 | 537 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
675dd90a | 538 | unsigned long delay; |
f58851e6 | 539 | |
395069fc | 540 | trace_xprtrdma_op_connect(r_xprt); |
675dd90a CL |
541 | |
542 | delay = 0; | |
0b9e7943 | 543 | if (r_xprt->rx_ep.rep_connected != 0) { |
675dd90a CL |
544 | delay = xprt_reconnect_delay(xprt); |
545 | xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); | |
f58851e6 | 546 | } |
675dd90a CL |
547 | queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, |
548 | delay); | |
f58851e6 TT |
549 | } |
550 | ||
48be539d CL |
551 | /** |
552 | * xprt_rdma_alloc_slot - allocate an rpc_rqst | |
553 | * @xprt: controlling RPC transport | |
554 | * @task: RPC task requesting a fresh rpc_rqst | |
555 | * | |
556 | * tk_status values: | |
557 | * %0 if task->tk_rqstp points to a fresh rpc_rqst | |
558 | * %-EAGAIN if no rpc_rqst is available; queued on backlog | |
559 | */ | |
560 | static void | |
561 | xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) | |
562 | { | |
edb41e61 CL |
563 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
564 | struct rpcrdma_req *req; | |
48be539d | 565 | |
edb41e61 CL |
566 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); |
567 | if (!req) | |
48be539d | 568 | goto out_sleep; |
edb41e61 | 569 | task->tk_rqstp = &req->rl_slot; |
48be539d CL |
570 | task->tk_status = 0; |
571 | return; | |
572 | ||
573 | out_sleep: | |
dd31d4d1 | 574 | set_bit(XPRT_CONGESTED, &xprt->state); |
48be539d | 575 | rpc_sleep_on(&xprt->backlog, task, NULL); |
48be539d CL |
576 | task->tk_status = -EAGAIN; |
577 | } | |
578 | ||
579 | /** | |
580 | * xprt_rdma_free_slot - release an rpc_rqst | |
581 | * @xprt: controlling RPC transport | |
582 | * @rqst: rpc_rqst to release | |
583 | * | |
584 | */ | |
585 | static void | |
586 | xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) | |
587 | { | |
5828ceba CL |
588 | struct rpcrdma_xprt *r_xprt = |
589 | container_of(xprt, struct rpcrdma_xprt, rx_xprt); | |
590 | ||
48be539d | 591 | memset(rqst, 0, sizeof(*rqst)); |
5828ceba | 592 | rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); |
dd31d4d1 CL |
593 | if (unlikely(!rpc_wake_up_next(&xprt->backlog))) |
594 | clear_bit(XPRT_CONGESTED, &xprt->state); | |
48be539d CL |
595 | } |
596 | ||
0f665ceb CL |
597 | static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, |
598 | struct rpcrdma_regbuf *rb, size_t size, | |
599 | gfp_t flags) | |
9c40c49f | 600 | { |
0f665ceb CL |
601 | if (unlikely(rdmab_length(rb) < size)) { |
602 | if (!rpcrdma_regbuf_realloc(rb, size, flags)) | |
603 | return false; | |
604 | r_xprt->rx_stats.hardway_register_count += size; | |
605 | } | |
9c40c49f CL |
606 | return true; |
607 | } | |
608 | ||
5fe6eaa1 CL |
609 | /** |
610 | * xprt_rdma_allocate - allocate transport resources for an RPC | |
611 | * @task: RPC task | |
612 | * | |
613 | * Return values: | |
614 | * 0: Success; rq_buffer points to RPC buffer to use | |
615 | * ENOMEM: Out of memory, call again later | |
616 | * EIO: A permanent error occurred, do not retry | |
f58851e6 | 617 | */ |
5fe6eaa1 CL |
618 | static int |
619 | xprt_rdma_allocate(struct rpc_task *task) | |
f58851e6 | 620 | { |
5fe6eaa1 | 621 | struct rpc_rqst *rqst = task->tk_rqstp; |
5fe6eaa1 | 622 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
edb41e61 | 623 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
a0a1d50c | 624 | gfp_t flags; |
f58851e6 | 625 | |
5d252f90 | 626 | flags = RPCRDMA_DEF_GFP; |
a0a1d50c CL |
627 | if (RPC_IS_SWAPPER(task)) |
628 | flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; | |
629 | ||
0f665ceb CL |
630 | if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, |
631 | flags)) | |
9c40c49f | 632 | goto out_fail; |
0f665ceb CL |
633 | if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize, |
634 | flags)) | |
9c40c49f CL |
635 | goto out_fail; |
636 | ||
8cec3dba CL |
637 | rqst->rq_buffer = rdmab_data(req->rl_sendbuf); |
638 | rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); | |
395069fc | 639 | trace_xprtrdma_op_allocate(task, req); |
5fe6eaa1 | 640 | return 0; |
0ca77dc3 | 641 | |
0ca77dc3 | 642 | out_fail: |
395069fc | 643 | trace_xprtrdma_op_allocate(task, NULL); |
5fe6eaa1 | 644 | return -ENOMEM; |
f58851e6 TT |
645 | } |
646 | ||
3435c74a CL |
647 | /** |
648 | * xprt_rdma_free - release resources allocated by xprt_rdma_allocate | |
649 | * @task: RPC task | |
650 | * | |
651 | * Caller guarantees rqst->rq_buffer is non-NULL. | |
f58851e6 TT |
652 | */ |
653 | static void | |
3435c74a | 654 | xprt_rdma_free(struct rpc_task *task) |
f58851e6 | 655 | { |
3435c74a CL |
656 | struct rpc_rqst *rqst = task->tk_rqstp; |
657 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | |
658 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | |
f58851e6 | 659 | |
395069fc | 660 | trace_xprtrdma_op_free(task, req); |
0ab11523 CL |
661 | |
662 | if (!list_empty(&req->rl_registered)) | |
663 | frwr_unmap_sync(r_xprt, req); | |
664 | ||
665 | /* XXX: If the RPC is completing because of a signal and | |
666 | * not because a reply was received, we ought to ensure | |
667 | * that the Send completion has fired, so that memory | |
668 | * involved with the Send is not still visible to the NIC. | |
669 | */ | |
f58851e6 TT |
670 | } |
671 | ||
7a89f9c6 CL |
672 | /** |
673 | * xprt_rdma_send_request - marshal and send an RPC request | |
adfa7144 | 674 | * @rqst: RPC message in rq_snd_buf |
7a89f9c6 | 675 | * |
bebd0318 CL |
676 | * Caller holds the transport's write lock. |
677 | * | |
cf73daf5 CL |
678 | * Returns: |
679 | * %0 if the RPC message has been sent | |
680 | * %-ENOTCONN if the caller should reconnect and call again | |
9e679d5e CL |
681 | * %-EAGAIN if the caller should call again |
682 | * %-ENOBUFS if the caller should call again after a delay | |
6946f823 CL |
683 | * %-EMSGSIZE if encoding ran out of buffer space. The request |
684 | * was not sent. Do not try to send this message again. | |
685 | * %-EIO if an I/O error occurred. The request was not sent. | |
686 | * Do not try to send this message again. | |
f58851e6 | 687 | */ |
f58851e6 | 688 | static int |
adfa7144 | 689 | xprt_rdma_send_request(struct rpc_rqst *rqst) |
f58851e6 | 690 | { |
a4f0835c | 691 | struct rpc_xprt *xprt = rqst->rq_xprt; |
f58851e6 TT |
692 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
693 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
6ab59945 | 694 | int rc = 0; |
f58851e6 | 695 | |
cf73daf5 CL |
696 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
697 | if (unlikely(!rqst->rq_buffer)) | |
698 | return xprt_rdma_bc_send_reply(rqst); | |
699 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | |
700 | ||
bebd0318 | 701 | if (!xprt_connected(xprt)) |
0c0829bc | 702 | return -ENOTCONN; |
bebd0318 | 703 | |
75891f50 TM |
704 | if (!xprt_request_get_cong(xprt, rqst)) |
705 | return -EBADSLT; | |
706 | ||
09e60641 | 707 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
6ab59945 CL |
708 | if (rc < 0) |
709 | goto failed_marshal; | |
f58851e6 | 710 | |
575448bd | 711 | /* Must suppress retransmit to maintain credits */ |
8a14793e | 712 | if (rqst->rq_connect_cookie == xprt->connect_cookie) |
575448bd | 713 | goto drop_connection; |
78215759 | 714 | rqst->rq_xtime = ktime_get(); |
575448bd TT |
715 | |
716 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | |
717 | goto drop_connection; | |
f58851e6 | 718 | |
d60dbb20 | 719 | rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; |
fb14ae88 CL |
720 | |
721 | /* An RPC with no reply will throw off credit accounting, | |
722 | * so drop the connection to reset the credit grant. | |
723 | */ | |
50f484e2 | 724 | if (!rpc_reply_expected(rqst->rq_task)) |
fb14ae88 | 725 | goto drop_connection; |
f58851e6 | 726 | return 0; |
575448bd | 727 | |
c93c6223 | 728 | failed_marshal: |
7a89f9c6 CL |
729 | if (rc != -ENOTCONN) |
730 | return rc; | |
575448bd | 731 | drop_connection: |
0c0829bc CL |
732 | xprt_rdma_close(xprt); |
733 | return -ENOTCONN; | |
f58851e6 TT |
734 | } |
735 | ||
5d252f90 | 736 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
f58851e6 TT |
737 | { |
738 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
739 | long idle_time = 0; | |
740 | ||
741 | if (xprt_connected(xprt)) | |
742 | idle_time = (long)(jiffies - xprt->last_used) / HZ; | |
743 | ||
763f7e4e CL |
744 | seq_puts(seq, "\txprt:\trdma "); |
745 | seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ", | |
746 | 0, /* need a local port? */ | |
747 | xprt->stat.bind_count, | |
748 | xprt->stat.connect_count, | |
8440a886 | 749 | xprt->stat.connect_time / HZ, |
763f7e4e CL |
750 | idle_time, |
751 | xprt->stat.sends, | |
752 | xprt->stat.recvs, | |
753 | xprt->stat.bad_xids, | |
754 | xprt->stat.req_u, | |
755 | xprt->stat.bklog_u); | |
505bbe64 | 756 | seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ", |
763f7e4e CL |
757 | r_xprt->rx_stats.read_chunk_count, |
758 | r_xprt->rx_stats.write_chunk_count, | |
759 | r_xprt->rx_stats.reply_chunk_count, | |
760 | r_xprt->rx_stats.total_rdma_request, | |
761 | r_xprt->rx_stats.total_rdma_reply, | |
762 | r_xprt->rx_stats.pullup_copy_count, | |
763 | r_xprt->rx_stats.fixup_copy_count, | |
764 | r_xprt->rx_stats.hardway_register_count, | |
765 | r_xprt->rx_stats.failed_marshal_count, | |
860477d1 CL |
766 | r_xprt->rx_stats.bad_reply_count, |
767 | r_xprt->rx_stats.nomsg_call_count); | |
01bb35c8 | 768 | seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", |
61da886b | 769 | r_xprt->rx_stats.mrs_recycled, |
e2ac236c | 770 | r_xprt->rx_stats.mrs_orphaned, |
c8b920bb | 771 | r_xprt->rx_stats.mrs_allocated, |
ae72950a | 772 | r_xprt->rx_stats.local_inv_needed, |
01bb35c8 CL |
773 | r_xprt->rx_stats.empty_sendctx_q, |
774 | r_xprt->rx_stats.reply_waits_for_send); | |
f58851e6 TT |
775 | } |
776 | ||
/**
 * xprt_rdma_enable_swap - transport switch ->enable_swap method
 * @xprt: transport (unused)
 *
 * No per-transport setup is performed; always returns 0.
 */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}
782 | ||
/**
 * xprt_rdma_disable_swap - transport switch ->disable_swap method
 * @xprt: transport (unused)
 *
 * Intentionally empty: xprt_rdma_enable_swap() sets nothing up.
 */
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
787 | ||
f58851e6 TT |
788 | /* |
789 | * Plumbing for rpc transport switch and kernel module | |
790 | */ | |
791 | ||
d31ae254 | 792 | static const struct rpc_xprt_ops xprt_rdma_procs = { |
e7ce710a | 793 | .reserve_xprt = xprt_reserve_xprt_cong, |
f58851e6 | 794 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
48be539d CL |
795 | .alloc_slot = xprt_rdma_alloc_slot, |
796 | .free_slot = xprt_rdma_free_slot, | |
f58851e6 | 797 | .release_request = xprt_release_rqst_cong, /* ditto */ |
8ba6a92d | 798 | .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */ |
33849792 | 799 | .timer = xprt_rdma_timer, |
f58851e6 TT |
800 | .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ |
801 | .set_port = xprt_rdma_set_port, | |
802 | .connect = xprt_rdma_connect, | |
803 | .buf_alloc = xprt_rdma_allocate, | |
804 | .buf_free = xprt_rdma_free, | |
805 | .send_request = xprt_rdma_send_request, | |
806 | .close = xprt_rdma_close, | |
807 | .destroy = xprt_rdma_destroy, | |
675dd90a | 808 | .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout, |
d67fa4d8 JL |
809 | .print_stats = xprt_rdma_print_stats, |
810 | .enable_swap = xprt_rdma_enable_swap, | |
811 | .disable_swap = xprt_rdma_disable_swap, | |
f531a5db CL |
812 | .inject_disconnect = xprt_rdma_inject_disconnect, |
813 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
814 | .bc_setup = xprt_rdma_bc_setup, | |
6b26cc8c | 815 | .bc_maxpayload = xprt_rdma_bc_maxpayload, |
7402a4fe | 816 | .bc_num_slots = xprt_rdma_bc_max_slots, |
f531a5db CL |
817 | .bc_free_rqst = xprt_rdma_bc_free_rqst, |
818 | .bc_destroy = xprt_rdma_bc_destroy, | |
819 | #endif | |
f58851e6 TT |
820 | }; |
821 | ||
822 | static struct xprt_class xprt_rdma = { | |
823 | .list = LIST_HEAD_INIT(xprt_rdma.list), | |
824 | .name = "rdma", | |
825 | .owner = THIS_MODULE, | |
826 | .ident = XPRT_TRANSPORT_RDMA, | |
827 | .setup = xprt_setup_rdma, | |
828 | }; | |
829 | ||
ffe1f0df | 830 | void xprt_rdma_cleanup(void) |
f58851e6 | 831 | { |
f895b252 | 832 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
833 | if (sunrpc_table_header) { |
834 | unregister_sysctl_table(sunrpc_table_header); | |
835 | sunrpc_table_header = NULL; | |
836 | } | |
837 | #endif | |
5d252f90 | 838 | |
ddbb347f CL |
839 | xprt_unregister_transport(&xprt_rdma); |
840 | xprt_unregister_transport(&xprt_rdma_bc); | |
f58851e6 TT |
841 | } |
842 | ||
ffe1f0df | 843 | int xprt_rdma_init(void) |
f58851e6 TT |
844 | { |
845 | int rc; | |
846 | ||
951e721c | 847 | rc = xprt_register_transport(&xprt_rdma); |
6d2d0ee2 | 848 | if (rc) |
951e721c | 849 | return rc; |
951e721c | 850 | |
5d252f90 CL |
851 | rc = xprt_register_transport(&xprt_rdma_bc); |
852 | if (rc) { | |
853 | xprt_unregister_transport(&xprt_rdma); | |
5d252f90 CL |
854 | return rc; |
855 | } | |
856 | ||
f895b252 | 857 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
858 | if (!sunrpc_table_header) |
859 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | |
860 | #endif | |
861 | return 0; | |
862 | } |