// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
 */

/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR).
 *
 * FRWR features ordered asynchronous registration and deregistration
 * of arbitrarily sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
 * Work Request (frwr_op_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_op_unmap_sync).
 *
 * Typically these Work Requests are not signaled, and neither are RDMA
 * SEND Work Requests (with the exception of signaling occasionally to
 * prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 *
 * As an optimization, frwr_op_unmap marks MRs INVALID before the
 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
 * rb_mrs immediately so that no work (like managing a linked list
 * under a spinlock) is needed in the completion upcall.
 *
 * But this means that frwr_op_map() can occasionally encounter an MR
 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
 * ordering prevents a subsequent FAST_REG WR from executing against
 * that MR while it is still being invalidated.
 */

/* Transport recovery
 *
 * ->op_map and the transport connect worker cannot run at the same
 * time, but ->op_unmap can fire while the transport connect worker
 * is running. Thus MR recovery is handled in ->op_map, to guarantee
 * that recovered MRs are owned by a sending RPC, and not one where
 * ->op_unmap could fire at the same time transport reconnect is
 * being done.
 *
 * When the underlying transport disconnects, MRs are left in one of
 * four states:
 *
 * INVALID:	The MR was not in use before the QP entered ERROR state.
 *
 * VALID:	The MR was registered before the QP entered ERROR state.
 *
 * FLUSHED_FR:	The MR was being registered when the QP entered ERROR
 *		state, and the pending WR was flushed.
 *
 * FLUSHED_LI:	The MR was being invalidated when the QP entered ERROR
 *		state, and the pending WR was flushed.
 *
 * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
 * with ib_dereg_mr and then are re-initialized. Because MR recovery
 * allocates fresh resources, it is deferred to a workqueue, and the
 * recovered MRs are placed back on the rb_mrs list when recovery is
 * complete. frwr_op_map allocates another MR for the current RPC while
 * the broken MR is reset.
 *
 * To ensure that frwr_op_map doesn't encounter an MR that is marked
 * INVALID but that is about to be flushed due to a previous transport
 * disconnect, the transport connect worker attempts to drain all
 * pending send queue WRs before the transport is reconnected.
 */
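
/* A summary sketch of the MR state transitions implemented below
 * (informational only, not normative; see the individual handlers):
 *
 *   FRWR_IS_INVALID -> FRWR_IS_VALID     frwr_op_map, before posting
 *                                        the FAST_REG WR
 *   FRWR_IS_VALID -> FRWR_FLUSHED_FR     frwr_wc_fastreg, when the
 *                                        FAST_REG WR fails or flushes
 *   FRWR_IS_VALID -> FRWR_IS_INVALID     frwr_op_unmap_sync, before
 *                                        posting the LOCAL_INV WR;
 *                                        also frwr_op_reminv, on
 *                                        remote invalidation
 *   FRWR_IS_INVALID -> FRWR_FLUSHED_LI   frwr_wc_localinv{,_wake},
 *                                        when the LOCAL_INV WR fails
 *                                        or flushes
 *   any state -> FRWR_IS_INVALID         __frwr_mr_reset, once the MR
 *                                        has been replaced
 */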

#include <linux/sunrpc/rpc_rdma.h>

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

bool
frwr_is_supported(struct rpcrdma_ia *ia)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		goto out_not_supported;
	if (attrs->max_fast_reg_page_list_len == 0)
		goto out_not_supported;
	return true;

out_not_supported:
	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
		ia->ri_device->name);
	return false;
}

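/* Allocate the resources backing one rpcrdma_mr: an ib_mr sized to
 * this device's maximum registration depth, a matching scatterlist,
 * and the completion used to wait for LOCAL_INV to finish.
 */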
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	unsigned int depth = ia->ri_max_frwr_depth;
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
	if (IS_ERR(frwr->fr_mr))
		goto out_mr_err;

	mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
	if (!mr->mr_sg)
		goto out_list_err;

	sg_init_table(mr->mr_sg, depth);
	init_completion(&frwr->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(frwr->fr_mr);
	dprintk("RPC:       %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC:       %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(frwr->fr_mr);
	return rc;
}

static void
frwr_op_release_mr(struct rpcrdma_mr *mr)
{
	int rc;

	/* Ensure MR is not on any rl_registered list */
	if (!list_empty(&mr->mr_list))
		list_del(&mr->mr_list);

	rc = ib_dereg_mr(mr->frwr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       mr, rc);
	kfree(mr->mr_sg);
	kfree(mr);
}

static int
__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	rc = ib_dereg_mr(frwr->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, mr);
		return rc;
	}

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
				  ia->ri_max_frwr_depth);
	if (IS_ERR(frwr->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(frwr->fr_mr), mr);
		return PTR_ERR(frwr->fr_mr);
	}

	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
	frwr->fr_state = FRWR_IS_INVALID;
	return 0;
}

/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
 */
static void
frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
	enum rpcrdma_frwr_state state = mr->frwr.fr_state;
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc;

	rc = __frwr_mr_reset(ia, mr);
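	/* MRs flushed while being invalidated (FLUSHED_LI) have
	 * already been DMA unmapped by frwr_op_unmap_sync; avoid
	 * unmapping them a second time.
	 */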
	if (state != FRWR_FLUSHED_LI) {
		trace_xprtrdma_dma_unmap(mr);
		ib_dma_unmap_sg(ia->ri_device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
	}
	if (rc)
		goto out_release;

	rpcrdma_mr_put(mr);
	r_xprt->rx_stats.mrs_recovered++;
	return;

out_release:
	pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
	r_xprt->rx_stats.mrs_orphaned++;

	spin_lock(&r_xprt->rx_buf.rb_mrlock);
	list_del(&mr->mr_all);
	spin_unlock(&r_xprt->rx_buf.rb_mrlock);

	frwr_op_release_mr(mr);
}

static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	     struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;
	int depth, delta;

	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;

	ia->ri_max_frwr_depth =
			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
			      attrs->max_fast_reg_page_list_len);
	dprintk("RPC:       %s: device's max FR page list len = %u\n",
		__func__, ia->ri_max_frwr_depth);

	/* Add room for FRWR register and invalidate WRs.
	 * 1. FRWR reg WR for head
	 * 2. FRWR invalidate WR for head
	 * 3. N FRWR reg WRs for pagelist
	 * 4. N FRWR invalidate WRs for pagelist
	 * 5. FRWR reg WR for tail
	 * 6. FRWR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
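	/* For example (illustrative values only): if RPCRDMA_MAX_DATA_SEGS
	 * were 64 and the device's max depth 30, delta would start at 34,
	 * the loop below would run twice, and depth would end up at
	 * 7 + 2 * 2 = 11.
	 */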
	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
		do {
			depth += 2; /* FRWR reg + invalidate */
			delta -= ia->ri_max_frwr_depth;
		} while (delta > 0);
	}

	ep->rep_attr.cap.max_send_wr *= depth;
	if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
		cdata->max_requests = attrs->max_qp_wr / depth;
		if (!cdata->max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
					       depth;
	}

	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
				ia->ri_max_frwr_depth);
	return 0;
}

/* FRWR mode conveys a list of pages per chunk segment. The
 * maximum length of that list is the FRWR page list depth.
 */
static size_t
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}

static void
__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
{
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
		       wr, ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
			container_of(cqe, struct rpcrdma_frwr, fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_FR;
		__frwr_sendcompletion_flush(wc, "fastreg");
	}
	trace_xprtrdma_wc_fastreg(wc, frwr);
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
						 fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_LI;
		__frwr_sendcompletion_flush(wc, "localinv");
	}
	trace_xprtrdma_wc_li(wc, frwr);
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
						 fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_LI;
		__frwr_sendcompletion_flush(wc, "localinv");
	}
	complete(&frwr->fr_linv_done);
	trace_xprtrdma_wc_li_wake(wc, frwr);
}

/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n;
	u8 key;

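	/* Acquire an MR that is safe to reuse: an MR in any state
	 * other than INVALID has a pending or flushed WR (see
	 * "Transport recovery" above), so hand it to the recovery
	 * workqueue and try another.
	 */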
	mr = NULL;
	do {
		if (mr)
			rpcrdma_mr_defer_recovery(mr);
		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return ERR_PTR(-ENOBUFS);
	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
	frwr = &mr->frwr;
	frwr->fr_state = FRWR_IS_VALID;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
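	/* Build the scatterlist for this registration. Unless the
	 * device supports registering gaps (SG_GAPS), stop early:
	 * after a segment that does not end on a page boundary, or
	 * before a remaining segment that does not start on one.
	 */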
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;

	ibmr = frwr->fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

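	/* Replace the low-order "key" byte of the rkey so that a
	 * stale handle from a previous registration of this MR
	 * cannot be confused with the new one.
	 */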
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &frwr->fr_regwr;
	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frwr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	frwr->fr_state = FRWR_IS_INVALID;
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
	       frwr->fr_mr, n, mr->mr_nents);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-EIO);

out_senderr:
	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-ENOTCONN);
}

/* Handle a remotely invalidated MR on the @mrs list
 */
static void
frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del(&mr->mr_list);
			trace_xprtrdma_remoteinv(mr);
			mr->frwr.fr_state = FRWR_IS_INVALID;
			rpcrdma_mr_unmap_and_put(mr);
			break;	/* only one invalidated MR per RPC */
		}
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that @mrs is not empty before the call. This
 * function empties the list.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
	struct ib_send_wr *first, **prev, *last, *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	int count, rc;

	/* ORDER: Invalidate all of the MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	frwr = NULL;
	count = 0;
	prev = &first;
	list_for_each_entry(mr, mrs, mr_list) {
		mr->frwr.fr_state = FRWR_IS_INVALID;

		frwr = &mr->frwr;
		trace_xprtrdma_localinv(mr);

		frwr->fr_cqe.done = frwr_wc_localinv;
		last = &frwr->fr_invwr;
		memset(last, 0, sizeof(*last));
		last->wr_cqe = &frwr->fr_cqe;
		last->opcode = IB_WR_LOCAL_INV;
		last->ex.invalidate_rkey = mr->mr_handle;
		count++;

		*prev = last;
		prev = &last->next;
	}
	if (!frwr)
		goto unmap;

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	last->send_flags = IB_SEND_SIGNALED;
	frwr->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&frwr->fr_linv_done);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	r_xprt->rx_stats.local_inv_needed++;
	bad_wr = NULL;
	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
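	/* bad_wr == first means ib_post_send() failed before posting
	 * even the first WR; no LOCAL_INV completion will ever fire,
	 * so waiting for one would hang.
	 */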
	if (bad_wr != first)
		wait_for_completion(&frwr->fr_linv_done);
	if (rc)
		goto reset_mrs;

	/* ORDER: Now DMA unmap all of the MRs, and return
	 * them to the free MR list.
	 */
unmap:
	while (!list_empty(mrs)) {
		mr = rpcrdma_mr_pop(mrs);
		rpcrdma_mr_unmap_and_put(mr);
	}
	return;

reset_mrs:
	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);

	/* Find and reset the MRs in the LOCAL_INV WRs that did not
	 * get posted.
	 */
	while (bad_wr) {
		frwr = container_of(bad_wr, struct rpcrdma_frwr,
				    fr_invwr);
		mr = container_of(frwr, struct rpcrdma_mr, frwr);

		__frwr_mr_reset(ia, mr);

		bad_wr = bad_wr->next;
	}
	goto unmap;
}

const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
	.ro_map				= frwr_op_map,
	.ro_reminv			= frwr_op_reminv,
	.ro_unmap_sync			= frwr_op_unmap_sync,
	.ro_recover_mr			= frwr_op_recover_mr,
	.ro_open			= frwr_op_open,
	.ro_maxpages			= frwr_op_maxpages,
	.ro_init_mr			= frwr_op_init_mr,
	.ro_release_mr			= frwr_op_release_mr,
	.ro_displayname			= "frwr",
	.ro_send_w_inv_ok		= RPCRDMA_CMP_F_SND_W_INV_OK,
};