]>
Commit | Line | Data |
---|---|---|
d5b31be6 | 1 | /* |
0bf48289 | 2 | * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. |
d5b31be6 TT |
3 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | * | |
40 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
41 | */ | |
42 | ||
43 | #include <linux/sunrpc/debug.h> | |
44 | #include <linux/sunrpc/rpc_rdma.h> | |
45 | #include <linux/spinlock.h> | |
46 | #include <asm/unaligned.h> | |
47 | #include <rdma/ib_verbs.h> | |
48 | #include <rdma/rdma_cm.h> | |
49 | #include <linux/sunrpc/svc_rdma.h> | |
50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
52 | ||
53 | /* | |
54 | * Replace the pages in the rq_argpages array with the pages from the SGE in | |
55 | * the RDMA_RECV completion. The SGL should contain full pages up until the | |
56 | * last one. | |
57 | */ | |
58 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |
59 | struct svc_rdma_op_ctxt *ctxt, | |
60 | u32 byte_count) | |
61 | { | |
0b056c22 | 62 | struct rpcrdma_msg *rmsgp; |
d5b31be6 TT |
63 | struct page *page; |
64 | u32 bc; | |
65 | int sge_no; | |
66 | ||
67 | /* Swap the page in the SGE with the page in argpages */ | |
68 | page = ctxt->pages[0]; | |
69 | put_page(rqstp->rq_pages[0]); | |
70 | rqstp->rq_pages[0] = page; | |
71 | ||
72 | /* Set up the XDR head */ | |
73 | rqstp->rq_arg.head[0].iov_base = page_address(page); | |
0bf48289 SW |
74 | rqstp->rq_arg.head[0].iov_len = |
75 | min_t(size_t, byte_count, ctxt->sge[0].length); | |
d5b31be6 TT |
76 | rqstp->rq_arg.len = byte_count; |
77 | rqstp->rq_arg.buflen = byte_count; | |
78 | ||
79 | /* Compute bytes past head in the SGL */ | |
80 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | |
81 | ||
82 | /* If data remains, store it in the pagelist */ | |
83 | rqstp->rq_arg.page_len = bc; | |
84 | rqstp->rq_arg.page_base = 0; | |
0b056c22 CL |
85 | |
86 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ | |
87 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | |
30b7e246 | 88 | if (rmsgp->rm_type == rdma_nomsg) |
0b056c22 CL |
89 | rqstp->rq_arg.pages = &rqstp->rq_pages[0]; |
90 | else | |
91 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | |
92 | ||
d5b31be6 TT |
93 | sge_no = 1; |
94 | while (bc && sge_no < ctxt->count) { | |
95 | page = ctxt->pages[sge_no]; | |
96 | put_page(rqstp->rq_pages[sge_no]); | |
97 | rqstp->rq_pages[sge_no] = page; | |
0bf48289 | 98 | bc -= min_t(u32, bc, ctxt->sge[sge_no].length); |
d5b31be6 TT |
99 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; |
100 | sge_no++; | |
101 | } | |
102 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | |
7e4359e2 | 103 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 | 104 | |
d5b31be6 TT |
105 | /* If not all pages were used from the SGL, free the remaining ones */ |
106 | bc = sge_no; | |
107 | while (sge_no < ctxt->count) { | |
108 | page = ctxt->pages[sge_no++]; | |
109 | put_page(page); | |
110 | } | |
111 | ctxt->count = bc; | |
112 | ||
113 | /* Set up tail */ | |
114 | rqstp->rq_arg.tail[0].iov_base = NULL; | |
115 | rqstp->rq_arg.tail[0].iov_len = 0; | |
116 | } | |
117 | ||
0bf48289 | 118 | /* Issue an RDMA_READ using the local lkey to map the data sink */ |
e5452411 CL |
119 | int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, |
120 | struct svc_rqst *rqstp, | |
121 | struct svc_rdma_op_ctxt *head, | |
122 | int *page_no, | |
123 | u32 *page_offset, | |
124 | u32 rs_handle, | |
125 | u32 rs_length, | |
126 | u64 rs_offset, | |
127 | bool last) | |
0bf48289 | 128 | { |
e622f2f4 | 129 | struct ib_rdma_wr read_wr; |
0bf48289 SW |
130 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
131 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
132 | int ret, read, pno; | |
133 | u32 pg_off = *page_offset; | |
134 | u32 pg_no = *page_no; | |
135 | ||
136 | ctxt->direction = DMA_FROM_DEVICE; | |
137 | ctxt->read_hdr = head; | |
bc3fe2e3 | 138 | pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); |
c91aed98 SW |
139 | read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, |
140 | rs_length); | |
0bf48289 SW |
141 | |
142 | for (pno = 0; pno < pages_needed; pno++) { | |
143 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
144 | ||
145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
146 | head->arg.page_len += len; | |
5fe1043d | 147 | |
0bf48289 SW |
148 | head->arg.len += len; |
149 | if (!pg_off) | |
150 | head->count++; | |
151 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
7e4359e2 | 152 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
0bf48289 SW |
153 | ctxt->sge[pno].addr = |
154 | ib_dma_map_page(xprt->sc_cm_id->device, | |
155 | head->arg.pages[pg_no], pg_off, | |
156 | PAGE_SIZE - pg_off, | |
157 | DMA_FROM_DEVICE); | |
158 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, | |
159 | ctxt->sge[pno].addr); | |
160 | if (ret) | |
161 | goto err; | |
cace564f | 162 | svc_rdma_count_mappings(xprt, ctxt); |
d5b31be6 | 163 | |
5fe1043d | 164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; |
0bf48289 SW |
165 | ctxt->sge[pno].length = len; |
166 | ctxt->count++; | |
167 | ||
168 | /* adjust offset and wrap to next page if needed */ | |
169 | pg_off += len; | |
170 | if (pg_off == PAGE_SIZE) { | |
171 | pg_off = 0; | |
172 | pg_no++; | |
d5b31be6 | 173 | } |
0bf48289 | 174 | rs_length -= len; |
d5b31be6 | 175 | } |
0bf48289 SW |
176 | |
177 | if (last && rs_length == 0) | |
178 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
179 | else | |
180 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
181 | ||
182 | memset(&read_wr, 0, sizeof(read_wr)); | |
be99bb11 CL |
183 | ctxt->cqe.done = svc_rdma_wc_read; |
184 | read_wr.wr.wr_cqe = &ctxt->cqe; | |
e622f2f4 | 185 | read_wr.wr.opcode = IB_WR_RDMA_READ; |
e622f2f4 CH |
186 | read_wr.wr.send_flags = IB_SEND_SIGNALED; |
187 | read_wr.rkey = rs_handle; | |
188 | read_wr.remote_addr = rs_offset; | |
189 | read_wr.wr.sg_list = ctxt->sge; | |
190 | read_wr.wr.num_sge = pages_needed; | |
191 | ||
192 | ret = svc_rdma_send(xprt, &read_wr.wr); | |
0bf48289 SW |
193 | if (ret) { |
194 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
195 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
196 | goto err; | |
197 | } | |
198 | ||
199 | /* return current location in page array */ | |
200 | *page_no = pg_no; | |
201 | *page_offset = pg_off; | |
202 | ret = read; | |
203 | atomic_inc(&rdma_stat_read); | |
204 | return ret; | |
205 | err: | |
206 | svc_rdma_unmap_dma(ctxt); | |
207 | svc_rdma_put_context(ctxt, 0); | |
208 | return ret; | |
d5b31be6 TT |
209 | } |
210 | ||
0bf48289 | 211 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
e5452411 CL |
212 | int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, |
213 | struct svc_rqst *rqstp, | |
214 | struct svc_rdma_op_ctxt *head, | |
215 | int *page_no, | |
216 | u32 *page_offset, | |
217 | u32 rs_handle, | |
218 | u32 rs_length, | |
219 | u64 rs_offset, | |
220 | bool last) | |
146b6df6 | 221 | { |
e622f2f4 | 222 | struct ib_rdma_wr read_wr; |
0bf48289 | 223 | struct ib_send_wr inv_wr; |
412a15c0 | 224 | struct ib_reg_wr reg_wr; |
0bf48289 | 225 | u8 key; |
412a15c0 | 226 | int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
0bf48289 SW |
227 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); |
228 | struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); | |
412a15c0 | 229 | int ret, read, pno, dma_nents, n; |
0bf48289 SW |
230 | u32 pg_off = *page_offset; |
231 | u32 pg_no = *page_no; | |
146b6df6 | 232 | |
146b6df6 TT |
233 | if (IS_ERR(frmr)) |
234 | return -ENOMEM; | |
235 | ||
0bf48289 SW |
236 | ctxt->direction = DMA_FROM_DEVICE; |
237 | ctxt->frmr = frmr; | |
412a15c0 | 238 | nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); |
ab9f2faf | 239 | read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length); |
146b6df6 | 240 | |
146b6df6 TT |
241 | frmr->direction = DMA_FROM_DEVICE; |
242 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | |
412a15c0 | 243 | frmr->sg_nents = nents; |
0bf48289 | 244 | |
412a15c0 | 245 | for (pno = 0; pno < nents; pno++) { |
0bf48289 SW |
246 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); |
247 | ||
248 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
249 | head->arg.page_len += len; | |
250 | head->arg.len += len; | |
251 | if (!pg_off) | |
252 | head->count++; | |
412a15c0 SG |
253 | |
254 | sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], | |
255 | len, pg_off); | |
256 | ||
0bf48289 SW |
257 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; |
258 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
146b6df6 | 259 | |
0bf48289 SW |
260 | /* adjust offset and wrap to next page if needed */ |
261 | pg_off += len; | |
262 | if (pg_off == PAGE_SIZE) { | |
263 | pg_off = 0; | |
264 | pg_no++; | |
265 | } | |
266 | rs_length -= len; | |
146b6df6 TT |
267 | } |
268 | ||
0bf48289 SW |
269 | if (last && rs_length == 0) |
270 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
271 | else | |
272 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
d5b31be6 | 273 | |
412a15c0 SG |
274 | dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, |
275 | frmr->sg, frmr->sg_nents, | |
276 | frmr->direction); | |
277 | if (!dma_nents) { | |
278 | pr_err("svcrdma: failed to dma map sg %p\n", | |
279 | frmr->sg); | |
280 | return -ENOMEM; | |
281 | } | |
412a15c0 | 282 | |
9aa8b321 | 283 | n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); |
412a15c0 SG |
284 | if (unlikely(n != frmr->sg_nents)) { |
285 | pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", | |
286 | frmr->mr, n, frmr->sg_nents); | |
287 | return n < 0 ? n : -EINVAL; | |
288 | } | |
289 | ||
0bf48289 SW |
290 | /* Bump the key */ |
291 | key = (u8)(frmr->mr->lkey & 0x000000FF); | |
292 | ib_update_fast_reg_key(frmr->mr, ++key); | |
293 | ||
412a15c0 | 294 | ctxt->sge[0].addr = frmr->mr->iova; |
0bf48289 | 295 | ctxt->sge[0].lkey = frmr->mr->lkey; |
412a15c0 | 296 | ctxt->sge[0].length = frmr->mr->length; |
0bf48289 SW |
297 | ctxt->count = 1; |
298 | ctxt->read_hdr = head; | |
299 | ||
412a15c0 | 300 | /* Prepare REG WR */ |
be99bb11 CL |
301 | ctxt->reg_cqe.done = svc_rdma_wc_reg; |
302 | reg_wr.wr.wr_cqe = &ctxt->reg_cqe; | |
412a15c0 | 303 | reg_wr.wr.opcode = IB_WR_REG_MR; |
412a15c0 SG |
304 | reg_wr.wr.send_flags = IB_SEND_SIGNALED; |
305 | reg_wr.wr.num_sge = 0; | |
306 | reg_wr.mr = frmr->mr; | |
307 | reg_wr.key = frmr->mr->lkey; | |
308 | reg_wr.access = frmr->access_flags; | |
309 | reg_wr.wr.next = &read_wr.wr; | |
0bf48289 SW |
310 | |
311 | /* Prepare RDMA_READ */ | |
312 | memset(&read_wr, 0, sizeof(read_wr)); | |
be99bb11 CL |
313 | ctxt->cqe.done = svc_rdma_wc_read; |
314 | read_wr.wr.wr_cqe = &ctxt->cqe; | |
e622f2f4 CH |
315 | read_wr.wr.send_flags = IB_SEND_SIGNALED; |
316 | read_wr.rkey = rs_handle; | |
317 | read_wr.remote_addr = rs_offset; | |
318 | read_wr.wr.sg_list = ctxt->sge; | |
319 | read_wr.wr.num_sge = 1; | |
0bf48289 | 320 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { |
e622f2f4 | 321 | read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; |
e622f2f4 | 322 | read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; |
0bf48289 | 323 | } else { |
e622f2f4 CH |
324 | read_wr.wr.opcode = IB_WR_RDMA_READ; |
325 | read_wr.wr.next = &inv_wr; | |
0bf48289 SW |
326 | /* Prepare invalidate */ |
327 | memset(&inv_wr, 0, sizeof(inv_wr)); | |
be99bb11 CL |
328 | ctxt->inv_cqe.done = svc_rdma_wc_inv; |
329 | inv_wr.wr_cqe = &ctxt->inv_cqe; | |
0bf48289 | 330 | inv_wr.opcode = IB_WR_LOCAL_INV; |
83710fc7 | 331 | inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; |
0bf48289 SW |
332 | inv_wr.ex.invalidate_rkey = frmr->mr->lkey; |
333 | } | |
0bf48289 SW |
334 | |
335 | /* Post the chain */ | |
412a15c0 | 336 | ret = svc_rdma_send(xprt, ®_wr.wr); |
0bf48289 SW |
337 | if (ret) { |
338 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
339 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
340 | goto err; | |
d5b31be6 | 341 | } |
d5b31be6 | 342 | |
0bf48289 SW |
343 | /* return current location in page array */ |
344 | *page_no = pg_no; | |
345 | *page_offset = pg_off; | |
346 | ret = read; | |
347 | atomic_inc(&rdma_stat_read); | |
348 | return ret; | |
349 | err: | |
0bf48289 SW |
350 | svc_rdma_put_context(ctxt, 0); |
351 | svc_rdma_put_frmr(xprt, frmr); | |
352 | return ret; | |
d5b31be6 TT |
353 | } |
354 | ||
2397aa8b CL |
355 | static unsigned int |
356 | rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) | |
357 | { | |
358 | unsigned int count; | |
359 | ||
360 | for (count = 0; ch->rc_discrim != xdr_zero; ch++) | |
361 | count++; | |
362 | return count; | |
363 | } | |
364 | ||
a97c331f CL |
365 | /* If there was additional inline content, append it to the end of arg.pages. |
366 | * Tail copy has to be done after the reader function has determined how many | |
367 | * pages are needed for RDMA READ. | |
368 | */ | |
369 | static int | |
370 | rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, | |
371 | u32 position, u32 byte_count, u32 page_offset, int page_no) | |
372 | { | |
373 | char *srcp, *destp; | |
a97c331f | 374 | |
a97c331f CL |
375 | srcp = head->arg.head[0].iov_base + position; |
376 | byte_count = head->arg.head[0].iov_len - position; | |
377 | if (byte_count > PAGE_SIZE) { | |
378 | dprintk("svcrdma: large tail unsupported\n"); | |
379 | return 0; | |
380 | } | |
381 | ||
382 | /* Fit as much of the tail on the current page as possible */ | |
383 | if (page_offset != PAGE_SIZE) { | |
384 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
385 | destp += page_offset; | |
386 | while (byte_count--) { | |
387 | *destp++ = *srcp++; | |
388 | page_offset++; | |
389 | if (page_offset == PAGE_SIZE && byte_count) | |
390 | goto more; | |
391 | } | |
392 | goto done; | |
393 | } | |
394 | ||
395 | more: | |
396 | /* Fit the rest on the next page */ | |
397 | page_no++; | |
398 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
399 | while (byte_count--) | |
400 | *destp++ = *srcp++; | |
401 | ||
402 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | |
403 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
404 | ||
405 | done: | |
406 | byte_count = head->arg.head[0].iov_len - position; | |
407 | head->arg.page_len += byte_count; | |
408 | head->arg.len += byte_count; | |
409 | head->arg.buflen += byte_count; | |
410 | return 1; | |
411 | } | |
412 | ||
5fdca653 CL |
413 | /* Returns the address of the first read chunk or <nul> if no read chunk |
414 | * is present | |
415 | */ | |
416 | static struct rpcrdma_read_chunk * | |
417 | svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) | |
418 | { | |
419 | struct rpcrdma_read_chunk *ch = | |
420 | (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | |
421 | ||
422 | if (ch->rc_discrim == xdr_zero) | |
423 | return NULL; | |
424 | return ch; | |
425 | } | |
426 | ||
0bf48289 SW |
427 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
428 | struct rpcrdma_msg *rmsgp, | |
429 | struct svc_rqst *rqstp, | |
430 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 431 | { |
2397aa8b | 432 | int page_no, ret; |
d5b31be6 | 433 | struct rpcrdma_read_chunk *ch; |
e5452411 | 434 | u32 handle, page_offset, byte_count; |
61edbcb7 | 435 | u32 position; |
0bf48289 | 436 | u64 rs_offset; |
e5452411 | 437 | bool last; |
d5b31be6 TT |
438 | |
439 | /* If no read list is present, return 0 */ | |
440 | ch = svc_rdma_get_read_chunk(rmsgp); | |
441 | if (!ch) | |
442 | return 0; | |
443 | ||
2397aa8b | 444 | if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) |
a6f911c0 | 445 | return -EINVAL; |
146b6df6 | 446 | |
0bf48289 SW |
447 | /* The request is completed when the RDMA_READs complete. The |
448 | * head context keeps all the pages that comprise the | |
449 | * request. | |
450 | */ | |
451 | head->arg.head[0] = rqstp->rq_arg.head[0]; | |
452 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | |
0bf48289 SW |
453 | head->hdr_count = head->count; |
454 | head->arg.page_base = 0; | |
455 | head->arg.page_len = 0; | |
456 | head->arg.len = rqstp->rq_arg.len; | |
457 | head->arg.buflen = rqstp->rq_arg.buflen; | |
59fb3066 | 458 | |
0b056c22 | 459 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ |
cac7f150 | 460 | position = be32_to_cpu(ch->rc_position); |
0b056c22 CL |
461 | if (position == 0) { |
462 | head->arg.pages = &head->pages[0]; | |
463 | page_offset = head->byte_len; | |
464 | } else { | |
465 | head->arg.pages = &head->pages[head->count]; | |
466 | page_offset = 0; | |
467 | } | |
468 | ||
61edbcb7 CL |
469 | ret = 0; |
470 | page_no = 0; | |
61edbcb7 CL |
471 | for (; ch->rc_discrim != xdr_zero; ch++) { |
472 | if (be32_to_cpu(ch->rc_position) != position) | |
473 | goto err; | |
474 | ||
475 | handle = be32_to_cpu(ch->rc_target.rs_handle), | |
e5452411 | 476 | byte_count = be32_to_cpu(ch->rc_target.rs_length); |
cec56c8f TT |
477 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
478 | &rs_offset); | |
0bf48289 SW |
479 | |
480 | while (byte_count > 0) { | |
e5452411 CL |
481 | last = (ch + 1)->rc_discrim == xdr_zero; |
482 | ret = xprt->sc_reader(xprt, rqstp, head, | |
483 | &page_no, &page_offset, | |
484 | handle, byte_count, | |
485 | rs_offset, last); | |
0bf48289 SW |
486 | if (ret < 0) |
487 | goto err; | |
488 | byte_count -= ret; | |
489 | rs_offset += ret; | |
490 | head->arg.buflen += ret; | |
d5b31be6 | 491 | } |
d5b31be6 | 492 | } |
0b056c22 | 493 | |
fcbeced5 CL |
494 | /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ |
495 | if (page_offset & 3) { | |
496 | u32 pad = 4 - (page_offset & 3); | |
497 | ||
6625d091 | 498 | head->arg.tail[0].iov_len += pad; |
fcbeced5 CL |
499 | head->arg.len += pad; |
500 | head->arg.buflen += pad; | |
a97c331f | 501 | page_offset += pad; |
fcbeced5 CL |
502 | } |
503 | ||
0bf48289 | 504 | ret = 1; |
a97c331f CL |
505 | if (position && position < head->arg.head[0].iov_len) |
506 | ret = rdma_copy_tail(rqstp, head, position, | |
507 | byte_count, page_offset, page_no); | |
508 | head->arg.head[0].iov_len = position; | |
0b056c22 CL |
509 | head->position = position; |
510 | ||
0bf48289 | 511 | err: |
d5b31be6 | 512 | /* Detach arg pages. svc_recv will replenish them */ |
0bf48289 SW |
513 | for (page_no = 0; |
514 | &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) | |
515 | rqstp->rq_pages[page_no] = NULL; | |
d5b31be6 | 516 | |
0bf48289 | 517 | return ret; |
d5b31be6 TT |
518 | } |
519 | ||
84f225c2 CL |
520 | static void rdma_read_complete(struct svc_rqst *rqstp, |
521 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 522 | { |
d5b31be6 | 523 | int page_no; |
d5b31be6 | 524 | |
d5b31be6 TT |
525 | /* Copy RPC pages */ |
526 | for (page_no = 0; page_no < head->count; page_no++) { | |
527 | put_page(rqstp->rq_pages[page_no]); | |
528 | rqstp->rq_pages[page_no] = head->pages[page_no]; | |
529 | } | |
0b056c22 CL |
530 | |
531 | /* Adjustments made for RDMA_NOMSG type requests */ | |
532 | if (head->position == 0) { | |
533 | if (head->arg.len <= head->sge[0].length) { | |
534 | head->arg.head[0].iov_len = head->arg.len - | |
535 | head->byte_len; | |
536 | head->arg.page_len = 0; | |
537 | } else { | |
538 | head->arg.head[0].iov_len = head->sge[0].length - | |
539 | head->byte_len; | |
540 | head->arg.page_len = head->arg.len - | |
541 | head->sge[0].length; | |
542 | } | |
543 | } | |
544 | ||
d5b31be6 | 545 | /* Point rq_arg.pages past header */ |
f820c57e | 546 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
d5b31be6 TT |
547 | rqstp->rq_arg.page_len = head->arg.page_len; |
548 | rqstp->rq_arg.page_base = head->arg.page_base; | |
549 | ||
550 | /* rq_respages starts after the last arg page */ | |
3be7f328 | 551 | rqstp->rq_respages = &rqstp->rq_pages[page_no]; |
7e4359e2 | 552 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
553 | |
554 | /* Rebuild rq_arg head and tail. */ | |
555 | rqstp->rq_arg.head[0] = head->arg.head[0]; | |
556 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | |
557 | rqstp->rq_arg.len = head->arg.len; | |
558 | rqstp->rq_arg.buflen = head->arg.buflen; | |
d5b31be6 TT |
559 | } |
560 | ||
5d252f90 CL |
561 | /* By convention, backchannel calls arrive via rdma_msg type |
562 | * messages, and never populate the chunk lists. This makes | |
563 | * the RPC/RDMA header small and fixed in size, so it is | |
564 | * straightforward to check the RPC header's direction field. | |
565 | */ | |
566 | static bool | |
567 | svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) | |
568 | { | |
569 | __be32 *p = (__be32 *)rmsgp; | |
570 | ||
571 | if (!xprt->xpt_bc_xprt) | |
572 | return false; | |
573 | ||
574 | if (rmsgp->rm_type != rdma_msg) | |
575 | return false; | |
576 | if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) | |
577 | return false; | |
578 | if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) | |
579 | return false; | |
580 | if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) | |
581 | return false; | |
582 | ||
583 | /* sanity */ | |
584 | if (p[7] != rmsgp->rm_xid) | |
585 | return false; | |
586 | /* call direction */ | |
587 | if (p[8] == cpu_to_be32(RPC_CALL)) | |
588 | return false; | |
589 | ||
590 | return true; | |
591 | } | |
592 | ||
d5b31be6 TT |
593 | /* |
594 | * Set up the rqstp thread context to point to the RQ buffer. If | |
595 | * necessary, pull additional data from the client with an RDMA_READ | |
596 | * request. | |
597 | */ | |
598 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |
599 | { | |
600 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
601 | struct svcxprt_rdma *rdma_xprt = | |
602 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | |
603 | struct svc_rdma_op_ctxt *ctxt = NULL; | |
604 | struct rpcrdma_msg *rmsgp; | |
605 | int ret = 0; | |
d5b31be6 TT |
606 | |
607 | dprintk("svcrdma: rqstp=%p\n", rqstp); | |
608 | ||
24b8b447 | 609 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 TT |
610 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { |
611 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | |
612 | struct svc_rdma_op_ctxt, | |
613 | dto_q); | |
614 | list_del_init(&ctxt->dto_q); | |
24b8b447 | 615 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); |
84f225c2 CL |
616 | rdma_read_complete(rqstp, ctxt); |
617 | goto complete; | |
0bf48289 | 618 | } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { |
d5b31be6 TT |
619 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, |
620 | struct svc_rdma_op_ctxt, | |
621 | dto_q); | |
622 | list_del_init(&ctxt->dto_q); | |
623 | } else { | |
624 | atomic_inc(&rdma_stat_rq_starve); | |
625 | clear_bit(XPT_DATA, &xprt->xpt_flags); | |
626 | ctxt = NULL; | |
627 | } | |
628 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | |
629 | if (!ctxt) { | |
630 | /* This is the EAGAIN path. The svc_recv routine will | |
631 | * return -EAGAIN, the nfsd thread will go to call into | |
632 | * svc_recv again and we shouldn't be on the active | |
633 | * transport list | |
634 | */ | |
635 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
ec705fd4 | 636 | goto defer; |
d5b31be6 TT |
637 | goto out; |
638 | } | |
96a58f9c CL |
639 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n", |
640 | ctxt, rdma_xprt, rqstp); | |
d5b31be6 TT |
641 | atomic_inc(&rdma_stat_recv); |
642 | ||
643 | /* Build up the XDR from the receive buffers. */ | |
644 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | |
645 | ||
646 | /* Decode the RDMA header. */ | |
f3ea53fb | 647 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; |
d9e4084f | 648 | ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); |
a6081b82 CL |
649 | if (ret < 0) |
650 | goto out_err; | |
a0544c94 CL |
651 | if (ret == 0) |
652 | goto out_drop; | |
a6081b82 | 653 | rqstp->rq_xprt_hlen = ret; |
d5b31be6 | 654 | |
5d252f90 CL |
655 | if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { |
656 | ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, | |
657 | &rqstp->rq_arg); | |
658 | svc_rdma_put_context(ctxt, 0); | |
659 | if (ret) | |
660 | goto repost; | |
661 | return ret; | |
662 | } | |
663 | ||
d16d4009 | 664 | /* Read read-list data. */ |
0bf48289 | 665 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
d16d4009 TT |
666 | if (ret > 0) { |
667 | /* read-list posted, defer until data received from client. */ | |
b1721d2b | 668 | goto defer; |
0bf48289 | 669 | } else if (ret < 0) { |
d16d4009 TT |
670 | /* Post of read-list failed, free context. */ |
671 | svc_rdma_put_context(ctxt, 1); | |
672 | return 0; | |
673 | } | |
d5b31be6 | 674 | |
84f225c2 | 675 | complete: |
d5b31be6 TT |
676 | ret = rqstp->rq_arg.head[0].iov_len |
677 | + rqstp->rq_arg.page_len | |
678 | + rqstp->rq_arg.tail[0].iov_len; | |
679 | svc_rdma_put_context(ctxt, 0); | |
680 | out: | |
597561bf CL |
681 | dprintk("svcrdma: ret=%d, rq_arg.len=%u, " |
682 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", | |
d5b31be6 TT |
683 | ret, rqstp->rq_arg.len, |
684 | rqstp->rq_arg.head[0].iov_base, | |
685 | rqstp->rq_arg.head[0].iov_len); | |
686 | rqstp->rq_prot = IPPROTO_MAX; | |
687 | svc_xprt_copy_addrs(rqstp, xprt); | |
d5b31be6 TT |
688 | return ret; |
689 | ||
a6081b82 CL |
690 | out_err: |
691 | svc_rdma_send_error(rdma_xprt, rmsgp, ret); | |
692 | svc_rdma_put_context(ctxt, 0); | |
693 | return 0; | |
694 | ||
b1721d2b | 695 | defer: |
d5b31be6 | 696 | return 0; |
5d252f90 | 697 | |
a0544c94 CL |
698 | out_drop: |
699 | svc_rdma_put_context(ctxt, 1); | |
5d252f90 | 700 | repost: |
bf36387a | 701 | return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL); |
d5b31be6 | 702 | } |