]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
d67ae825 TH |
2 | /* |
3 | * Device operations for the pnfs nfs4 file layout driver. | |
4 | * | |
5 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | |
6 | * | |
7 | * Tao Peng <bergwolf@primarydata.com> | |
8 | */ | |
9 | ||
10 | #include <linux/nfs_fs.h> | |
11 | #include <linux/vmalloc.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sunrpc/addr.h> | |
14 | ||
15 | #include "../internal.h" | |
16 | #include "../nfs4session.h" | |
17 | #include "flexfilelayout.h" | |
18 | ||
19 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
20 | ||
15d03055 TM |
/*
 * Transport tunables passed to nfs4_pnfs_ds_connect() when a data server
 * connection is established.
 * NOTE(review): the timeout default is taken from a *_RETRANS constant --
 * confirm that this is the intended value.
 */
static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
static unsigned int dataserver_retrans;
d67ae825 | 23 | |
65990d1a FI |
24 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); |
25 | ||
d67ae825 TH |
26 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) |
27 | { | |
65990d1a | 28 | if (!IS_ERR_OR_NULL(mirror_ds)) |
d67ae825 TH |
29 | nfs4_put_deviceid_node(&mirror_ds->id_node); |
30 | } | |
31 | ||
32 | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | |
33 | { | |
34 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | |
35 | nfs4_pnfs_ds_put(mirror_ds->ds); | |
1feb2616 | 36 | kfree(mirror_ds->ds_versions); |
84a80f62 | 37 | kfree_rcu(mirror_ds, id_node.rcu); |
d67ae825 TH |
38 | } |
39 | ||
40 | /* Decode opaque device data and construct new_ds using it */ | |
41 | struct nfs4_ff_layout_ds * | |
42 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
43 | gfp_t gfp_flags) | |
44 | { | |
45 | struct xdr_stream stream; | |
46 | struct xdr_buf buf; | |
47 | struct page *scratch; | |
48 | struct list_head dsaddrs; | |
49 | struct nfs4_pnfs_ds_addr *da; | |
50 | struct nfs4_ff_layout_ds *new_ds = NULL; | |
51 | struct nfs4_ff_ds_version *ds_versions = NULL; | |
52 | u32 mp_count; | |
53 | u32 version_count; | |
54 | __be32 *p; | |
55 | int i, ret = -ENOMEM; | |
56 | ||
57 | /* set up xdr stream */ | |
58 | scratch = alloc_page(gfp_flags); | |
59 | if (!scratch) | |
60 | goto out_err; | |
61 | ||
62 | new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | |
63 | if (!new_ds) | |
64 | goto out_scratch; | |
65 | ||
66 | nfs4_init_deviceid_node(&new_ds->id_node, | |
67 | server, | |
68 | &pdev->dev_id); | |
69 | INIT_LIST_HEAD(&dsaddrs); | |
70 | ||
71 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | |
72 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | |
73 | ||
74 | /* multipath count */ | |
75 | p = xdr_inline_decode(&stream, 4); | |
76 | if (unlikely(!p)) | |
77 | goto out_err_drain_dsaddrs; | |
78 | mp_count = be32_to_cpup(p); | |
79 | dprintk("%s: multipath ds count %d\n", __func__, mp_count); | |
80 | ||
81 | for (i = 0; i < mp_count; i++) { | |
82 | /* multipath ds */ | |
83 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | |
84 | &stream, gfp_flags); | |
85 | if (da) | |
86 | list_add_tail(&da->da_node, &dsaddrs); | |
87 | } | |
88 | if (list_empty(&dsaddrs)) { | |
89 | dprintk("%s: no suitable DS addresses found\n", | |
90 | __func__); | |
91 | ret = -ENOMEDIUM; | |
92 | goto out_err_drain_dsaddrs; | |
93 | } | |
94 | ||
95 | /* version count */ | |
96 | p = xdr_inline_decode(&stream, 4); | |
97 | if (unlikely(!p)) | |
98 | goto out_err_drain_dsaddrs; | |
99 | version_count = be32_to_cpup(p); | |
100 | dprintk("%s: version count %d\n", __func__, version_count); | |
101 | ||
102 | ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), | |
103 | gfp_flags); | |
104 | if (!ds_versions) | |
105 | goto out_scratch; | |
106 | ||
107 | for (i = 0; i < version_count; i++) { | |
108 | /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + | |
109 | * tightly_coupled(4) */ | |
110 | p = xdr_inline_decode(&stream, 20); | |
111 | if (unlikely(!p)) | |
112 | goto out_err_drain_dsaddrs; | |
113 | ds_versions[i].version = be32_to_cpup(p++); | |
114 | ds_versions[i].minor_version = be32_to_cpup(p++); | |
115 | ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
116 | ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
117 | ds_versions[i].tightly_coupled = be32_to_cpup(p); | |
118 | ||
119 | if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | |
120 | ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | |
121 | if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | |
122 | ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | |
123 | ||
a7878ca1 TM |
124 | /* |
125 | * check for valid major/minor combination. | |
126 | * currently we support dataserver which talk: | |
127 | * v3, v4.0, v4.1, v4.2 | |
128 | */ | |
129 | if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) || | |
130 | (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) { | |
d67ae825 TH |
131 | dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, |
132 | i, ds_versions[i].version, | |
133 | ds_versions[i].minor_version); | |
134 | ret = -EPROTONOSUPPORT; | |
135 | goto out_err_drain_dsaddrs; | |
136 | } | |
137 | ||
138 | dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | |
139 | __func__, i, ds_versions[i].version, | |
140 | ds_versions[i].minor_version, | |
141 | ds_versions[i].rsize, | |
142 | ds_versions[i].wsize, | |
143 | ds_versions[i].tightly_coupled); | |
144 | } | |
145 | ||
146 | new_ds->ds_versions = ds_versions; | |
147 | new_ds->ds_versions_cnt = version_count; | |
148 | ||
149 | new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | |
150 | if (!new_ds->ds) | |
151 | goto out_err_drain_dsaddrs; | |
152 | ||
153 | /* If DS was already in cache, free ds addrs */ | |
154 | while (!list_empty(&dsaddrs)) { | |
155 | da = list_first_entry(&dsaddrs, | |
156 | struct nfs4_pnfs_ds_addr, | |
157 | da_node); | |
158 | list_del_init(&da->da_node); | |
159 | kfree(da->da_remotestr); | |
160 | kfree(da); | |
161 | } | |
162 | ||
163 | __free_page(scratch); | |
164 | return new_ds; | |
165 | ||
166 | out_err_drain_dsaddrs: | |
167 | while (!list_empty(&dsaddrs)) { | |
168 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | |
169 | da_node); | |
170 | list_del_init(&da->da_node); | |
171 | kfree(da->da_remotestr); | |
172 | kfree(da); | |
173 | } | |
174 | ||
175 | kfree(ds_versions); | |
176 | out_scratch: | |
177 | __free_page(scratch); | |
178 | out_err: | |
179 | kfree(new_ds); | |
180 | ||
181 | dprintk("%s ERROR: returning %d\n", __func__, ret); | |
182 | return NULL; | |
183 | } | |
184 | ||
889d94d4 TM |
185 | static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, |
186 | struct nfs4_deviceid_node *devid) | |
187 | { | |
1c48cee8 | 188 | nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); |
889d94d4 TM |
189 | if (!ff_layout_has_available_ds(lseg)) |
190 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, | |
191 | lseg); | |
192 | } | |
193 | ||
194 | static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | |
65990d1a FI |
195 | struct nfs4_ff_layout_mirror *mirror, |
196 | bool create) | |
889d94d4 | 197 | { |
65990d1a FI |
198 | if (mirror == NULL || IS_ERR(mirror->mirror_ds)) |
199 | goto outerr; | |
200 | if (mirror->mirror_ds == NULL) { | |
201 | if (create) { | |
202 | struct nfs4_deviceid_node *node; | |
203 | struct pnfs_layout_hdr *lh = lseg->pls_layout; | |
204 | struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); | |
205 | ||
206 | node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | |
207 | &mirror->devid, lh->plh_lc_cred, | |
208 | GFP_KERNEL); | |
209 | if (node) | |
210 | mirror_ds = FF_LAYOUT_MIRROR_DS(node); | |
211 | ||
212 | /* check for race with another call to this function */ | |
213 | if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && | |
214 | mirror_ds != ERR_PTR(-ENODEV)) | |
215 | nfs4_put_deviceid_node(node); | |
216 | } else | |
217 | goto outerr; | |
889d94d4 | 218 | } |
f17f8a14 TM |
219 | |
220 | if (IS_ERR(mirror->mirror_ds)) | |
221 | goto outerr; | |
222 | ||
889d94d4 TM |
223 | if (mirror->mirror_ds->ds == NULL) { |
224 | struct nfs4_deviceid_node *devid; | |
225 | devid = &mirror->mirror_ds->id_node; | |
226 | ff_layout_mark_devid_invalid(lseg, devid); | |
227 | return false; | |
228 | } | |
229 | return true; | |
65990d1a FI |
230 | outerr: |
231 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); | |
232 | return false; | |
889d94d4 TM |
233 | } |
234 | ||
d67ae825 TH |
235 | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, |
236 | u64 offset, u64 length) | |
237 | { | |
238 | u64 end; | |
239 | ||
17822b20 TM |
240 | end = max_t(u64, pnfs_end_offset(err->offset, err->length), |
241 | pnfs_end_offset(offset, length)); | |
d67ae825 TH |
242 | err->offset = min_t(u64, err->offset, offset); |
243 | err->length = end - err->offset; | |
244 | } | |
245 | ||
b819ed4b TM |
246 | static int |
247 | ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, | |
248 | const struct nfs4_ff_layout_ds_err *e2) | |
d67ae825 | 249 | { |
b819ed4b TM |
250 | int ret; |
251 | ||
252 | if (e1->opnum != e2->opnum) | |
253 | return e1->opnum < e2->opnum ? -1 : 1; | |
254 | if (e1->status != e2->status) | |
255 | return e1->status < e2->status ? -1 : 1; | |
93b717fd TM |
256 | ret = memcmp(e1->stateid.data, e2->stateid.data, |
257 | sizeof(e1->stateid.data)); | |
b819ed4b TM |
258 | if (ret != 0) |
259 | return ret; | |
260 | ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); | |
261 | if (ret != 0) | |
262 | return ret; | |
17822b20 | 263 | if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) |
b819ed4b | 264 | return -1; |
17822b20 | 265 | if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) |
b819ed4b TM |
266 | return 1; |
267 | /* If ranges overlap or are contiguous, they are the same */ | |
268 | return 0; | |
d67ae825 TH |
269 | } |
270 | ||
b819ed4b | 271 | static void |
d67ae825 TH |
272 | ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, |
273 | struct nfs4_ff_layout_ds_err *dserr) | |
274 | { | |
b819ed4b TM |
275 | struct nfs4_ff_layout_ds_err *err, *tmp; |
276 | struct list_head *head = &flo->error_list; | |
277 | int match; | |
278 | ||
279 | /* Do insertion sort w/ merges */ | |
280 | list_for_each_entry_safe(err, tmp, &flo->error_list, list) { | |
281 | match = ff_ds_error_match(err, dserr); | |
282 | if (match < 0) | |
283 | continue; | |
284 | if (match > 0) { | |
285 | /* Add entry "dserr" _before_ entry "err" */ | |
286 | head = &err->list; | |
d67ae825 TH |
287 | break; |
288 | } | |
b819ed4b TM |
289 | /* Entries match, so merge "err" into "dserr" */ |
290 | extend_ds_error(dserr, err->offset, err->length); | |
cb067935 | 291 | list_replace(&err->list, &dserr->list); |
b819ed4b | 292 | kfree(err); |
cb067935 | 293 | return; |
d67ae825 TH |
294 | } |
295 | ||
b819ed4b | 296 | list_add_tail(&dserr->list, head); |
d67ae825 TH |
297 | } |
298 | ||
299 | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | |
300 | struct nfs4_ff_layout_mirror *mirror, u64 offset, | |
301 | u64 length, int status, enum nfs_opnum4 opnum, | |
302 | gfp_t gfp_flags) | |
303 | { | |
304 | struct nfs4_ff_layout_ds_err *dserr; | |
d67ae825 TH |
305 | |
306 | if (status == 0) | |
307 | return 0; | |
308 | ||
309 | if (mirror->mirror_ds == NULL) | |
310 | return -EINVAL; | |
311 | ||
d67ae825 TH |
312 | dserr = kmalloc(sizeof(*dserr), gfp_flags); |
313 | if (!dserr) | |
314 | return -ENOMEM; | |
315 | ||
316 | INIT_LIST_HEAD(&dserr->list); | |
317 | dserr->offset = offset; | |
318 | dserr->length = length; | |
319 | dserr->status = status; | |
320 | dserr->opnum = opnum; | |
321 | nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); | |
322 | memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, | |
323 | NFS4_DEVICEID4_SIZE); | |
324 | ||
325 | spin_lock(&flo->generic_hdr.plh_inode->i_lock); | |
b819ed4b | 326 | ff_layout_add_ds_error_locked(flo, dserr); |
d67ae825 | 327 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); |
d67ae825 TH |
328 | |
329 | return 0; | |
330 | } | |
331 | ||
57f3f4c0 JL |
332 | static struct rpc_cred * |
333 | ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) | |
334 | { | |
3064b686 | 335 | struct rpc_cred *cred, __rcu **pcred; |
57f3f4c0 | 336 | |
3064b686 JL |
337 | if (iomode == IOMODE_READ) |
338 | pcred = &mirror->ro_cred; | |
339 | else | |
340 | pcred = &mirror->rw_cred; | |
57f3f4c0 JL |
341 | |
342 | rcu_read_lock(); | |
343 | do { | |
344 | cred = rcu_dereference(*pcred); | |
345 | if (!cred) | |
346 | break; | |
347 | ||
348 | cred = get_rpccred_rcu(cred); | |
349 | } while(!cred); | |
350 | rcu_read_unlock(); | |
351 | return cred; | |
352 | } | |
353 | ||
d67ae825 TH |
354 | struct nfs_fh * |
355 | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | |
356 | { | |
357 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | |
358 | struct nfs_fh *fh = NULL; | |
d67ae825 | 359 | |
65990d1a | 360 | if (!ff_layout_mirror_valid(lseg, mirror, false)) { |
889d94d4 | 361 | pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", |
d67ae825 | 362 | __func__, mirror_idx); |
d67ae825 TH |
363 | goto out; |
364 | } | |
365 | ||
366 | /* FIXME: For now assume there is only 1 version available for the DS */ | |
367 | fh = &mirror->fh_versions[0]; | |
368 | out: | |
369 | return fh; | |
370 | } | |
371 | ||
95e2b7e9 JL |
372 | /** |
373 | * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call | |
374 | * @lseg: the layout segment we're operating on | |
375 | * @ds_idx: index of the DS to use | |
376 | * @fail_return: return layout on connect failure? | |
377 | * | |
378 | * Try to prepare a DS connection to accept an RPC call. This involves | |
379 | * selecting a mirror to use and connecting the client to it if it's not | |
380 | * already connected. | |
381 | * | |
382 | * Since we only need a single functioning mirror to satisfy a read, we don't | |
383 | * want to return the layout if there is one. For writes though, any down | |
384 | * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish | |
385 | * between the two cases. | |
386 | * | |
387 | * Returns a pointer to a connected DS object on success or NULL on failure. | |
388 | */ | |
d67ae825 TH |
389 | struct nfs4_pnfs_ds * |
390 | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
391 | bool fail_return) | |
392 | { | |
393 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
394 | struct nfs4_pnfs_ds *ds = NULL; | |
395 | struct nfs4_deviceid_node *devid; | |
396 | struct inode *ino = lseg->pls_layout->plh_inode; | |
397 | struct nfs_server *s = NFS_SERVER(ino); | |
398 | unsigned int max_payload; | |
a33e4b03 | 399 | int status; |
d67ae825 | 400 | |
65990d1a | 401 | if (!ff_layout_mirror_valid(lseg, mirror, true)) { |
889d94d4 | 402 | pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", |
d67ae825 | 403 | __func__, ds_idx); |
d67ae825 TH |
404 | goto out; |
405 | } | |
406 | ||
407 | devid = &mirror->mirror_ds->id_node; | |
408 | if (ff_layout_test_devid_unavailable(devid)) | |
3dc14735 | 409 | goto out_fail; |
d67ae825 TH |
410 | |
411 | ds = mirror->mirror_ds->ds; | |
412 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | |
413 | smp_rmb(); | |
414 | if (ds->ds_clp) | |
90a0be00 | 415 | goto out; |
d67ae825 TH |
416 | |
417 | /* FIXME: For now we assume the server sent only one version of NFS | |
418 | * to use for the DS. | |
419 | */ | |
a33e4b03 | 420 | status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, |
d67ae825 TH |
421 | dataserver_retrans, |
422 | mirror->mirror_ds->ds_versions[0].version, | |
7d38de3f | 423 | mirror->mirror_ds->ds_versions[0].minor_version); |
d67ae825 TH |
424 | |
425 | /* connect success, check rsize/wsize limit */ | |
260f32ad | 426 | if (!status) { |
d67ae825 TH |
427 | max_payload = |
428 | nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), | |
429 | NULL); | |
430 | if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) | |
431 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; | |
432 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | |
433 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; | |
3dc14735 | 434 | goto out; |
d67ae825 | 435 | } |
da066f3f | 436 | out_fail: |
3dc14735 TM |
437 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), |
438 | mirror, lseg->pls_range.offset, | |
439 | lseg->pls_range.length, NFS4ERR_NXIO, | |
440 | OP_ILLEGAL, GFP_NOIO); | |
3dc14735 TM |
441 | if (fail_return || !ff_layout_has_available_ds(lseg)) |
442 | pnfs_error_mark_layout_for_return(ino, lseg); | |
443 | ds = NULL; | |
d67ae825 TH |
444 | out: |
445 | return ds; | |
446 | } | |
447 | ||
448 | struct rpc_cred * | |
449 | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
450 | struct rpc_cred *mdscred) | |
451 | { | |
452 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
547a6376 | 453 | struct rpc_cred *cred; |
d67ae825 | 454 | |
57f3f4c0 JL |
455 | if (mirror) { |
456 | cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); | |
457 | if (!cred) | |
458 | cred = get_rpccred(mdscred); | |
459 | } else { | |
460 | cred = get_rpccred(mdscred); | |
461 | } | |
d67ae825 TH |
462 | return cred; |
463 | } | |
464 | ||
465 | /** | |
466 | * Find or create a DS rpc client with th MDS server rpc client auth flavor | |
467 | * in the nfs_client cl_ds_clients list. | |
468 | */ | |
469 | struct rpc_clnt * | |
470 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
471 | struct nfs_client *ds_clp, struct inode *inode) | |
472 | { | |
473 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
474 | ||
475 | switch (mirror->mirror_ds->ds_versions[0].version) { | |
476 | case 3: | |
477 | /* For NFSv3 DS, flavor is set when creating DS connections */ | |
478 | return ds_clp->cl_rpcclient; | |
479 | case 4: | |
480 | return nfs4_find_or_create_ds_client(ds_clp, inode); | |
481 | default: | |
482 | BUG(); | |
483 | } | |
484 | } | |
485 | ||
5b9b3c85 TM |
486 | void ff_layout_free_ds_ioerr(struct list_head *head) |
487 | { | |
488 | struct nfs4_ff_layout_ds_err *err; | |
489 | ||
490 | while (!list_empty(head)) { | |
491 | err = list_first_entry(head, | |
492 | struct nfs4_ff_layout_ds_err, | |
493 | list); | |
494 | list_del(&err->list); | |
495 | kfree(err); | |
496 | } | |
497 | } | |
498 | ||
d67ae825 | 499 | /* called with inode i_lock held */ |
5b9b3c85 | 500 | int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) |
d67ae825 | 501 | { |
5b9b3c85 | 502 | struct nfs4_ff_layout_ds_err *err; |
d67ae825 TH |
503 | __be32 *p; |
504 | ||
5b9b3c85 | 505 | list_for_each_entry(err, head, list) { |
d67ae825 | 506 | /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) |
d1354907 TM |
507 | * + array length + deviceid(NFS4_DEVICEID4_SIZE) |
508 | * + status(4) + opnum(4) | |
d67ae825 TH |
509 | */ |
510 | p = xdr_reserve_space(xdr, | |
d1354907 | 511 | 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); |
d67ae825 TH |
512 | if (unlikely(!p)) |
513 | return -ENOBUFS; | |
514 | p = xdr_encode_hyper(p, err->offset); | |
515 | p = xdr_encode_hyper(p, err->length); | |
516 | p = xdr_encode_opaque_fixed(p, &err->stateid, | |
517 | NFS4_STATEID_SIZE); | |
d1354907 TM |
518 | /* Encode 1 error */ |
519 | *p++ = cpu_to_be32(1); | |
d67ae825 TH |
520 | p = xdr_encode_opaque_fixed(p, &err->deviceid, |
521 | NFS4_DEVICEID4_SIZE); | |
522 | *p++ = cpu_to_be32(err->status); | |
523 | *p++ = cpu_to_be32(err->opnum); | |
5b9b3c85 | 524 | dprintk("%s: offset %llu length %llu status %d op %d\n", |
d67ae825 | 525 | __func__, err->offset, err->length, err->status, |
5b9b3c85 | 526 | err->opnum); |
d67ae825 TH |
527 | } |
528 | ||
529 | return 0; | |
530 | } | |
531 | ||
5b9b3c85 TM |
532 | static |
533 | unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
534 | const struct pnfs_layout_range *range, | |
535 | struct list_head *head, | |
536 | unsigned int maxnum) | |
537 | { | |
538 | struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); | |
539 | struct inode *inode = lo->plh_inode; | |
540 | struct nfs4_ff_layout_ds_err *err, *n; | |
541 | unsigned int ret = 0; | |
542 | ||
543 | spin_lock(&inode->i_lock); | |
544 | list_for_each_entry_safe(err, n, &flo->error_list, list) { | |
545 | if (!pnfs_is_range_intersecting(err->offset, | |
546 | pnfs_end_offset(err->offset, err->length), | |
547 | range->offset, | |
548 | pnfs_end_offset(range->offset, range->length))) | |
549 | continue; | |
550 | if (!maxnum) | |
551 | break; | |
552 | list_move(&err->list, head); | |
553 | maxnum--; | |
554 | ret++; | |
555 | } | |
556 | spin_unlock(&inode->i_lock); | |
557 | return ret; | |
558 | } | |
559 | ||
560 | unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
561 | const struct pnfs_layout_range *range, | |
562 | struct list_head *head, | |
563 | unsigned int maxnum) | |
564 | { | |
565 | unsigned int ret; | |
566 | ||
567 | ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); | |
568 | /* If we're over the max, discard all remaining entries */ | |
569 | if (ret == maxnum) { | |
570 | LIST_HEAD(discard); | |
571 | do_layout_fetch_ds_ioerr(lo, range, &discard, -1); | |
572 | ff_layout_free_ds_ioerr(&discard); | |
573 | } | |
574 | return ret; | |
575 | } | |
576 | ||
81d6dc8b | 577 | static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
d67ae825 TH |
578 | { |
579 | struct nfs4_ff_layout_mirror *mirror; | |
580 | struct nfs4_deviceid_node *devid; | |
81d6dc8b | 581 | u32 idx; |
d67ae825 TH |
582 | |
583 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
584 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a FI |
585 | if (mirror) { |
586 | if (!mirror->mirror_ds) | |
587 | return true; | |
588 | if (IS_ERR(mirror->mirror_ds)) | |
589 | continue; | |
d67ae825 TH |
590 | devid = &mirror->mirror_ds->id_node; |
591 | if (!ff_layout_test_devid_unavailable(devid)) | |
592 | return true; | |
593 | } | |
594 | } | |
595 | ||
596 | return false; | |
597 | } | |
598 | ||
81d6dc8b TM |
599 | static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
600 | { | |
601 | struct nfs4_ff_layout_mirror *mirror; | |
602 | struct nfs4_deviceid_node *devid; | |
603 | u32 idx; | |
604 | ||
605 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
606 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a | 607 | if (!mirror || IS_ERR(mirror->mirror_ds)) |
81d6dc8b | 608 | return false; |
65990d1a FI |
609 | if (!mirror->mirror_ds) |
610 | continue; | |
81d6dc8b TM |
611 | devid = &mirror->mirror_ds->id_node; |
612 | if (ff_layout_test_devid_unavailable(devid)) | |
613 | return false; | |
614 | } | |
615 | ||
616 | return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; | |
617 | } | |
618 | ||
65990d1a | 619 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
81d6dc8b TM |
620 | { |
621 | if (lseg->pls_range.iomode == IOMODE_READ) | |
622 | return ff_read_layout_has_available_ds(lseg); | |
623 | /* Note: RW layout needs all mirrors available */ | |
624 | return ff_rw_layout_has_available_ds(lseg); | |
625 | } | |
626 | ||
3b13b4b3 TH |
627 | bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg) |
628 | { | |
629 | return ff_layout_no_fallback_to_mds(lseg) || | |
630 | ff_layout_has_available_ds(lseg); | |
631 | } | |
632 | ||
fb1084e3 TH |
633 | bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg) |
634 | { | |
635 | return lseg->pls_range.iomode == IOMODE_RW && | |
636 | ff_layout_no_read_on_rw(lseg); | |
637 | } | |
638 | ||
d67ae825 TH |
/*
 * Expose the two data server transport tunables as unsigned-int module
 * parameters with mode 0644.
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");