]>
Commit | Line | Data |
---|---|---|
d67ae825 TH |
1 | /* |
2 | * Device operations for the pnfs nfs4 file layout driver. | |
3 | * | |
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | |
5 | * | |
6 | * Tao Peng <bergwolf@primarydata.com> | |
7 | */ | |
8 | ||
9 | #include <linux/nfs_fs.h> | |
10 | #include <linux/vmalloc.h> | |
11 | #include <linux/module.h> | |
12 | #include <linux/sunrpc/addr.h> | |
13 | ||
14 | #include "../internal.h" | |
15 | #include "../nfs4session.h" | |
16 | #include "flexfilelayout.h" | |
17 | ||
18 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
19 | ||
15d03055 TM |
20 | static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; |
21 | static unsigned int dataserver_retrans; | |
d67ae825 | 22 | |
65990d1a FI |
23 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); |
24 | ||
d67ae825 TH |
25 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) |
26 | { | |
65990d1a | 27 | if (!IS_ERR_OR_NULL(mirror_ds)) |
d67ae825 TH |
28 | nfs4_put_deviceid_node(&mirror_ds->id_node); |
29 | } | |
30 | ||
31 | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | |
32 | { | |
33 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | |
34 | nfs4_pnfs_ds_put(mirror_ds->ds); | |
84a80f62 | 35 | kfree_rcu(mirror_ds, id_node.rcu); |
d67ae825 TH |
36 | } |
37 | ||
38 | /* Decode opaque device data and construct new_ds using it */ | |
39 | struct nfs4_ff_layout_ds * | |
40 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
41 | gfp_t gfp_flags) | |
42 | { | |
43 | struct xdr_stream stream; | |
44 | struct xdr_buf buf; | |
45 | struct page *scratch; | |
46 | struct list_head dsaddrs; | |
47 | struct nfs4_pnfs_ds_addr *da; | |
48 | struct nfs4_ff_layout_ds *new_ds = NULL; | |
49 | struct nfs4_ff_ds_version *ds_versions = NULL; | |
50 | u32 mp_count; | |
51 | u32 version_count; | |
52 | __be32 *p; | |
53 | int i, ret = -ENOMEM; | |
54 | ||
55 | /* set up xdr stream */ | |
56 | scratch = alloc_page(gfp_flags); | |
57 | if (!scratch) | |
58 | goto out_err; | |
59 | ||
60 | new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | |
61 | if (!new_ds) | |
62 | goto out_scratch; | |
63 | ||
64 | nfs4_init_deviceid_node(&new_ds->id_node, | |
65 | server, | |
66 | &pdev->dev_id); | |
67 | INIT_LIST_HEAD(&dsaddrs); | |
68 | ||
69 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | |
70 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | |
71 | ||
72 | /* multipath count */ | |
73 | p = xdr_inline_decode(&stream, 4); | |
74 | if (unlikely(!p)) | |
75 | goto out_err_drain_dsaddrs; | |
76 | mp_count = be32_to_cpup(p); | |
77 | dprintk("%s: multipath ds count %d\n", __func__, mp_count); | |
78 | ||
79 | for (i = 0; i < mp_count; i++) { | |
80 | /* multipath ds */ | |
81 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | |
82 | &stream, gfp_flags); | |
83 | if (da) | |
84 | list_add_tail(&da->da_node, &dsaddrs); | |
85 | } | |
86 | if (list_empty(&dsaddrs)) { | |
87 | dprintk("%s: no suitable DS addresses found\n", | |
88 | __func__); | |
89 | ret = -ENOMEDIUM; | |
90 | goto out_err_drain_dsaddrs; | |
91 | } | |
92 | ||
93 | /* version count */ | |
94 | p = xdr_inline_decode(&stream, 4); | |
95 | if (unlikely(!p)) | |
96 | goto out_err_drain_dsaddrs; | |
97 | version_count = be32_to_cpup(p); | |
98 | dprintk("%s: version count %d\n", __func__, version_count); | |
99 | ||
100 | ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), | |
101 | gfp_flags); | |
102 | if (!ds_versions) | |
103 | goto out_scratch; | |
104 | ||
105 | for (i = 0; i < version_count; i++) { | |
106 | /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + | |
107 | * tightly_coupled(4) */ | |
108 | p = xdr_inline_decode(&stream, 20); | |
109 | if (unlikely(!p)) | |
110 | goto out_err_drain_dsaddrs; | |
111 | ds_versions[i].version = be32_to_cpup(p++); | |
112 | ds_versions[i].minor_version = be32_to_cpup(p++); | |
113 | ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
114 | ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
115 | ds_versions[i].tightly_coupled = be32_to_cpup(p); | |
116 | ||
117 | if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | |
118 | ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | |
119 | if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | |
120 | ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | |
121 | ||
122 | if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { | |
123 | dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, | |
124 | i, ds_versions[i].version, | |
125 | ds_versions[i].minor_version); | |
126 | ret = -EPROTONOSUPPORT; | |
127 | goto out_err_drain_dsaddrs; | |
128 | } | |
129 | ||
130 | dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | |
131 | __func__, i, ds_versions[i].version, | |
132 | ds_versions[i].minor_version, | |
133 | ds_versions[i].rsize, | |
134 | ds_versions[i].wsize, | |
135 | ds_versions[i].tightly_coupled); | |
136 | } | |
137 | ||
138 | new_ds->ds_versions = ds_versions; | |
139 | new_ds->ds_versions_cnt = version_count; | |
140 | ||
141 | new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | |
142 | if (!new_ds->ds) | |
143 | goto out_err_drain_dsaddrs; | |
144 | ||
145 | /* If DS was already in cache, free ds addrs */ | |
146 | while (!list_empty(&dsaddrs)) { | |
147 | da = list_first_entry(&dsaddrs, | |
148 | struct nfs4_pnfs_ds_addr, | |
149 | da_node); | |
150 | list_del_init(&da->da_node); | |
151 | kfree(da->da_remotestr); | |
152 | kfree(da); | |
153 | } | |
154 | ||
155 | __free_page(scratch); | |
156 | return new_ds; | |
157 | ||
158 | out_err_drain_dsaddrs: | |
159 | while (!list_empty(&dsaddrs)) { | |
160 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | |
161 | da_node); | |
162 | list_del_init(&da->da_node); | |
163 | kfree(da->da_remotestr); | |
164 | kfree(da); | |
165 | } | |
166 | ||
167 | kfree(ds_versions); | |
168 | out_scratch: | |
169 | __free_page(scratch); | |
170 | out_err: | |
171 | kfree(new_ds); | |
172 | ||
173 | dprintk("%s ERROR: returning %d\n", __func__, ret); | |
174 | return NULL; | |
175 | } | |
176 | ||
889d94d4 TM |
177 | static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, |
178 | struct nfs4_deviceid_node *devid) | |
179 | { | |
1c48cee8 | 180 | nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); |
889d94d4 TM |
181 | if (!ff_layout_has_available_ds(lseg)) |
182 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, | |
183 | lseg); | |
184 | } | |
185 | ||
186 | static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | |
65990d1a FI |
187 | struct nfs4_ff_layout_mirror *mirror, |
188 | bool create) | |
889d94d4 | 189 | { |
65990d1a FI |
190 | if (mirror == NULL || IS_ERR(mirror->mirror_ds)) |
191 | goto outerr; | |
192 | if (mirror->mirror_ds == NULL) { | |
193 | if (create) { | |
194 | struct nfs4_deviceid_node *node; | |
195 | struct pnfs_layout_hdr *lh = lseg->pls_layout; | |
196 | struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); | |
197 | ||
198 | node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | |
199 | &mirror->devid, lh->plh_lc_cred, | |
200 | GFP_KERNEL); | |
201 | if (node) | |
202 | mirror_ds = FF_LAYOUT_MIRROR_DS(node); | |
203 | ||
204 | /* check for race with another call to this function */ | |
205 | if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && | |
206 | mirror_ds != ERR_PTR(-ENODEV)) | |
207 | nfs4_put_deviceid_node(node); | |
208 | } else | |
209 | goto outerr; | |
889d94d4 TM |
210 | } |
211 | if (mirror->mirror_ds->ds == NULL) { | |
212 | struct nfs4_deviceid_node *devid; | |
213 | devid = &mirror->mirror_ds->id_node; | |
214 | ff_layout_mark_devid_invalid(lseg, devid); | |
215 | return false; | |
216 | } | |
217 | return true; | |
65990d1a FI |
218 | outerr: |
219 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); | |
220 | return false; | |
889d94d4 TM |
221 | } |
222 | ||
d67ae825 TH |
223 | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, |
224 | u64 offset, u64 length) | |
225 | { | |
226 | u64 end; | |
227 | ||
17822b20 TM |
228 | end = max_t(u64, pnfs_end_offset(err->offset, err->length), |
229 | pnfs_end_offset(offset, length)); | |
d67ae825 TH |
230 | err->offset = min_t(u64, err->offset, offset); |
231 | err->length = end - err->offset; | |
232 | } | |
233 | ||
b819ed4b TM |
234 | static int |
235 | ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, | |
236 | const struct nfs4_ff_layout_ds_err *e2) | |
d67ae825 | 237 | { |
b819ed4b TM |
238 | int ret; |
239 | ||
240 | if (e1->opnum != e2->opnum) | |
241 | return e1->opnum < e2->opnum ? -1 : 1; | |
242 | if (e1->status != e2->status) | |
243 | return e1->status < e2->status ? -1 : 1; | |
93b717fd TM |
244 | ret = memcmp(e1->stateid.data, e2->stateid.data, |
245 | sizeof(e1->stateid.data)); | |
b819ed4b TM |
246 | if (ret != 0) |
247 | return ret; | |
248 | ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); | |
249 | if (ret != 0) | |
250 | return ret; | |
17822b20 | 251 | if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) |
b819ed4b | 252 | return -1; |
17822b20 | 253 | if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) |
b819ed4b TM |
254 | return 1; |
255 | /* If ranges overlap or are contiguous, they are the same */ | |
256 | return 0; | |
d67ae825 TH |
257 | } |
258 | ||
b819ed4b | 259 | static void |
d67ae825 TH |
260 | ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, |
261 | struct nfs4_ff_layout_ds_err *dserr) | |
262 | { | |
b819ed4b TM |
263 | struct nfs4_ff_layout_ds_err *err, *tmp; |
264 | struct list_head *head = &flo->error_list; | |
265 | int match; | |
266 | ||
267 | /* Do insertion sort w/ merges */ | |
268 | list_for_each_entry_safe(err, tmp, &flo->error_list, list) { | |
269 | match = ff_ds_error_match(err, dserr); | |
270 | if (match < 0) | |
271 | continue; | |
272 | if (match > 0) { | |
273 | /* Add entry "dserr" _before_ entry "err" */ | |
274 | head = &err->list; | |
d67ae825 TH |
275 | break; |
276 | } | |
b819ed4b TM |
277 | /* Entries match, so merge "err" into "dserr" */ |
278 | extend_ds_error(dserr, err->offset, err->length); | |
cb067935 | 279 | list_replace(&err->list, &dserr->list); |
b819ed4b | 280 | kfree(err); |
cb067935 | 281 | return; |
d67ae825 TH |
282 | } |
283 | ||
b819ed4b | 284 | list_add_tail(&dserr->list, head); |
d67ae825 TH |
285 | } |
286 | ||
287 | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | |
288 | struct nfs4_ff_layout_mirror *mirror, u64 offset, | |
289 | u64 length, int status, enum nfs_opnum4 opnum, | |
290 | gfp_t gfp_flags) | |
291 | { | |
292 | struct nfs4_ff_layout_ds_err *dserr; | |
d67ae825 TH |
293 | |
294 | if (status == 0) | |
295 | return 0; | |
296 | ||
297 | if (mirror->mirror_ds == NULL) | |
298 | return -EINVAL; | |
299 | ||
d67ae825 TH |
300 | dserr = kmalloc(sizeof(*dserr), gfp_flags); |
301 | if (!dserr) | |
302 | return -ENOMEM; | |
303 | ||
304 | INIT_LIST_HEAD(&dserr->list); | |
305 | dserr->offset = offset; | |
306 | dserr->length = length; | |
307 | dserr->status = status; | |
308 | dserr->opnum = opnum; | |
309 | nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); | |
310 | memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, | |
311 | NFS4_DEVICEID4_SIZE); | |
312 | ||
313 | spin_lock(&flo->generic_hdr.plh_inode->i_lock); | |
b819ed4b | 314 | ff_layout_add_ds_error_locked(flo, dserr); |
d67ae825 | 315 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); |
d67ae825 TH |
316 | |
317 | return 0; | |
318 | } | |
319 | ||
57f3f4c0 JL |
320 | static struct rpc_cred * |
321 | ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) | |
322 | { | |
3064b686 | 323 | struct rpc_cred *cred, __rcu **pcred; |
57f3f4c0 | 324 | |
3064b686 JL |
325 | if (iomode == IOMODE_READ) |
326 | pcred = &mirror->ro_cred; | |
327 | else | |
328 | pcred = &mirror->rw_cred; | |
57f3f4c0 JL |
329 | |
330 | rcu_read_lock(); | |
331 | do { | |
332 | cred = rcu_dereference(*pcred); | |
333 | if (!cred) | |
334 | break; | |
335 | ||
336 | cred = get_rpccred_rcu(cred); | |
337 | } while(!cred); | |
338 | rcu_read_unlock(); | |
339 | return cred; | |
340 | } | |
341 | ||
d67ae825 TH |
342 | struct nfs_fh * |
343 | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | |
344 | { | |
345 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | |
346 | struct nfs_fh *fh = NULL; | |
d67ae825 | 347 | |
65990d1a | 348 | if (!ff_layout_mirror_valid(lseg, mirror, false)) { |
889d94d4 | 349 | pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", |
d67ae825 | 350 | __func__, mirror_idx); |
d67ae825 TH |
351 | goto out; |
352 | } | |
353 | ||
354 | /* FIXME: For now assume there is only 1 version available for the DS */ | |
355 | fh = &mirror->fh_versions[0]; | |
356 | out: | |
357 | return fh; | |
358 | } | |
359 | ||
95e2b7e9 JL |
360 | /** |
361 | * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call | |
362 | * @lseg: the layout segment we're operating on | |
363 | * @ds_idx: index of the DS to use | |
364 | * @fail_return: return layout on connect failure? | |
365 | * | |
366 | * Try to prepare a DS connection to accept an RPC call. This involves | |
367 | * selecting a mirror to use and connecting the client to it if it's not | |
368 | * already connected. | |
369 | * | |
370 | * Since we only need a single functioning mirror to satisfy a read, we don't | |
371 | * want to return the layout if there is one. For writes though, any down | |
372 | * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish | |
373 | * between the two cases. | |
374 | * | |
375 | * Returns a pointer to a connected DS object on success or NULL on failure. | |
376 | */ | |
d67ae825 TH |
377 | struct nfs4_pnfs_ds * |
378 | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
379 | bool fail_return) | |
380 | { | |
381 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
382 | struct nfs4_pnfs_ds *ds = NULL; | |
383 | struct nfs4_deviceid_node *devid; | |
384 | struct inode *ino = lseg->pls_layout->plh_inode; | |
385 | struct nfs_server *s = NFS_SERVER(ino); | |
386 | unsigned int max_payload; | |
d67ae825 | 387 | |
65990d1a | 388 | if (!ff_layout_mirror_valid(lseg, mirror, true)) { |
889d94d4 | 389 | pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", |
d67ae825 | 390 | __func__, ds_idx); |
d67ae825 TH |
391 | goto out; |
392 | } | |
393 | ||
394 | devid = &mirror->mirror_ds->id_node; | |
395 | if (ff_layout_test_devid_unavailable(devid)) | |
3dc14735 | 396 | goto out_fail; |
d67ae825 TH |
397 | |
398 | ds = mirror->mirror_ds->ds; | |
399 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | |
400 | smp_rmb(); | |
401 | if (ds->ds_clp) | |
90a0be00 | 402 | goto out; |
d67ae825 TH |
403 | |
404 | /* FIXME: For now we assume the server sent only one version of NFS | |
405 | * to use for the DS. | |
406 | */ | |
407 | nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, | |
408 | dataserver_retrans, | |
409 | mirror->mirror_ds->ds_versions[0].version, | |
7d38de3f | 410 | mirror->mirror_ds->ds_versions[0].minor_version); |
d67ae825 TH |
411 | |
412 | /* connect success, check rsize/wsize limit */ | |
413 | if (ds->ds_clp) { | |
414 | max_payload = | |
415 | nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), | |
416 | NULL); | |
417 | if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) | |
418 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; | |
419 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | |
420 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; | |
3dc14735 | 421 | goto out; |
d67ae825 | 422 | } |
3dc14735 TM |
423 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), |
424 | mirror, lseg->pls_range.offset, | |
425 | lseg->pls_range.length, NFS4ERR_NXIO, | |
426 | OP_ILLEGAL, GFP_NOIO); | |
427 | out_fail: | |
428 | if (fail_return || !ff_layout_has_available_ds(lseg)) | |
429 | pnfs_error_mark_layout_for_return(ino, lseg); | |
430 | ds = NULL; | |
d67ae825 TH |
431 | out: |
432 | return ds; | |
433 | } | |
434 | ||
435 | struct rpc_cred * | |
436 | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
437 | struct rpc_cred *mdscred) | |
438 | { | |
439 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
547a6376 | 440 | struct rpc_cred *cred; |
d67ae825 | 441 | |
57f3f4c0 JL |
442 | if (mirror) { |
443 | cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); | |
444 | if (!cred) | |
445 | cred = get_rpccred(mdscred); | |
446 | } else { | |
447 | cred = get_rpccred(mdscred); | |
448 | } | |
d67ae825 TH |
449 | return cred; |
450 | } | |
451 | ||
452 | /** | |
453 | * Find or create a DS rpc client with th MDS server rpc client auth flavor | |
454 | * in the nfs_client cl_ds_clients list. | |
455 | */ | |
456 | struct rpc_clnt * | |
457 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
458 | struct nfs_client *ds_clp, struct inode *inode) | |
459 | { | |
460 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
461 | ||
462 | switch (mirror->mirror_ds->ds_versions[0].version) { | |
463 | case 3: | |
464 | /* For NFSv3 DS, flavor is set when creating DS connections */ | |
465 | return ds_clp->cl_rpcclient; | |
466 | case 4: | |
467 | return nfs4_find_or_create_ds_client(ds_clp, inode); | |
468 | default: | |
469 | BUG(); | |
470 | } | |
471 | } | |
472 | ||
5b9b3c85 TM |
473 | void ff_layout_free_ds_ioerr(struct list_head *head) |
474 | { | |
475 | struct nfs4_ff_layout_ds_err *err; | |
476 | ||
477 | while (!list_empty(head)) { | |
478 | err = list_first_entry(head, | |
479 | struct nfs4_ff_layout_ds_err, | |
480 | list); | |
481 | list_del(&err->list); | |
482 | kfree(err); | |
483 | } | |
484 | } | |
485 | ||
d67ae825 | 486 | /* called with inode i_lock held */ |
5b9b3c85 | 487 | int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) |
d67ae825 | 488 | { |
5b9b3c85 | 489 | struct nfs4_ff_layout_ds_err *err; |
d67ae825 TH |
490 | __be32 *p; |
491 | ||
5b9b3c85 | 492 | list_for_each_entry(err, head, list) { |
d67ae825 | 493 | /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) |
d1354907 TM |
494 | * + array length + deviceid(NFS4_DEVICEID4_SIZE) |
495 | * + status(4) + opnum(4) | |
d67ae825 TH |
496 | */ |
497 | p = xdr_reserve_space(xdr, | |
d1354907 | 498 | 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); |
d67ae825 TH |
499 | if (unlikely(!p)) |
500 | return -ENOBUFS; | |
501 | p = xdr_encode_hyper(p, err->offset); | |
502 | p = xdr_encode_hyper(p, err->length); | |
503 | p = xdr_encode_opaque_fixed(p, &err->stateid, | |
504 | NFS4_STATEID_SIZE); | |
d1354907 TM |
505 | /* Encode 1 error */ |
506 | *p++ = cpu_to_be32(1); | |
d67ae825 TH |
507 | p = xdr_encode_opaque_fixed(p, &err->deviceid, |
508 | NFS4_DEVICEID4_SIZE); | |
509 | *p++ = cpu_to_be32(err->status); | |
510 | *p++ = cpu_to_be32(err->opnum); | |
5b9b3c85 | 511 | dprintk("%s: offset %llu length %llu status %d op %d\n", |
d67ae825 | 512 | __func__, err->offset, err->length, err->status, |
5b9b3c85 | 513 | err->opnum); |
d67ae825 TH |
514 | } |
515 | ||
516 | return 0; | |
517 | } | |
518 | ||
5b9b3c85 TM |
519 | static |
520 | unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
521 | const struct pnfs_layout_range *range, | |
522 | struct list_head *head, | |
523 | unsigned int maxnum) | |
524 | { | |
525 | struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); | |
526 | struct inode *inode = lo->plh_inode; | |
527 | struct nfs4_ff_layout_ds_err *err, *n; | |
528 | unsigned int ret = 0; | |
529 | ||
530 | spin_lock(&inode->i_lock); | |
531 | list_for_each_entry_safe(err, n, &flo->error_list, list) { | |
532 | if (!pnfs_is_range_intersecting(err->offset, | |
533 | pnfs_end_offset(err->offset, err->length), | |
534 | range->offset, | |
535 | pnfs_end_offset(range->offset, range->length))) | |
536 | continue; | |
537 | if (!maxnum) | |
538 | break; | |
539 | list_move(&err->list, head); | |
540 | maxnum--; | |
541 | ret++; | |
542 | } | |
543 | spin_unlock(&inode->i_lock); | |
544 | return ret; | |
545 | } | |
546 | ||
547 | unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
548 | const struct pnfs_layout_range *range, | |
549 | struct list_head *head, | |
550 | unsigned int maxnum) | |
551 | { | |
552 | unsigned int ret; | |
553 | ||
554 | ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); | |
555 | /* If we're over the max, discard all remaining entries */ | |
556 | if (ret == maxnum) { | |
557 | LIST_HEAD(discard); | |
558 | do_layout_fetch_ds_ioerr(lo, range, &discard, -1); | |
559 | ff_layout_free_ds_ioerr(&discard); | |
560 | } | |
561 | return ret; | |
562 | } | |
563 | ||
81d6dc8b | 564 | static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
d67ae825 TH |
565 | { |
566 | struct nfs4_ff_layout_mirror *mirror; | |
567 | struct nfs4_deviceid_node *devid; | |
81d6dc8b | 568 | u32 idx; |
d67ae825 TH |
569 | |
570 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
571 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a FI |
572 | if (mirror) { |
573 | if (!mirror->mirror_ds) | |
574 | return true; | |
575 | if (IS_ERR(mirror->mirror_ds)) | |
576 | continue; | |
d67ae825 TH |
577 | devid = &mirror->mirror_ds->id_node; |
578 | if (!ff_layout_test_devid_unavailable(devid)) | |
579 | return true; | |
580 | } | |
581 | } | |
582 | ||
583 | return false; | |
584 | } | |
585 | ||
81d6dc8b TM |
586 | static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
587 | { | |
588 | struct nfs4_ff_layout_mirror *mirror; | |
589 | struct nfs4_deviceid_node *devid; | |
590 | u32 idx; | |
591 | ||
592 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
593 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a | 594 | if (!mirror || IS_ERR(mirror->mirror_ds)) |
81d6dc8b | 595 | return false; |
65990d1a FI |
596 | if (!mirror->mirror_ds) |
597 | continue; | |
81d6dc8b TM |
598 | devid = &mirror->mirror_ds->id_node; |
599 | if (ff_layout_test_devid_unavailable(devid)) | |
600 | return false; | |
601 | } | |
602 | ||
603 | return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; | |
604 | } | |
605 | ||
65990d1a | 606 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
81d6dc8b TM |
607 | { |
608 | if (lseg->pls_range.iomode == IOMODE_READ) | |
609 | return ff_read_layout_has_available_ds(lseg); | |
610 | /* Note: RW layout needs all mirrors available */ | |
611 | return ff_rw_layout_has_available_ds(lseg); | |
612 | } | |
613 | ||
3b13b4b3 TH |
614 | bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg) |
615 | { | |
616 | return ff_layout_no_fallback_to_mds(lseg) || | |
617 | ff_layout_has_available_ds(lseg); | |
618 | } | |
619 | ||
fb1084e3 TH |
620 | bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg) |
621 | { | |
622 | return lseg->pls_range.iomode == IOMODE_RW && | |
623 | ff_layout_no_read_on_rw(lseg); | |
624 | } | |
625 | ||
d67ae825 TH |
/*
 * Expose the two data server connection tunables as module parameters
 * (unsigned int, mode 0644). Both values are handed to
 * nfs4_pnfs_ds_connect() when a data server connection is set up.
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");