]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - fs/nfs/pnfs.c
Merge branch 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie...
[mirror_ubuntu-artful-kernel.git] / fs / nfs / pnfs.c
CommitLineData
85e174ba
RL
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
974cec8c 31#include "internal.h"
85e174ba
RL
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
02c35fca
FI
/*
 * Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl: list of every registered pNFS layout driver, linked
 * through pnfs_layoutdriver_type.pnfs_tblid.
 */
static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
85e174ba
RL
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
02c35fca
FI
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
85e174ba
RL
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
02c35fca 77 if (nfss->pnfs_curr_ld) {
1c787096 78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
02c35fca
FI
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
85e174ba
RL
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
02c35fca
FI
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
85e174ba 117 server->pnfs_curr_ld = ld_type;
1c787096 118 if (ld_type->set_layoutdriver(server)) {
02c35fca
FI
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
85e174ba
RL
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
02c35fca
FI
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
b1f69b75
AA
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
02c35fca
FI
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
e5e94017 175
b1f69b75
AA
176/*
177 * pNFS client layout cache
178 */
179
cc6e5340 180/* Need to hold i_lock if caller does not already hold reference */
43f1b3da 181void
cc6e5340 182get_layout_hdr(struct pnfs_layout_hdr *lo)
e5e94017 183{
cc6e5340 184 atomic_inc(&lo->plh_refcount);
e5e94017
BH
185}
186
187static void
cc6e5340 188destroy_layout_hdr(struct pnfs_layout_hdr *lo)
e5e94017 189{
cc6e5340
FI
190 dprintk("%s: freeing layout cache %p\n", __func__, lo);
191 BUG_ON(!list_empty(&lo->plh_layouts));
192 NFS_I(lo->plh_inode)->layout = NULL;
193 kfree(lo);
194}
e5e94017 195
cc6e5340
FI
196static void
197put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
198{
199 if (atomic_dec_and_test(&lo->plh_refcount))
200 destroy_layout_hdr(lo);
e5e94017
BH
201}
202
b1f69b75 203void
cc6e5340 204put_layout_hdr(struct pnfs_layout_hdr *lo)
974cec8c 205{
cc6e5340
FI
206 struct inode *inode = lo->plh_inode;
207
208 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
209 destroy_layout_hdr(lo);
210 spin_unlock(&inode->i_lock);
211 }
974cec8c
AA
212}
213
214static void
215init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
216{
566052c5 217 INIT_LIST_HEAD(&lseg->pls_list);
4541d16c
FI
218 atomic_set(&lseg->pls_refcount, 1);
219 smp_mb();
220 set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
566052c5 221 lseg->pls_layout = lo;
974cec8c
AA
222}
223
4541d16c 224static void free_lseg(struct pnfs_layout_segment *lseg)
974cec8c 225{
b7edfaa1 226 struct inode *ino = lseg->pls_layout->plh_inode;
974cec8c 227
b1f69b75 228 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
52fabd73 229 /* Matched by get_layout_hdr in pnfs_insert_layout */
cc6e5340 230 put_layout_hdr(NFS_I(ino)->layout);
974cec8c
AA
231}
232
4541d16c
FI
233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
234 * could sleep, so must be called outside of the lock.
235 * Returns 1 if object was removed, otherwise return 0.
236 */
237static int
238put_lseg_locked(struct pnfs_layout_segment *lseg,
239 struct list_head *tmp_list)
974cec8c 240{
4541d16c
FI
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) {
245 struct inode *ino = lseg->pls_layout->plh_inode;
246
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
248 list_del(&lseg->pls_list);
249 if (list_empty(&lseg->pls_layout->plh_segs)) {
250 struct nfs_client *clp;
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
43f1b3da 257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
4541d16c 258 }
f7e8917a 259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
4541d16c
FI
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 }
263 return 0;
264}
974cec8c 265
4541d16c
FI
266static bool
267should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
268{
269 return (recall_iomode == IOMODE_ANY ||
270 lseg_iomode == recall_iomode);
974cec8c
AA
271}
272
4541d16c
FI
273/* Returns 1 if lseg is removed from list, 0 otherwise */
274static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
275 struct list_head *tmp_list)
276{
277 int rv = 0;
278
279 if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
280 /* Remove the reference keeping the lseg in the
281 * list. It will now be removed when all
282 * outstanding io is finished.
283 */
284 rv = put_lseg_locked(lseg, tmp_list);
285 }
286 return rv;
287}
288
289/* Returns count of number of matching invalid lsegs remaining in list
290 * after call.
291 */
43f1b3da 292int
4541d16c
FI
293mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
294 struct list_head *tmp_list,
295 u32 iomode)
974cec8c
AA
296{
297 struct pnfs_layout_segment *lseg, *next;
4541d16c 298 int invalid = 0, removed = 0;
974cec8c
AA
299
300 dprintk("%s:Begin lo %p\n", __func__, lo);
301
4541d16c
FI
302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
304 dprintk("%s: freeing lseg %p iomode %d "
305 "offset %llu length %llu\n", __func__,
306 lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
307 lseg->pls_range.length);
308 invalid++;
309 removed += mark_lseg_invalid(lseg, tmp_list);
310 }
311 dprintk("%s:Return %i\n", __func__, invalid - removed);
312 return invalid - removed;
974cec8c
AA
313}
314
43f1b3da 315void
4541d16c 316pnfs_free_lseg_list(struct list_head *free_me)
974cec8c 317{
4541d16c 318 struct pnfs_layout_segment *lseg, *tmp;
974cec8c 319
4541d16c 320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
566052c5 321 list_del(&lseg->pls_list);
4541d16c 322 free_lseg(lseg);
974cec8c
AA
323 }
324}
325
e5e94017
BH
326void
327pnfs_destroy_layout(struct nfs_inode *nfsi)
328{
329 struct pnfs_layout_hdr *lo;
974cec8c 330 LIST_HEAD(tmp_list);
e5e94017
BH
331
332 spin_lock(&nfsi->vfs_inode.i_lock);
333 lo = nfsi->layout;
334 if (lo) {
4541d16c
FI
335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
e5e94017
BH
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo);
339 }
340 spin_unlock(&nfsi->vfs_inode.i_lock);
974cec8c
AA
341 pnfs_free_lseg_list(&tmp_list);
342}
343
344/*
345 * Called by the state manger to remove all layouts established under an
346 * expired lease.
347 */
348void
349pnfs_destroy_all_layouts(struct nfs_client *clp)
350{
351 struct pnfs_layout_hdr *lo;
352 LIST_HEAD(tmp_list);
353
354 spin_lock(&clp->cl_lock);
355 list_splice_init(&clp->cl_layouts, &tmp_list);
356 spin_unlock(&clp->cl_lock);
357
358 while (!list_empty(&tmp_list)) {
359 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
b7edfaa1 360 plh_layouts);
974cec8c 361 dprintk("%s freeing layout for inode %lu\n", __func__,
b7edfaa1
FI
362 lo->plh_inode->i_ino);
363 pnfs_destroy_layout(NFS_I(lo->plh_inode));
974cec8c 364 }
e5e94017
BH
365}
366
fd6002e9 367/* update lo->plh_stateid with new if is more recent */
43f1b3da
FI
368void
369pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
370 bool update_barrier)
b1f69b75 371{
fd6002e9 372 u32 oldseq, newseq;
b1f69b75 373
fd6002e9
FI
374 oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
375 newseq = be32_to_cpu(new->stateid.seqid);
43f1b3da 376 if ((int)(newseq - oldseq) > 0) {
fd6002e9 377 memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
43f1b3da
FI
378 if (update_barrier) {
379 u32 new_barrier = be32_to_cpu(new->stateid.seqid);
380
381 if ((int)(new_barrier - lo->plh_barrier))
382 lo->plh_barrier = new_barrier;
383 } else {
384 /* Because of wraparound, we want to keep the barrier
385 * "close" to the current seqids. It needs to be
386 * within 2**31 to count as "behind", so if it
387 * gets too near that limit, give us a litle leeway
388 * and bring it to within 2**30.
389 * NOTE - and yes, this is all unsigned arithmetic.
390 */
391 if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
392 lo->plh_barrier = newseq - (1 << 30);
393 }
394 }
b1f69b75
AA
395}
396
cf7d63f1
FI
397/* lget is set to 1 if called from inside send_layoutget call chain */
398static bool
43f1b3da
FI
399pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
400 int lget)
401{
402 if ((stateid) &&
403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
404 return true;
f7e8917a
FI
405 return lo->plh_block_lgets ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
43f1b3da 407 (list_empty(&lo->plh_segs) &&
cf7d63f1
FI
408 (atomic_read(&lo->plh_outstanding) > lget));
409}
410
fd6002e9
FI
411int
412pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
413 struct nfs4_state *open_state)
b1f69b75 414{
fd6002e9 415 int status = 0;
974cec8c 416
b1f69b75 417 dprintk("--> %s\n", __func__);
fd6002e9 418 spin_lock(&lo->plh_inode->i_lock);
43f1b3da 419 if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
cf7d63f1
FI
420 status = -EAGAIN;
421 } else if (list_empty(&lo->plh_segs)) {
fd6002e9
FI
422 int seq;
423
424 do {
425 seq = read_seqbegin(&open_state->seqlock);
426 memcpy(dst->data, open_state->stateid.data,
427 sizeof(open_state->stateid.data));
428 } while (read_seqretry(&open_state->seqlock, seq));
429 } else
430 memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
431 spin_unlock(&lo->plh_inode->i_lock);
b1f69b75 432 dprintk("<-- %s\n", __func__);
fd6002e9 433 return status;
b1f69b75
AA
434}
435
436/*
437* Get layout from server.
438* for now, assume that whole file layouts are requested.
439* arg->offset: 0
440* arg->length: all ones
441*/
e5e94017
BH
442static struct pnfs_layout_segment *
443send_layoutget(struct pnfs_layout_hdr *lo,
444 struct nfs_open_context *ctx,
445 u32 iomode)
446{
b7edfaa1 447 struct inode *ino = lo->plh_inode;
b1f69b75
AA
448 struct nfs_server *server = NFS_SERVER(ino);
449 struct nfs4_layoutget *lgp;
450 struct pnfs_layout_segment *lseg = NULL;
451
452 dprintk("--> %s\n", __func__);
e5e94017 453
b1f69b75
AA
454 BUG_ON(ctx == NULL);
455 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
cf7d63f1 456 if (lgp == NULL)
b1f69b75 457 return NULL;
b1f69b75
AA
458 lgp->args.minlength = NFS4_MAX_UINT64;
459 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
460 lgp->args.range.iomode = iomode;
461 lgp->args.range.offset = 0;
462 lgp->args.range.length = NFS4_MAX_UINT64;
463 lgp->args.type = server->pnfs_curr_ld->id;
464 lgp->args.inode = ino;
465 lgp->args.ctx = get_nfs_open_context(ctx);
466 lgp->lsegpp = &lseg;
467
468 /* Synchronously retrieve layout information from server and
469 * store in lseg.
470 */
471 nfs4_proc_layoutget(lgp);
974cec8c 472 if (!lseg) {
b1f69b75 473 /* remember that LAYOUTGET failed and suspend trying */
566052c5 474 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
974cec8c 475 }
974cec8c
AA
476 return lseg;
477}
478
f7e8917a
FI
479bool pnfs_roc(struct inode *ino)
480{
481 struct pnfs_layout_hdr *lo;
482 struct pnfs_layout_segment *lseg, *tmp;
483 LIST_HEAD(tmp_list);
484 bool found = false;
485
486 spin_lock(&ino->i_lock);
487 lo = NFS_I(ino)->layout;
488 if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
489 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
490 goto out_nolayout;
491 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
492 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
493 mark_lseg_invalid(lseg, &tmp_list);
494 found = true;
495 }
496 if (!found)
497 goto out_nolayout;
498 lo->plh_block_lgets++;
499 get_layout_hdr(lo); /* matched in pnfs_roc_release */
500 spin_unlock(&ino->i_lock);
501 pnfs_free_lseg_list(&tmp_list);
502 return true;
503
504out_nolayout:
505 spin_unlock(&ino->i_lock);
506 return false;
507}
508
509void pnfs_roc_release(struct inode *ino)
510{
511 struct pnfs_layout_hdr *lo;
512
513 spin_lock(&ino->i_lock);
514 lo = NFS_I(ino)->layout;
515 lo->plh_block_lgets--;
516 put_layout_hdr_locked(lo);
517 spin_unlock(&ino->i_lock);
518}
519
520void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
521{
522 struct pnfs_layout_hdr *lo;
523
524 spin_lock(&ino->i_lock);
525 lo = NFS_I(ino)->layout;
526 if ((int)(barrier - lo->plh_barrier) > 0)
527 lo->plh_barrier = barrier;
528 spin_unlock(&ino->i_lock);
529}
530
531bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
532{
533 struct nfs_inode *nfsi = NFS_I(ino);
534 struct pnfs_layout_segment *lseg;
535 bool found = false;
536
537 spin_lock(&ino->i_lock);
538 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
539 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
540 found = true;
541 break;
542 }
543 if (!found) {
544 struct pnfs_layout_hdr *lo = nfsi->layout;
545 u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
546
547 /* Since close does not return a layout stateid for use as
548 * a barrier, we choose the worst-case barrier.
549 */
550 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
551 }
552 spin_unlock(&ino->i_lock);
553 return found;
554}
555
b1f69b75
AA
556/*
557 * Compare two layout segments for sorting into layout cache.
558 * We want to preferentially return RW over RO layouts, so ensure those
559 * are seen first.
560 */
561static s64
562cmp_layout(u32 iomode1, u32 iomode2)
563{
564 /* read > read/write */
565 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
566}
567
974cec8c
AA
568static void
569pnfs_insert_layout(struct pnfs_layout_hdr *lo,
570 struct pnfs_layout_segment *lseg)
571{
b1f69b75
AA
572 struct pnfs_layout_segment *lp;
573 int found = 0;
574
974cec8c
AA
575 dprintk("%s:Begin\n", __func__);
576
b7edfaa1 577 assert_spin_locked(&lo->plh_inode->i_lock);
b7edfaa1 578 list_for_each_entry(lp, &lo->plh_segs, pls_list) {
566052c5 579 if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
b1f69b75 580 continue;
566052c5 581 list_add_tail(&lseg->pls_list, &lp->pls_list);
b1f69b75
AA
582 dprintk("%s: inserted lseg %p "
583 "iomode %d offset %llu length %llu before "
584 "lp %p iomode %d offset %llu length %llu\n",
566052c5
FI
585 __func__, lseg, lseg->pls_range.iomode,
586 lseg->pls_range.offset, lseg->pls_range.length,
587 lp, lp->pls_range.iomode, lp->pls_range.offset,
588 lp->pls_range.length);
b1f69b75
AA
589 found = 1;
590 break;
591 }
592 if (!found) {
b7edfaa1 593 list_add_tail(&lseg->pls_list, &lo->plh_segs);
b1f69b75
AA
594 dprintk("%s: inserted lseg %p "
595 "iomode %d offset %llu length %llu at tail\n",
566052c5
FI
596 __func__, lseg, lseg->pls_range.iomode,
597 lseg->pls_range.offset, lseg->pls_range.length);
974cec8c 598 }
cc6e5340 599 get_layout_hdr(lo);
974cec8c
AA
600
601 dprintk("%s:Return\n", __func__);
e5e94017
BH
602}
603
604static struct pnfs_layout_hdr *
605alloc_init_layout_hdr(struct inode *ino)
606{
607 struct pnfs_layout_hdr *lo;
608
609 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
610 if (!lo)
611 return NULL;
cc6e5340 612 atomic_set(&lo->plh_refcount, 1);
b7edfaa1
FI
613 INIT_LIST_HEAD(&lo->plh_layouts);
614 INIT_LIST_HEAD(&lo->plh_segs);
43f1b3da 615 INIT_LIST_HEAD(&lo->plh_bulk_recall);
b7edfaa1 616 lo->plh_inode = ino;
e5e94017
BH
617 return lo;
618}
619
620static struct pnfs_layout_hdr *
621pnfs_find_alloc_layout(struct inode *ino)
622{
623 struct nfs_inode *nfsi = NFS_I(ino);
624 struct pnfs_layout_hdr *new = NULL;
625
626 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
627
628 assert_spin_locked(&ino->i_lock);
4541d16c
FI
629 if (nfsi->layout) {
630 if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
631 return NULL;
632 else
633 return nfsi->layout;
634 }
e5e94017
BH
635 spin_unlock(&ino->i_lock);
636 new = alloc_init_layout_hdr(ino);
637 spin_lock(&ino->i_lock);
638
639 if (likely(nfsi->layout == NULL)) /* Won the race? */
640 nfsi->layout = new;
641 else
642 kfree(new);
643 return nfsi->layout;
644}
645
b1f69b75
AA
646/*
647 * iomode matching rules:
648 * iomode lseg match
649 * ----- ----- -----
650 * ANY READ true
651 * ANY RW true
652 * RW READ false
653 * RW RW true
654 * READ READ true
655 * READ RW true
656 */
657static int
658is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
659{
566052c5 660 return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
b1f69b75
AA
661}
662
663/*
664 * lookup range in layout
665 */
e5e94017 666static struct pnfs_layout_segment *
43f1b3da 667pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
e5e94017 668{
b1f69b75
AA
669 struct pnfs_layout_segment *lseg, *ret = NULL;
670
671 dprintk("%s:Begin\n", __func__);
672
b7edfaa1
FI
673 assert_spin_locked(&lo->plh_inode->i_lock);
674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
4541d16c
FI
675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) {
b1f69b75
AA
677 ret = lseg;
678 break;
679 }
566052c5 680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
b1f69b75
AA
681 break;
682 }
683
684 dprintk("%s:Return lseg %p ref %d\n",
4541d16c 685 __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
b1f69b75 686 return ret;
e5e94017
BH
687}
688
689/*
690 * Layout segment is retreived from the server if not cached.
691 * The appropriate layout segment is referenced and returned to the caller.
692 */
693struct pnfs_layout_segment *
694pnfs_update_layout(struct inode *ino,
695 struct nfs_open_context *ctx,
696 enum pnfs_iomode iomode)
697{
698 struct nfs_inode *nfsi = NFS_I(ino);
2130ff66 699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
e5e94017
BH
700 struct pnfs_layout_hdr *lo;
701 struct pnfs_layout_segment *lseg = NULL;
702
703 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
704 return NULL;
705 spin_lock(&ino->i_lock);
706 lo = pnfs_find_alloc_layout(ino);
707 if (lo == NULL) {
708 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
709 goto out_unlock;
710 }
711
43f1b3da
FI
712 /* Do we even need to bother with this? */
713 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
714 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
715 dprintk("%s matches recall, use MDS\n", __func__);
e5e94017
BH
716 goto out_unlock;
717 }
43f1b3da
FI
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
e5e94017
BH
722
723 /* if LAYOUTGET already failed once we don't try again */
566052c5 724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
e5e94017
BH
725 goto out_unlock;
726
43f1b3da 727 if (pnfs_layoutgets_blocked(lo, NULL, 0))
cf7d63f1
FI
728 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding);
730
cc6e5340 731 get_layout_hdr(lo);
2130ff66
FI
732 if (list_empty(&lo->plh_segs)) {
733 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */
736 spin_lock(&clp->cl_lock);
737 BUG_ON(!list_empty(&lo->plh_layouts));
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock);
740 }
e5e94017
BH
741 spin_unlock(&ino->i_lock);
742
743 lseg = send_layoutget(lo, ctx, iomode);
2130ff66
FI
744 if (!lseg) {
745 spin_lock(&ino->i_lock);
746 if (list_empty(&lo->plh_segs)) {
747 spin_lock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
43f1b3da 750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
2130ff66
FI
751 }
752 spin_unlock(&ino->i_lock);
753 }
cf7d63f1 754 atomic_dec(&lo->plh_outstanding);
cc6e5340 755 put_layout_hdr(lo);
e5e94017
BH
756out:
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
566052c5 758 nfsi->layout->plh_flags, lseg);
e5e94017
BH
759 return lseg;
760out_unlock:
761 spin_unlock(&ino->i_lock);
762 goto out;
763}
b1f69b75
AA
764
765int
766pnfs_layout_process(struct nfs4_layoutget *lgp)
767{
768 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
769 struct nfs4_layoutget_res *res = &lgp->res;
770 struct pnfs_layout_segment *lseg;
b7edfaa1 771 struct inode *ino = lo->plh_inode;
43f1b3da 772 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
b1f69b75
AA
773 int status = 0;
774
fc1794c5
FI
775 /* Verify we got what we asked for.
776 * Note that because the xdr parsing only accepts a single
777 * element array, this can fail even if the server is behaving
778 * correctly.
779 */
780 if (lgp->args.range.iomode > res->range.iomode ||
781 res->range.offset != 0 ||
782 res->range.length != NFS4_MAX_UINT64) {
783 status = -EINVAL;
784 goto out;
785 }
b1f69b75
AA
786 /* Inject layout blob into I/O device driver */
787 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
788 if (!lseg || IS_ERR(lseg)) {
789 if (!lseg)
790 status = -ENOMEM;
791 else
792 status = PTR_ERR(lseg);
793 dprintk("%s: Could not allocate layout: error %d\n",
794 __func__, status);
795 goto out;
796 }
797
798 spin_lock(&ino->i_lock);
43f1b3da
FI
799 if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
800 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
801 dprintk("%s forget reply due to recall\n", __func__);
802 goto out_forget_reply;
803 }
804
805 if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
806 dprintk("%s forget reply due to state\n", __func__);
807 goto out_forget_reply;
808 }
b1f69b75 809 init_lseg(lo, lseg);
566052c5 810 lseg->pls_range = res->range;
b1f69b75
AA
811 *lgp->lsegpp = lseg;
812 pnfs_insert_layout(lo, lseg);
813
f7e8917a
FI
814 if (res->return_on_close) {
815 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
816 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
817 }
818
b1f69b75 819 /* Done processing layoutget. Set the layout stateid */
43f1b3da 820 pnfs_set_layout_stateid(lo, &res->stateid, false);
b1f69b75
AA
821 spin_unlock(&ino->i_lock);
822out:
823 return status;
43f1b3da
FI
824
825out_forget_reply:
826 spin_unlock(&ino->i_lock);
827 lseg->pls_layout = lo;
828 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
829 goto out;
b1f69b75
AA
830}
831
832/*
833 * Device ID cache. Currently supports one layout type per struct nfs_client.
834 * Add layout type to the lookup key to expand to support multiple types.
835 */
836int
837pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
838 void (*free_callback)(struct pnfs_deviceid_node *))
839{
840 struct pnfs_deviceid_cache *c;
841
842 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
843 if (!c)
844 return -ENOMEM;
845 spin_lock(&clp->cl_lock);
846 if (clp->cl_devid_cache != NULL) {
847 atomic_inc(&clp->cl_devid_cache->dc_ref);
848 dprintk("%s [kref [%d]]\n", __func__,
849 atomic_read(&clp->cl_devid_cache->dc_ref));
850 kfree(c);
851 } else {
852 /* kzalloc initializes hlists */
853 spin_lock_init(&c->dc_lock);
854 atomic_set(&c->dc_ref, 1);
855 c->dc_free_callback = free_callback;
856 clp->cl_devid_cache = c;
857 dprintk("%s [new]\n", __func__);
858 }
859 spin_unlock(&clp->cl_lock);
860 return 0;
861}
862EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
863
864/*
865 * Called from pnfs_layoutdriver_type->free_lseg
866 * last layout segment reference frees deviceid
867 */
868void
869pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
870 struct pnfs_deviceid_node *devid)
871{
872 struct nfs4_deviceid *id = &devid->de_id;
873 struct pnfs_deviceid_node *d;
874 struct hlist_node *n;
875 long h = nfs4_deviceid_hash(id);
876
877 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
878 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
879 return;
880
881 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
882 if (!memcmp(&d->de_id, id, sizeof(*id))) {
883 hlist_del_rcu(&d->de_node);
884 spin_unlock(&c->dc_lock);
885 synchronize_rcu();
886 c->dc_free_callback(devid);
887 return;
888 }
889 spin_unlock(&c->dc_lock);
890 /* Why wasn't it found in the list? */
891 BUG();
892}
893EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
894
895/* Find and reference a deviceid */
896struct pnfs_deviceid_node *
897pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
898{
899 struct pnfs_deviceid_node *d;
900 struct hlist_node *n;
901 long hash = nfs4_deviceid_hash(id);
902
903 dprintk("--> %s hash %ld\n", __func__, hash);
904 rcu_read_lock();
905 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
906 if (!memcmp(&d->de_id, id, sizeof(*id))) {
907 if (!atomic_inc_not_zero(&d->de_ref)) {
908 goto fail;
909 } else {
910 rcu_read_unlock();
911 return d;
912 }
913 }
914 }
915fail:
916 rcu_read_unlock();
917 return NULL;
918}
919EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
920
921/*
922 * Add a deviceid to the cache.
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
924 */
925struct pnfs_deviceid_node *
926pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
927{
928 struct pnfs_deviceid_node *d;
929 long hash = nfs4_deviceid_hash(&new->de_id);
930
931 dprintk("--> %s hash %ld\n", __func__, hash);
932 spin_lock(&c->dc_lock);
933 d = pnfs_find_get_deviceid(c, &new->de_id);
934 if (d) {
935 spin_unlock(&c->dc_lock);
936 dprintk("%s [discard]\n", __func__);
937 c->dc_free_callback(new);
938 return d;
939 }
940 INIT_HLIST_NODE(&new->de_node);
941 atomic_set(&new->de_ref, 1);
942 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
943 spin_unlock(&c->dc_lock);
944 dprintk("%s [new]\n", __func__);
945 return new;
946}
947EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
948
949void
950pnfs_put_deviceid_cache(struct nfs_client *clp)
951{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
953
954 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
956 int i;
957 /* Verify cache is empty */
958 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
959 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
960 clp->cl_devid_cache = NULL;
961 spin_unlock(&clp->cl_lock);
962 kfree(local);
963 }
964}
965EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);