]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/nfs/pnfs.c
pnfs: serialize LAYOUTGET(openstateid)
[mirror_ubuntu-bionic-kernel.git] / fs / nfs / pnfs.c
index db773428f95f28e03a9631d316f2eec711b2aa41..59ed68bf79faeb93d722f6c275f5c581726f8d93 100644 (file)
@@ -180,21 +180,21 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
 static void
 get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
-       assert_spin_locked(&lo->inode->i_lock);
-       lo->refcount++;
+       assert_spin_locked(&lo->plh_inode->i_lock);
+       lo->plh_refcount++;
 }
 
 static void
 put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
-       assert_spin_locked(&lo->inode->i_lock);
-       BUG_ON(lo->refcount == 0);
+       assert_spin_locked(&lo->plh_inode->i_lock);
+       BUG_ON(lo->plh_refcount == 0);
 
-       lo->refcount--;
-       if (!lo->refcount) {
+       lo->plh_refcount--;
+       if (!lo->plh_refcount) {
                dprintk("%s: freeing layout cache %p\n", __func__, lo);
-               BUG_ON(!list_empty(&lo->layouts));
-               NFS_I(lo->inode)->layout = NULL;
+               BUG_ON(!list_empty(&lo->plh_layouts));
+               NFS_I(lo->plh_inode)->layout = NULL;
                kfree(lo);
        }
 }
@@ -210,72 +210,110 @@ put_layout_hdr(struct inode *inode)
 static void
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 {
-       INIT_LIST_HEAD(&lseg->fi_list);
-       kref_init(&lseg->kref);
-       lseg->layout = lo;
+       INIT_LIST_HEAD(&lseg->pls_list);
+       atomic_set(&lseg->pls_refcount, 1);
+       smp_mb();
+       set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+       lseg->pls_layout = lo;
 }
 
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
 {
-       struct pnfs_layout_segment *lseg =
-               container_of(kref, struct pnfs_layout_segment, kref);
-       struct inode *ino = lseg->layout->inode;
+       struct inode *ino = lseg->pls_layout->plh_inode;
 
-       dprintk("--> %s\n", __func__);
        NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-       /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
+       /* Matched by get_layout_hdr in pnfs_insert_layout */
        put_layout_hdr(ino);
 }
 
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+               struct list_head *tmp_list)
+{
+       dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+               atomic_read(&lseg->pls_refcount),
+               test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+       if (atomic_dec_and_test(&lseg->pls_refcount)) {
+               struct inode *ino = lseg->pls_layout->plh_inode;
+
+               BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+               list_del(&lseg->pls_list);
+               if (list_empty(&lseg->pls_layout->plh_segs)) {
+                       struct nfs_client *clp;
+
+                       clp = NFS_SERVER(ino)->nfs_client;
+                       spin_lock(&clp->cl_lock);
+                       /* List does not take a reference, so no need for put here */
+                       list_del_init(&lseg->pls_layout->plh_layouts);
+                       spin_unlock(&clp->cl_lock);
+               }
+               list_add(&lseg->pls_list, tmp_list);
+               return 1;
+       }
+       return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
 {
-       if (!lseg)
-               return;
+       return (recall_iomode == IOMODE_ANY ||
+               lseg_iomode == recall_iomode);
+}
 
-       dprintk("%s: lseg %p ref %d\n", __func__, lseg,
-               atomic_read(&lseg->kref.refcount));
-       kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+                            struct list_head *tmp_list)
+{
+       int rv = 0;
+
+       if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+               /* Remove the reference keeping the lseg in the
+                * list.  It will now be removed when all
+                * outstanding io is finished.
+                */
+               rv = put_lseg_locked(lseg, tmp_list);
+       }
+       return rv;
 }
 
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns count of number of matching invalid lsegs remaining in list
+ * after call.
+ */
+static int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+                           struct list_head *tmp_list,
+                           u32 iomode)
 {
        struct pnfs_layout_segment *lseg, *next;
-       struct nfs_client *clp;
+       int invalid = 0, removed = 0;
 
        dprintk("%s:Begin lo %p\n", __func__, lo);
 
-       assert_spin_locked(&lo->inode->i_lock);
-       list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
-               dprintk("%s: freeing lseg %p\n", __func__, lseg);
-               list_move(&lseg->fi_list, tmp_list);
-       }
-       clp = NFS_SERVER(lo->inode)->nfs_client;
-       spin_lock(&clp->cl_lock);
-       /* List does not take a reference, so no need for put here */
-       list_del_init(&lo->layouts);
-       spin_unlock(&clp->cl_lock);
-       write_seqlock(&lo->seqlock);
-       clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-       write_sequnlock(&lo->seqlock);
-
-       dprintk("%s:Return\n", __func__);
+       list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+               if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+                       dprintk("%s: freeing lseg %p iomode %d "
+                               "offset %llu length %llu\n", __func__,
+                               lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+                               lseg->pls_range.length);
+                       invalid++;
+                       removed += mark_lseg_invalid(lseg, tmp_list);
+               }
+       dprintk("%s:Return %i\n", __func__, invalid - removed);
+       return invalid - removed;
 }
 
 static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+pnfs_free_lseg_list(struct list_head *free_me)
 {
-       struct pnfs_layout_segment *lseg;
+       struct pnfs_layout_segment *lseg, *tmp;
 
-       while (!list_empty(tmp_list)) {
-               lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
-                               fi_list);
-               dprintk("%s calling put_lseg on %p\n", __func__, lseg);
-               list_del(&lseg->fi_list);
-               put_lseg(lseg);
+       list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+               list_del(&lseg->pls_list);
+               free_lseg(lseg);
        }
 }
 
@@ -288,7 +326,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
        spin_lock(&nfsi->vfs_inode.i_lock);
        lo = nfsi->layout;
        if (lo) {
-               pnfs_clear_lseg_list(lo, &tmp_list);
+               set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+               mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
                /* Matched by refcount set to 1 in alloc_init_layout_hdr */
                put_layout_hdr_locked(lo);
        }
@@ -312,76 +351,57 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 
        while (!list_empty(&tmp_list)) {
                lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
-                               layouts);
+                               plh_layouts);
                dprintk("%s freeing layout for inode %lu\n", __func__,
-                       lo->inode->i_ino);
-               pnfs_destroy_layout(NFS_I(lo->inode));
+                       lo->plh_inode->i_ino);
+               pnfs_destroy_layout(NFS_I(lo->plh_inode));
        }
 }
 
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
+/* update lo->plh_stateid with new if is more recent */
 static void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
                        const nfs4_stateid *new)
 {
-       nfs4_stateid *old = &lo->stateid;
-       bool overwrite = false;
-
-       write_seqlock(&lo->seqlock);
-       if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
-           memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
-               overwrite = true;
-       else {
-               u32 oldseq, newseq;
+       u32 oldseq, newseq;
 
-               oldseq = be32_to_cpu(old->stateid.seqid);
-               newseq = be32_to_cpu(new->stateid.seqid);
-               if ((int)(newseq - oldseq) > 0)
-                       overwrite = true;
-       }
-       if (overwrite)
-               memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
-       write_sequnlock(&lo->seqlock);
+       oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+       newseq = be32_to_cpu(new->stateid.seqid);
+       if ((int)(newseq - oldseq) > 0)
+               memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
 }
 
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
-                             struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
 {
-       int seq;
-
-       dprintk("--> %s\n", __func__);
-       write_seqlock(&lo->seqlock);
-       do {
-               seq = read_seqbegin(&state->seqlock);
-               memcpy(lo->stateid.data, state->stateid.data,
-                      sizeof(state->stateid.data));
-       } while (read_seqretry(&state->seqlock, seq));
-       set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-       write_sequnlock(&lo->seqlock);
-       dprintk("<-- %s\n", __func__);
+       return (list_empty(&lo->plh_segs) &&
+                (atomic_read(&lo->plh_outstanding) > lget));
 }
 
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
-                       struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+                             struct nfs4_state *open_state)
 {
-       int seq;
+       int status = 0;
 
        dprintk("--> %s\n", __func__);
-       do {
-               seq = read_seqbegin(&lo->seqlock);
-               if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
-                       /* This will trigger retry of the read */
-                       pnfs_layout_from_open_stateid(lo, open_state);
-               } else
-                       memcpy(dst->data, lo->stateid.data,
-                              sizeof(lo->stateid.data));
-       } while (read_seqretry(&lo->seqlock, seq));
+       spin_lock(&lo->plh_inode->i_lock);
+       if (pnfs_layoutgets_blocked(lo, 1)) {
+               status = -EAGAIN;
+       } else if (list_empty(&lo->plh_segs)) {
+               int seq;
+
+               do {
+                       seq = read_seqbegin(&open_state->seqlock);
+                       memcpy(dst->data, open_state->stateid.data,
+                              sizeof(open_state->stateid.data));
+               } while (read_seqretry(&open_state->seqlock, seq));
+       } else
+               memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+       spin_unlock(&lo->plh_inode->i_lock);
        dprintk("<-- %s\n", __func__);
+       return status;
 }
 
 /*
@@ -395,7 +415,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
           struct nfs_open_context *ctx,
           u32 iomode)
 {
-       struct inode *ino = lo->inode;
+       struct inode *ino = lo->plh_inode;
        struct nfs_server *server = NFS_SERVER(ino);
        struct nfs4_layoutget *lgp;
        struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +424,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
        BUG_ON(ctx == NULL);
        lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-       if (lgp == NULL) {
-               put_layout_hdr(lo->inode);
+       if (lgp == NULL)
                return NULL;
-       }
        lgp->args.minlength = NFS4_MAX_UINT64;
        lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
        lgp->args.range.iomode = iomode;
@@ -424,7 +442,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        nfs4_proc_layoutget(lgp);
        if (!lseg) {
                /* remember that LAYOUTGET failed and suspend trying */
-               set_bit(lo_fail_bit(iomode), &lo->state);
+               set_bit(lo_fail_bit(iomode), &lo->plh_flags);
        }
        return lseg;
 }
@@ -450,35 +468,35 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 
        dprintk("%s:Begin\n", __func__);
 
-       assert_spin_locked(&lo->inode->i_lock);
-       if (list_empty(&lo->segs)) {
-               struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
+       assert_spin_locked(&lo->plh_inode->i_lock);
+       if (list_empty(&lo->plh_segs)) {
+               struct nfs_client *clp = NFS_SERVER(lo->plh_inode)->nfs_client;
 
                spin_lock(&clp->cl_lock);
-               BUG_ON(!list_empty(&lo->layouts));
-               list_add_tail(&lo->layouts, &clp->cl_layouts);
+               BUG_ON(!list_empty(&lo->plh_layouts));
+               list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
                spin_unlock(&clp->cl_lock);
        }
-       list_for_each_entry(lp, &lo->segs, fi_list) {
-               if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+       list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+               if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
                        continue;
-               list_add_tail(&lseg->fi_list, &lp->fi_list);
+               list_add_tail(&lseg->pls_list, &lp->pls_list);
                dprintk("%s: inserted lseg %p "
                        "iomode %d offset %llu length %llu before "
                        "lp %p iomode %d offset %llu length %llu\n",
-                       __func__, lseg, lseg->range.iomode,
-                       lseg->range.offset, lseg->range.length,
-                       lp, lp->range.iomode, lp->range.offset,
-                       lp->range.length);
+                       __func__, lseg, lseg->pls_range.iomode,
+                       lseg->pls_range.offset, lseg->pls_range.length,
+                       lp, lp->pls_range.iomode, lp->pls_range.offset,
+                       lp->pls_range.length);
                found = 1;
                break;
        }
        if (!found) {
-               list_add_tail(&lseg->fi_list, &lo->segs);
+               list_add_tail(&lseg->pls_list, &lo->plh_segs);
                dprintk("%s: inserted lseg %p "
                        "iomode %d offset %llu length %llu at tail\n",
-                       __func__, lseg, lseg->range.iomode,
-                       lseg->range.offset, lseg->range.length);
+                       __func__, lseg, lseg->pls_range.iomode,
+                       lseg->pls_range.offset, lseg->pls_range.length);
        }
        get_layout_hdr_locked(lo);
 
@@ -493,11 +511,10 @@ alloc_init_layout_hdr(struct inode *ino)
        lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
        if (!lo)
                return NULL;
-       lo->refcount = 1;
-       INIT_LIST_HEAD(&lo->layouts);
-       INIT_LIST_HEAD(&lo->segs);
-       seqlock_init(&lo->seqlock);
-       lo->inode = ino;
+       lo->plh_refcount = 1;
+       INIT_LIST_HEAD(&lo->plh_layouts);
+       INIT_LIST_HEAD(&lo->plh_segs);
+       lo->plh_inode = ino;
        return lo;
 }
 
@@ -510,9 +527,12 @@ pnfs_find_alloc_layout(struct inode *ino)
        dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 
        assert_spin_locked(&ino->i_lock);
-       if (nfsi->layout)
-               return nfsi->layout;
-
+       if (nfsi->layout) {
+               if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+                       return NULL;
+               else
+                       return nfsi->layout;
+       }
        spin_unlock(&ino->i_lock);
        new = alloc_init_layout_hdr(ino);
        spin_lock(&ino->i_lock);
@@ -538,7 +558,7 @@ pnfs_find_alloc_layout(struct inode *ino)
 static int
 is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
 {
-       return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+       return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
 }
 
 /*
@@ -551,18 +571,19 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
 
        dprintk("%s:Begin\n", __func__);
 
-       assert_spin_locked(&lo->inode->i_lock);
-       list_for_each_entry(lseg, &lo->segs, fi_list) {
-               if (is_matching_lseg(lseg, iomode)) {
+       assert_spin_locked(&lo->plh_inode->i_lock);
+       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+                   is_matching_lseg(lseg, iomode)) {
                        ret = lseg;
                        break;
                }
-               if (cmp_layout(iomode, lseg->range.iomode) > 0)
+               if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
                        break;
        }
 
        dprintk("%s:Return lseg %p ref %d\n",
-               __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+               __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
        return ret;
 }
 
@@ -597,16 +618,22 @@ pnfs_update_layout(struct inode *ino,
        }
 
        /* if LAYOUTGET already failed once we don't try again */
-       if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+       if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+               goto out_unlock;
+
+       if (pnfs_layoutgets_blocked(lo, 0))
                goto out_unlock;
+       atomic_inc(&lo->plh_outstanding);
 
-       get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+       get_layout_hdr_locked(lo);
        spin_unlock(&ino->i_lock);
 
        lseg = send_layoutget(lo, ctx, iomode);
+       atomic_dec(&lo->plh_outstanding);
+       put_layout_hdr(ino);
 out:
        dprintk("%s end, state 0x%lx lseg %p\n", __func__,
-               nfsi->layout->state, lseg);
+               nfsi->layout->plh_flags, lseg);
        return lseg;
 out_unlock:
        spin_unlock(&ino->i_lock);
@@ -619,7 +646,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
        struct nfs4_layoutget_res *res = &lgp->res;
        struct pnfs_layout_segment *lseg;
-       struct inode *ino = lo->inode;
+       struct inode *ino = lo->plh_inode;
        int status = 0;
 
        /* Inject layout blob into I/O device driver */
@@ -636,7 +663,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 
        spin_lock(&ino->i_lock);
        init_lseg(lo, lseg);
-       lseg->range = res->range;
+       lseg->pls_range = res->range;
        *lgp->lsegpp = lseg;
        pnfs_insert_layout(lo, lseg);