git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/nfs/pnfs.c
NFSv4.1/pnfs: Don't prevent layoutgets when doing return-on-close
[mirror_ubuntu-bionic-kernel.git] / fs / nfs / pnfs.c
index 0ba9a02c95664960f8c0f46ea97249bd8653fe16..8c5f9f59efbb2ea1249648a088d39560a6dad945 100644 (file)
@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
 {
        struct pnfs_layout_segment *s;
 
-       if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
                return false;
 
        list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
        return true;
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       lo->plh_block_lgets++;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
+       return true;
+}
+
 static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
                struct pnfs_layout_hdr *lo, struct inode *inode)
 {
@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
        if (pnfs_layout_need_return(lo, lseg)) {
                nfs4_stateid stateid;
                enum pnfs_iomode iomode;
+               bool send;
 
                stateid = lo->plh_stateid;
                iomode = lo->plh_return_iomode;
-               /* decreased in pnfs_send_layoutreturn() */
-               lo->plh_block_lgets++;
-               lo->plh_return_iomode = 0;
+               send = pnfs_prepare_layoutreturn(lo);
                spin_unlock(&inode->i_lock);
-               pnfs_get_layout_hdr(lo);
-
-               /* Send an async layoutreturn so we dont deadlock */
-               pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               if (send) {
+                       /* Send an async layoutreturn so we dont deadlock */
+                       pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               }
        } else
                spin_unlock(&inode->i_lock);
 }
@@ -411,6 +422,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+                       spin_unlock(&inode->i_lock);
+                       return;
+               }
                pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                spin_unlock(&inode->i_lock);
@@ -451,6 +466,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
        if (atomic_dec_and_test(&lseg->pls_refcount)) {
                struct pnfs_layout_hdr *lo = lseg->pls_layout;
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+                       return;
                pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                pnfs_free_lseg_async(lseg);
@@ -812,12 +829,10 @@ pnfs_layout_returning(const struct pnfs_layout_hdr *lo,
 /* lget is set to 1 if called from inside send_layoutget call chain */
 static bool
 pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_range *range, int lget)
+                       struct pnfs_layout_range *range)
 {
        return lo->plh_block_lgets ||
                test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
-               (list_empty(&lo->plh_segs) &&
-                (atomic_read(&lo->plh_outstanding) > lget)) ||
                pnfs_layout_returning(lo, range);
 }
 
@@ -830,7 +845,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
-       if (pnfs_layoutgets_blocked(lo, range, 1)) {
+       if (pnfs_layoutgets_blocked(lo, range)) {
                status = -EAGAIN;
        } else if (!nfs4_valid_open_stateid(open_state)) {
                status = -EBADF;
@@ -924,6 +939,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
        smp_mb__after_atomic();
        wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
+       rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
 static int
@@ -978,6 +994,7 @@ _pnfs_return_layout(struct inode *ino)
        LIST_HEAD(tmp_list);
        nfs4_stateid stateid;
        int status = 0, empty;
+       bool send;
 
        dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
 
@@ -1007,17 +1024,18 @@ _pnfs_return_layout(struct inode *ino)
        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (empty) {
                spin_unlock(&ino->i_lock);
-               pnfs_put_layout_hdr(lo);
                dprintk("NFS: %s no layout segments to return\n", __func__);
-               goto out;
+               goto out_put_layout_hdr;
        }
 
        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-       lo->plh_block_lgets++;
+       send = pnfs_prepare_layoutreturn(lo);
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
-
-       status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+       if (send)
+               status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+out_put_layout_hdr:
+       pnfs_put_layout_hdr(lo);
 out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
@@ -1087,7 +1105,9 @@ bool pnfs_roc(struct inode *ino)
                }
        if (!found)
                goto out_noroc;
-       lo->plh_block_lgets++;
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               goto out_noroc;
+       lo->plh_return_iomode = IOMODE_ANY;
        pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
@@ -1097,13 +1117,9 @@ bool pnfs_roc(struct inode *ino)
 out_noroc:
        if (lo) {
                stateid = lo->plh_stateid;
-               layoutreturn =
-                       test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-               if (layoutreturn) {
-                       lo->plh_block_lgets++;
-                       pnfs_get_layout_hdr(lo);
-               }
+               if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                                          &lo->plh_flags))
+                       layoutreturn = pnfs_prepare_layoutreturn(lo);
        }
        spin_unlock(&ino->i_lock);
        if (layoutreturn) {
@@ -1119,7 +1135,7 @@ void pnfs_roc_release(struct inode *ino)
 
        spin_lock(&ino->i_lock);
        lo = NFS_I(ino)->layout;
-       lo->plh_block_lgets--;
+       pnfs_clear_layoutreturn_waitbit(lo);
        if (atomic_dec_and_test(&lo->plh_refcount)) {
                pnfs_detach_layout_hdr(lo);
                spin_unlock(&ino->i_lock);
@@ -1146,15 +1162,18 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
        struct pnfs_layout_segment *lseg;
        nfs4_stateid stateid;
        u32 current_seqid;
-       bool found = false, layoutreturn = false;
+       bool layoutreturn = false;
 
        spin_lock(&ino->i_lock);
-       list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
-               if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
-                       rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
-                       found = true;
-                       goto out;
-               }
+       list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) {
+               if (!test_bit(NFS_LSEG_ROC, &lseg->pls_flags))
+                       continue;
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+                       continue;
+               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+               spin_unlock(&ino->i_lock);
+               return true;
+       }
        lo = nfsi->layout;
        current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
 
@@ -1162,23 +1181,19 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
         * a barrier, we choose the worst-case barrier.
         */
        *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
-out:
-       if (!found) {
-               stateid = lo->plh_stateid;
-               layoutreturn =
-                       test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-               if (layoutreturn) {
-                       lo->plh_block_lgets++;
-                       pnfs_get_layout_hdr(lo);
-               }
-       }
+       stateid = lo->plh_stateid;
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                                          &lo->plh_flags))
+               layoutreturn = pnfs_prepare_layoutreturn(lo);
+       if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+
        spin_unlock(&ino->i_lock);
        if (layoutreturn) {
-               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
                pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
+               return true;
        }
-       return found;
+       return false;
 }
 
 /*
@@ -1423,6 +1438,8 @@ static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
 
 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
 {
+       if (!pnfs_should_retry_layoutget(lo))
+               return false;
        /*
         * send layoutcommit as it can hold up layoutreturn due to lseg
         * reference
@@ -1518,8 +1535,7 @@ lookup_again:
         * Because we free lsegs before sending LAYOUTRETURN, we need to wait
         * for LAYOUTRETURN even if first is true.
         */
-       if (!lseg && pnfs_should_retry_layoutget(lo) &&
-           test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
+       if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
                spin_unlock(&ino->i_lock);
                dprintk("%s wait for layoutreturn\n", __func__);
                if (pnfs_prepare_to_retry_layoutget(lo)) {
@@ -1532,7 +1548,7 @@ lookup_again:
                goto out_put_layout_hdr;
        }
 
-       if (pnfs_layoutgets_blocked(lo, &arg, 0))
+       if (pnfs_layoutgets_blocked(lo, &arg))
                goto out_unlock;
        atomic_inc(&lo->plh_outstanding);
        spin_unlock(&ino->i_lock);
@@ -1609,7 +1625,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                goto out_forget_reply;
        }
 
-       if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) {
+       if (pnfs_layoutgets_blocked(lo, &lgp->args.range)) {
                dprintk("%s forget reply due to state\n", __func__);
                goto out_forget_reply;
        }
@@ -1695,7 +1711,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
        spin_lock(&inode->i_lock);
        /* set failure bit so that pnfs path will be retried later */
        pnfs_layout_set_fail_bit(lo, iomode);
-       set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
        if (lo->plh_return_iomode == 0)
                lo->plh_return_iomode = range.iomode;
        else if (lo->plh_return_iomode != range.iomode)
@@ -2207,13 +2222,12 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        if (ld->prepare_layoutcommit) {
                status = ld->prepare_layoutcommit(&data->args);
                if (status) {
+                       put_rpccred(data->cred);
                        spin_lock(&inode->i_lock);
                        set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
                        if (end_pos > nfsi->layout->plh_lwb)
                                nfsi->layout->plh_lwb = end_pos;
-                       spin_unlock(&inode->i_lock);
-                       put_rpccred(data->cred);
-                       goto clear_layoutcommitting;
+                       goto out_unlock;
                }
        }
 
@@ -2254,7 +2268,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
 
 #if IS_ENABLED(CONFIG_NFS_V4_2)
 int
-pnfs_report_layoutstat(struct inode *inode)
+pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
 {
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
        struct nfs_server *server = NFS_SERVER(inode);
@@ -2281,7 +2295,7 @@ pnfs_report_layoutstat(struct inode *inode)
        pnfs_get_layout_hdr(hdr);
        spin_unlock(&inode->i_lock);
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), gfp_flags);
        if (!data) {
                status = -ENOMEM;
                goto out_put;