Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 18 Nov 2017 19:22:04 +0000 (11:22 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 18 Nov 2017 19:22:04 +0000 (11:22 -0800)
Pull nfsd updates from Bruce Fields:
 "Lots of good bugfixes, including:

   -  fix a number of races in the NFSv4+ state code

   -  fix some shutdown crashes in multiple-network-namespace cases

   -  relax our 4.1 session limits; if you've an artificially low limit
      to the number of 4.1 clients that can mount simultaneously, try
      upgrading"

* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
  SUNRPC: Improve ordering of transport processing
  nfsd: deal with revoked delegations appropriately
  svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
  nfsd: use net->ns.inum as net ID
  rpc: remove some BUG()s
  svcrdma: Preserve CB send buffer across retransmits
  nfsd: avoid gettimeofday for nfssvc_boot time
  fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
  fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
  fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
  lockd: double unregister of inetaddr notifiers
  nfsd4: catch some false session retries
  nfsd4: fix cached replies to solo SEQUENCE compounds
  sunrpc: make function _svc_create_xprt static
  SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
  nfsd: use ARRAY_SIZE
  nfsd: give out fewer session slots as limit approaches
  nfsd: increase DRC cache limit
  nfsd: remove unnecessary nofilehandle checks
  nfs_common: convert int to bool
  ...

18 files changed:
fs/lockd/svc.c
fs/nfs_common/grace.c
fs/nfsd/fault_inject.c
fs/nfsd/netns.h
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/xdr4.h
include/linux/fs.h
include/linux/sunrpc/svc.h
include/trace/events/sunrpc.h
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/svc_xprt.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b837fb7e290a6a63346f1ad7f00e63f2db8a5382..a8e3777c94dc6c44ae050168bf7906eecf71d085 100644
@@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv)
                printk(KERN_WARNING
                        "lockd_up: svc_rqst allocation failed, error=%d\n",
                        error);
+               lockd_unregister_notifiers();
                goto out_rqst;
        }
 
@@ -459,13 +460,16 @@ int lockd_up(struct net *net)
        }
 
        error = lockd_up_net(serv, net);
-       if (error < 0)
-               goto err_net;
+       if (error < 0) {
+               lockd_unregister_notifiers();
+               goto err_put;
+       }
 
        error = lockd_start_svc(serv);
-       if (error < 0)
-               goto err_start;
-
+       if (error < 0) {
+               lockd_down_net(serv, net);
+               goto err_put;
+       }
        nlmsvc_users++;
        /*
         * Note: svc_serv structures have an initial use count of 1,
@@ -476,12 +480,6 @@ err_put:
 err_create:
        mutex_unlock(&nlmsvc_mutex);
        return error;
-
-err_start:
-       lockd_down_net(serv, net);
-err_net:
-       lockd_unregister_notifiers();
-       goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
 
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 420d3a0ab258fb2b312310e50081bbab30f2c2ae..897b299db55e01e291641b35d25f4c903962fec5 100644
@@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm)
 }
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
-int
+static bool
 __state_in_grace(struct net *net, bool open)
 {
        struct list_head *grace_list = net_generic(net, grace_net_id);
@@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open)
        return false;
 }
 
-int locks_in_grace(struct net *net)
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
+bool locks_in_grace(struct net *net)
 {
-       return __state_in_grace(net, 0);
+       return __state_in_grace(net, false);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
-int opens_in_grace(struct net *net)
+bool opens_in_grace(struct net *net)
 {
-       return __state_in_grace(net, 1);
+       return __state_in_grace(net, true);
 }
 EXPORT_SYMBOL_GPL(opens_in_grace);
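
Lock managers call locks_in_grace()/opens_in_grace() to decide whether to
honor ordinary requests or only reclaims. A minimal userspace model of that
decision, purely illustrative (the flag and function names below are
assumptions, not nfsd's real call sites):

    #include <stdbool.h>
    #include <stdio.h>

    static bool in_grace = true;            /* stands in for locks_in_grace(net) */

    static bool may_grant(bool is_reclaim)
    {
            if (in_grace)
                    return is_reclaim;      /* grace period: reclaims only */
            return true;                    /* afterwards: anything goes */
    }

    int main(void)
    {
            printf("reclaim during grace:  %d\n", may_grant(true));  /* 1 */
            printf("ordinary during grace: %d\n", may_grant(false)); /* 0 */
            in_grace = false;
            printf("ordinary after grace:  %d\n", may_grant(false)); /* 1 */
            return 0;
    }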
 
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 6dfede6d172aa276ba99544cf561cf4744220ff7..84831253203dda4a4926db4a532098ffee8a1f4c 100644
@@ -12,6 +12,7 @@
 #include <linux/nsproxy.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/uaccess.h>
+#include <linux/kernel.h>
 
 #include "state.h"
 #include "netns.h"
@@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
        },
 };
 
-#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
-
 int nfsd_fault_inject_init(void)
 {
        unsigned int i;
@@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void)
        if (!debug_dir)
                goto fail;
 
-       for (i = 0; i < NUM_INJECT_OPS; i++) {
+       for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
                op = &inject_ops[i];
                if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
                        goto fail;
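
ARRAY_SIZE() replaces the hand-rolled NUM_INJECT_OPS above. A userspace
sketch of the idiom; the kernel's version also rejects non-array arguments
at compile time via __must_be_array, which this simplified form omits:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    struct op { const char *file; };        /* names below are invented */

    int main(void)
    {
            struct op inject_ops[] = { {"clients"}, {"locks"}, {"openers"} };

            /* Derived from the array itself, so the count can't drift
             * out of sync when entries are added or removed. */
            for (size_t i = 0; i < ARRAY_SIZE(inject_ops); i++)
                    printf("%s\n", inject_ops[i].file);
            return 0;
    }
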
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231a9d0fb71e4e440a9f8efa7113839c4392..1c91391f48055699bb5c9ffbe64ca31d709c7518 100644
@@ -107,7 +107,7 @@ struct nfsd_net {
        bool lockd_up;
 
        /* Time of server startup */
-       struct timeval nfssvc_boot;
+       struct timespec64 nfssvc_boot;
 
        /*
         * Max number of connections this nfsd container will allow. Defaults
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f38acd9054419606e3abd25060599960d38c6f2c..2758480555faa504b1aafc204ea549361bf3b932 100644
@@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
        if (resp->status == 0) {
                *p++ = htonl(resp->count);
                *p++ = htonl(resp->committed);
-               *p++ = htonl(nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_usec);
+               /* unique identifier, y2038 overflow can be ignored */
+               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
        }
        return xdr_ressize_check(rqstp, p);
 }
@@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
        p = encode_wcc_data(rqstp, p, &resp->fh);
        /* Write verifier */
        if (resp->status == 0) {
-               *p++ = htonl(nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_usec);
+               /* unique identifier, y2038 overflow can be ignored */
+               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
        }
        return xdr_ressize_check(rqstp, p);
 }
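
The NFSv3 write verifier is an opaque 8-byte cookie that only needs to change
across server reboots. A hedged userspace sketch of the same packing (the
clock source here is a stand-in for nn->nfssvc_boot):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec boot;
            uint32_t verf[2];

            clock_gettime(CLOCK_REALTIME, &boot);

            /* Only uniqueness matters, so truncating tv_sec to 32 bits
             * is harmless even after 2038 -- hence the casts above. */
            verf[0] = htonl((uint32_t)boot.tv_sec);
            verf[1] = htonl((uint32_t)boot.tv_nsec);
            printf("verifier: %08x%08x\n", ntohl(verf[0]), ntohl(verf[1]));
            return 0;
    }
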
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ea45d954e8d7c53cbb3db6dcbf8ac3958b314a90..7d888369f85a4194b0ddf0c2202bb693fe9cac99 100644
@@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
 
        trace_layout_recall(&ls->ls_stid.sc_stateid);
 
-       atomic_inc(&ls->ls_stid.sc_count);
+       refcount_inc(&ls->ls_stid.sc_count);
        nfsd4_run_cb(&ls->ls_recall);
 
 out_unlock:
@@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
                        goto done;
        }
 
-       atomic_inc(&ls->ls_stid.sc_count);
+       refcount_inc(&ls->ls_stid.sc_count);
        list_add_tail(&new->lo_perstate, &ls->ls_layouts);
        new = NULL;
 done:
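
This is the first of the atomic_t -> refcount_t conversions in the series
(sc_count here, fi_ref and co_odcount below). refcount_t saturates instead of
wrapping on overflow and WARNs on misuse, turning refcount bugs into loud
diagnostics rather than silent use-after-free. A rough userspace model of the
two operations these paths rely on, omitting the kernel's saturation and
warnings:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { atomic_uint refs; } refcount_t;

    static void refcount_inc(refcount_t *r)
    {
            atomic_fetch_add(&r->refs, 1);
    }

    /* Take a reference only if the object is still live; RCU lookup
     * paths (e.g. find_file_locked below) depend on this. */
    static bool refcount_inc_not_zero(refcount_t *r)
    {
            unsigned int old = atomic_load(&r->refs);

            do {
                    if (old == 0)
                            return false;
            } while (!atomic_compare_exchange_weak(&r->refs, &old, old + 1));
            return true;
    }

    int main(void)
    {
            refcount_t rc = { 1 };

            refcount_inc(&rc);
            printf("refs=%u\n", atomic_load(&rc.refs));        /* refs=2 */
            printf("got=%d\n", refcount_inc_not_zero(&rc));    /* got=1 */
            return 0;
    }
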
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8487486ec4963efb72477e7cf2f19616108f12f2..008ea0b627d02d5a06f8b3febb793627e11b70c3 100644
@@ -485,9 +485,6 @@ static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
            union nfsd4_op_u *u)
 {
-       if (!cstate->current_fh.fh_dentry)
-               return nfserr_nofilehandle;
-
        u->getfh = &cstate->current_fh;
        return nfs_ok;
 }
@@ -535,9 +532,6 @@ static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
             union nfsd4_op_u *u)
 {
-       if (!cstate->current_fh.fh_dentry)
-               return nfserr_nofilehandle;
-
        fh_dup2(&cstate->save_fh, &cstate->current_fh);
        if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
                memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
@@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
        /*
         * This is opaque to client, so no need to byte-swap. Use
-        * __force to keep sparse happy
+        * __force to keep sparse happy. y2038 time_t overflow is
+        * irrelevant in this usage.
         */
        verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-       verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
+       verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
        memcpy(verifier->data, verf, sizeof(verifier->data));
 }
 
@@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
           union nfsd4_op_u *u)
 {
        struct nfsd4_link *link = &u->link;
-       __be32 status = nfserr_nofilehandle;
+       __be32 status;
 
-       if (!cstate->save_fh.fh_dentry)
-               return status;
        status = nfsd_link(rqstp, &cstate->current_fh,
                           link->li_name, link->li_namelen, &cstate->save_fh);
        if (!status)
@@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
             union nfsd4_op_u *u)
 {
        struct nfsd4_rename *rename = &u->rename;
-       __be32 status = nfserr_nofilehandle;
+       __be32 status;
 
-       if (!cstate->save_fh.fh_dentry)
-               return status;
        if (opens_in_grace(SVC_NET(rqstp)) &&
                !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
                return nfserr_grace;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b225b2207b226b1113e1973ec1e1b..b82817767b9da4ea6e8e3fc0cde8e6f068756ca7 100644
@@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi)
 {
        might_lock(&state_lock);
 
-       if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
+       if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
                hlist_del_rcu(&fi->fi_hash);
                spin_unlock(&state_lock);
                WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
@@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp)
        co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
        if (co) {
                co->co_client = clp;
-               atomic_set(&co->co_odcount, 1);
+               refcount_set(&co->co_odcount, 1);
        }
        return co;
 }
@@ -586,7 +586,7 @@ static inline void
 get_clnt_odstate(struct nfs4_clnt_odstate *co)
 {
        if (co)
-               atomic_inc(&co->co_odcount);
+               refcount_inc(&co->co_odcount);
 }
 
 static void
@@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co)
                return;
 
        fp = co->co_file;
-       if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+       if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
                list_del(&co->co_perfile);
                spin_unlock(&fp->fi_lock);
 
@@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
        stid->sc_stateid.si_opaque.so_id = new_id;
        stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
        /* Will be incremented before return to client: */
-       atomic_set(&stid->sc_count, 1);
+       refcount_set(&stid->sc_count, 1);
        spin_lock_init(&stid->sc_lock);
 
        /*
@@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s)
 
        might_lock(&clp->cl_lock);
 
-       if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
+       if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
                wake_up_all(&close_wq);
                return;
        }
@@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
        if (status)
                return status;
        ++fp->fi_delegees;
-       atomic_inc(&dp->dl_stid.sc_count);
+       refcount_inc(&dp->dl_stid.sc_count);
        dp->dl_stid.sc_type = NFS4_DELEG_STID;
        list_add(&dp->dl_perfile, &fp->fi_delegations);
        list_add(&dp->dl_perclnt, &clp->cl_delegations);
@@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 
        WARN_ON_ONCE(!list_empty(&stp->st_locks));
 
-       if (!atomic_dec_and_test(&s->sc_count)) {
+       if (!refcount_dec_and_test(&s->sc_count)) {
                wake_up_all(&close_wq);
                return;
        }
@@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses)
 {
        int i;
 
-       for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+       for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+               free_svc_cred(&ses->se_slots[i]->sl_cred);
                kfree(ses->se_slots[i]);
+       }
 }
 
 /*
@@ -1472,6 +1474,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
        spin_lock(&nfsd_drc_lock);
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
                    nfsd_drc_max_mem - nfsd_drc_mem_used);
+       /*
+        * Never use more than a third of the remaining memory,
+        * unless it's the only way to give this client a slot:
+        */
+       avail = clamp_t(int, avail, slotsize, avail/3);
        num = min_t(int, num, avail / slotsize);
        nfsd_drc_mem_used += num * slotsize;
        spin_unlock(&nfsd_drc_lock);
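
Worked example of the new clamp: with 90 KB of DRC memory left and an
invented 2 KB slot size, avail is clamped to a third (30 KB), so this client
gets 15 of the 45 slots it could otherwise have claimed:

    #include <stdio.h>

    /* kernel-style helpers: hi is applied after lo */
    #define max_t(type, a, b) ((type)(a) > (type)(b) ? (type)(a) : (type)(b))
    #define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))
    #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)

    int main(void)
    {
            int slotsize = 2048;            /* illustrative only */
            int avail = 90 * 1024;          /* DRC memory still unclaimed */

            avail = clamp_t(int, avail, slotsize, avail / 3);
            printf("slots granted: %d\n", avail / slotsize);   /* 15 */
            return 0;
    }
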
@@ -2072,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
        s = find_stateid_locked(cl, t);
        if (s != NULL) {
                if (typemask & s->sc_type)
-                       atomic_inc(&s->sc_count);
+                       refcount_inc(&s->sc_count);
                else
                        s = NULL;
        }
@@ -2287,14 +2294,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 
        dprintk("--> %s slot %p\n", __func__, slot);
 
+       slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
        slot->sl_opcnt = resp->opcnt;
        slot->sl_status = resp->cstate.status;
+       free_svc_cred(&slot->sl_cred);
+       copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
 
-       slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
-       if (nfsd4_not_cached(resp)) {
-               slot->sl_datalen = 0;
+       if (!nfsd4_cache_this(resp)) {
+               slot->sl_flags &= ~NFSD4_SLOT_CACHED;
                return;
        }
+       slot->sl_flags |= NFSD4_SLOT_CACHED;
+
        base = resp->cstate.data_offset;
        slot->sl_datalen = buf->len - base;
        if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
@@ -2321,8 +2332,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
        op = &args->ops[resp->opcnt - 1];
        nfsd4_encode_operation(resp, op);
 
-       /* Return nfserr_retry_uncached_rep in next operation. */
-       if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
+       if (slot->sl_flags & NFSD4_SLOT_CACHED)
+               return op->status;
+       if (args->opcnt == 1) {
+               /*
+                * The original operation wasn't a solo sequence--we
+                * always cache those--so this retry must not match the
+                * original:
+                */
+               op->status = nfserr_seq_false_retry;
+       } else {
                op = &args->ops[resp->opcnt++];
                op->status = nfserr_retry_uncached_rep;
                nfsd4_encode_operation(resp, op);
@@ -2986,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
        return xb->len > session->se_fchannel.maxreq_sz;
 }
 
+static bool replay_matches_cache(struct svc_rqst *rqstp,
+                struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
+{
+       struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+       if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
+           (bool)seq->cachethis)
+               return false;
+       /*
+        * If there's an error then the reply can have fewer ops than
+        * the call.  But if we cached a reply with *more* ops than the
+        * call you're sending us now, then this new call is clearly not
+        * really a replay of the old one:
+        */
+       if (slot->sl_opcnt < argp->opcnt)
+               return false;
+       /* This is the only check explicitly called by spec: */
+       if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
+               return false;
+       /*
+        * There may be more comparisons we could actually do, but the
+        * spec doesn't require us to catch every case where the calls
+        * don't match (that would require caching the call as well as
+        * the reply), so we don't bother.
+        */
+       return true;
+}
+
 __be32
 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                union nfsd4_op_u *u)
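
The (bool) casts in replay_matches_cache() are load-bearing: the flag test
yields 0 or the raw bit value (2 for NFSD4_SLOT_CACHETHIS), while
seq->cachethis is 0 or 1, so comparing them unnormalized would report a false
mismatch. A short illustration:

    #include <stdbool.h>
    #include <stdio.h>

    #define NFSD4_SLOT_CACHETHIS (1 << 1)

    int main(void)
    {
            unsigned char sl_flags = NFSD4_SLOT_CACHETHIS;   /* == 2 */
            int cachethis = 1;

            /* 1: 2 != 1, a spurious mismatch */
            printf("%d\n", (sl_flags & NFSD4_SLOT_CACHETHIS) != cachethis);
            /* 0: both sides normalize to true */
            printf("%d\n", (bool)(sl_flags & NFSD4_SLOT_CACHETHIS) !=
                           (bool)cachethis);
            return 0;
    }
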
@@ -3045,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                status = nfserr_seq_misordered;
                if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
                        goto out_put_session;
+               status = nfserr_seq_false_retry;
+               if (!replay_matches_cache(rqstp, seq, slot))
+                       goto out_put_session;
                cstate->slot = slot;
                cstate->session = session;
                cstate->clp = clp;
@@ -3351,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 {
        lockdep_assert_held(&state_lock);
 
-       atomic_set(&fp->fi_ref, 1);
+       refcount_set(&fp->fi_ref, 1);
        spin_lock_init(&fp->fi_lock);
        INIT_LIST_HEAD(&fp->fi_stateids);
        INIT_LIST_HEAD(&fp->fi_delegations);
@@ -3514,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
                        continue;
                if (local->st_stateowner == &oo->oo_owner) {
                        ret = local;
-                       atomic_inc(&ret->st_stid.sc_count);
+                       refcount_inc(&ret->st_stid.sc_count);
                        break;
                }
        }
@@ -3573,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
                goto out_unlock;
 
        open->op_stp = NULL;
-       atomic_inc(&stp->st_stid.sc_count);
+       refcount_inc(&stp->st_stid.sc_count);
        stp->st_stid.sc_type = NFS4_OPEN_STID;
        INIT_LIST_HEAD(&stp->st_locks);
        stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
@@ -3621,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
         * there should be no danger of the refcount going back up again at
         * this point.
         */
-       wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
+       wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
 
        release_all_access(s);
        if (s->st_stid.sc_file) {
@@ -3647,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
 
        hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
                if (fh_match(&fp->fi_fhandle, fh)) {
-                       if (atomic_inc_not_zero(&fp->fi_ref))
+                       if (refcount_inc_not_zero(&fp->fi_ref))
                                return fp;
                }
        }
@@ -3783,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
         * lock) we know the server hasn't removed the lease yet, we know
         * it's safe to take a reference.
         */
-       atomic_inc(&dp->dl_stid.sc_count);
+       refcount_inc(&dp->dl_stid.sc_count);
        nfsd4_run_cb(&dp->dl_recall);
 }
 
@@ -3966,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
 {
        struct nfs4_stid *ret;
 
-       ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+       ret = find_stateid_by_type(cl, s,
+                               NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
        if (!ret)
                return NULL;
        return delegstateid(ret);
@@ -3989,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
        deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
        if (deleg == NULL)
                goto out;
+       if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+               nfs4_put_stid(&deleg->dl_stid);
+               if (cl->cl_minorversion)
+                       status = nfserr_deleg_revoked;
+               goto out;
+       }
        flags = share_access_to_flags(open->op_share_access);
        status = nfs4_check_delegmode(deleg, flags);
        if (status) {
@@ -4858,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     struct nfs4_stid **s, struct nfsd_net *nn)
 {
        __be32 status;
+       bool return_revoked = false;
+
+       /*
+        *  only return revoked delegations if explicitly asked.
+        *  otherwise we report revoked or bad_stateid status.
+        */
+       if (typemask & NFS4_REVOKED_DELEG_STID)
+               return_revoked = true;
+       else if (typemask & NFS4_DELEG_STID)
+               typemask |= NFS4_REVOKED_DELEG_STID;
 
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
                return nfserr_bad_stateid;
@@ -4872,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        *s = find_stateid_by_type(cstate->clp, stateid, typemask);
        if (!*s)
                return nfserr_bad_stateid;
+       if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+               nfs4_put_stid(*s);
+               if (cstate->minorversion)
+                       return nfserr_deleg_revoked;
+               return nfserr_bad_stateid;
+       }
        return nfs_ok;
 }
 
@@ -5071,7 +5144,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                ret = nfserr_locks_held;
                break;
        case NFS4_LOCK_STID:
-               atomic_inc(&s->sc_count);
+               refcount_inc(&s->sc_count);
                spin_unlock(&cl->cl_lock);
                ret = nfsd4_free_lock_stateid(stateid, s);
                goto out;
@@ -5578,7 +5651,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 
        lockdep_assert_held(&clp->cl_lock);
 
-       atomic_inc(&stp->st_stid.sc_count);
+       refcount_inc(&stp->st_stid.sc_count);
        stp->st_stid.sc_type = NFS4_LOCK_STID;
        stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
        get_nfs4_file(fp);
@@ -5604,7 +5677,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
 
        list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
                if (lst->st_stid.sc_file == fp) {
-                       atomic_inc(&lst->st_stid.sc_count);
+                       refcount_inc(&lst->st_stid.sc_count);
                        return lst;
                }
        }
@@ -7006,8 +7079,8 @@ nfs4_state_start_net(struct net *net)
        nn->nfsd4_manager.block_opens = true;
        locks_start_grace(net, &nn->nfsd4_manager);
        nfsd4_client_tracking_init(net);
-       printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
-              nn->nfsd4_grace, net);
+       printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+              nn->nfsd4_grace, net->ns.inum);
        queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
        return 0;
 }
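
Printing the struct net pointer leaked a kernel address and wasn't stable
across boots; the namespace's inode number is a harmless, stable identifier.
It is the same number user space sees on the namespace's proc file, e.g.
(hypothetical check, not part of the patch):

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat st;

            /* st_ino of /proc/<pid>/ns/net is the namespace inum that
             * nfsd now prints as "net %x". */
            if (stat("/proc/self/ns/net", &st) == 0)
                    printf("net ns id: %x\n", (unsigned int)st.st_ino);
            return 0;
    }
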
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e02bd278312463af174de08b017c445eb4e86b5b..33117d4ffce0753e987ff8fc67b03c719b180676 100644
@@ -447,7 +447,7 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-       #define NFSD_DRC_SIZE_SHIFT     10
+       #define NFSD_DRC_SIZE_SHIFT     7
        nfsd_drc_max_mem = (nr_free_buffer_pages()
                                        >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
        nfsd_drc_mem_used = 0;
@@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net)
                register_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
        }
-       do_gettimeofday(&nn->nfssvc_boot);              /* record boot time */
+       ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
        return 0;
 }
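
The NFSD_DRC_SIZE_SHIFT change above raises the reply-cache ceiling from
1/1024 to 1/128 of free buffer memory, an 8x increase. Back-of-the-envelope
arithmetic with an invented memory size:

    #include <stdio.h>

    int main(void)
    {
            unsigned long free_pages = 1UL << 20;   /* pretend 4 GiB of 4 KiB pages */
            unsigned long page_size = 4096;

            printf("old cap: %lu MiB\n",
                   ((free_pages >> 10) * page_size) >> 20);   /* 4 MiB */
            printf("new cap: %lu MiB\n",
                   ((free_pages >> 7) * page_size) >> 20);    /* 32 MiB */
            return 0;
    }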
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911b34ac4553a2c02da05b4e5d975b660710..f3772ea8ba0d394f95c302584093d57fc19e37d7 100644
@@ -36,6 +36,7 @@
 #define _NFSD4_STATE_H
 
 #include <linux/idr.h>
+#include <linux/refcount.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include "nfsfh.h"
 
@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
  * fields that are of general use to any stateid.
  */
 struct nfs4_stid {
-       atomic_t                sc_count;
+       refcount_t              sc_count;
 #define NFS4_OPEN_STID 1
 #define NFS4_LOCK_STID 2
 #define NFS4_DELEG_STID 4
@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 struct nfsd4_slot {
        u32     sl_seqid;
        __be32  sl_status;
+       struct svc_cred sl_cred;
        u32     sl_datalen;
        u16     sl_opcnt;
 #define NFSD4_SLOT_INUSE       (1 << 0)
 #define NFSD4_SLOT_CACHETHIS   (1 << 1)
 #define NFSD4_SLOT_INITIALIZED (1 << 2)
+#define NFSD4_SLOT_CACHED      (1 << 3)
        u8      sl_flags;
        char    sl_data[];
 };
@@ -465,7 +468,7 @@ struct nfs4_clnt_odstate {
        struct nfs4_client      *co_client;
        struct nfs4_file        *co_file;
        struct list_head        co_perfile;
-       atomic_t                co_odcount;
+       refcount_t              co_odcount;
 };
 
 /*
@@ -481,7 +484,7 @@ struct nfs4_clnt_odstate {
  * the global state_lock spinlock.
  */
 struct nfs4_file {
-       atomic_t                fi_ref;
+       refcount_t              fi_ref;
        spinlock_t              fi_lock;
        struct hlist_node       fi_hash;        /* hash on fi_fhandle */
        struct list_head        fi_stateids;
@@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
 void put_nfs4_file(struct nfs4_file *fi);
 static inline void get_nfs4_file(struct nfs4_file *fi)
 {
-       atomic_inc(&fi->fi_ref);
+       refcount_inc(&fi->fi_ref);
 }
 struct file *find_any_file(struct nfs4_file *f);
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1e4edbf70052bf5f6c8c0af06e0d7bf1812e80a5..bc29511b6405275522a09db2c596991cdd99e710 100644
@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
        return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
 }
 
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to.  But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
 {
-       return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+       return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
                || nfsd4_is_solo_sequence(resp);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e9379e258d6464587bd25b31edaf62856c44b96d..2995a271ec466c54117025cf819ce65931d5166c 100644
@@ -971,8 +971,8 @@ struct lock_manager {
 struct net;
 void locks_start_grace(struct net *, struct lock_manager *);
 void locks_end_grace(struct lock_manager *);
-int locks_in_grace(struct net *);
-int opens_in_grace(struct net *);
+bool locks_in_grace(struct net *);
+bool opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b9f0d1dbb808587b608c0941edc43e55d002bae..786ae2255f0566bc50b44368303a9daba049678c 100644
@@ -47,6 +47,7 @@ struct svc_pool {
        struct svc_pool_stats   sp_stats;       /* statistics on pool operation */
 #define        SP_TASK_PENDING         (0)             /* still work to do even if no
                                                 * xprt is queued. */
+#define SP_CONGESTED           (1)
        unsigned long           sp_flags;
 } ____cacheline_aligned_in_smp;
 
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index ecbdbfe86eb6620350fe5eca369906cd3ea60823..8c153f68509e297225db7614f8e706479476bb9c 100644
@@ -486,20 +486,22 @@ TRACE_EVENT(svc_recv,
        TP_ARGS(rqst, status),
 
        TP_STRUCT__entry(
-               __field(struct sockaddr *, addr)
                __field(u32, xid)
                __field(int, status)
                __field(unsigned long, flags)
+               __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
        ),
 
        TP_fast_assign(
-               __entry->addr = (struct sockaddr *)&rqst->rq_addr;
                __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
                __entry->status = status;
                __entry->flags = rqst->rq_flags;
+               memcpy(__get_dynamic_array(addr),
+                       &rqst->rq_addr, rqst->rq_addrlen);
        ),
 
-       TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr,
+       TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s",
+                       (struct sockaddr *)__get_dynamic_array(addr),
                        __entry->xid, __entry->status,
                        show_rqstp_flags(__entry->flags))
 );
@@ -544,22 +546,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
        TP_ARGS(rqst, status),
 
        TP_STRUCT__entry(
-               __field(struct sockaddr *, addr)
                __field(u32, xid)
-               __field(int, dropme)
                __field(int, status)
                __field(unsigned long, flags)
+               __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
        ),
 
        TP_fast_assign(
-               __entry->addr = (struct sockaddr *)&rqst->rq_addr;
                __entry->xid = be32_to_cpu(rqst->rq_xid);
                __entry->status = status;
                __entry->flags = rqst->rq_flags;
+               memcpy(__get_dynamic_array(addr),
+                       &rqst->rq_addr, rqst->rq_addrlen);
        ),
 
        TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
-               __entry->addr, __entry->xid,
+               (struct sockaddr *)__get_dynamic_array(addr),
+               __entry->xid,
                __entry->status, show_rqstp_flags(__entry->flags))
 );
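
The storage issue these two events shared: they recorded a pointer into
rqst->rq_addr, but the "%pIScp" formatting runs later, when the ring buffer
is read, by which time the svc_rqst may be freed or reused. Copying the
sockaddr bytes into a __dynamic_array at assign time snapshots the value
instead. The failure mode, distilled into userspace:

    #include <stdio.h>
    #include <string.h>

    struct record_bad  { const char *addr; };    /* stores a pointer  */
    struct record_good { char addr[32]; };       /* stores a snapshot */

    int main(void)
    {
            char live[32] = "192.0.2.1:2049";
            struct record_bad  bad  = { live };
            struct record_good good;

            memcpy(good.addr, live, sizeof(good.addr));

            /* The request is recycled before the trace is read back: */
            strcpy(live, "203.0.113.9:111");

            printf("bad:  %s\n", bad.addr);      /* 203.0.113.9:111 (wrong) */
            printf("good: %s\n", good.addr);     /* 192.0.2.1:2049 */
            return 0;
    }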
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03cd10d167a6ca522243c4285996151..73165e9ca5bfd2c2a928f7d1c16569cd1b59e65a 100644
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
                return stat;
        if (integ_len > buf->len)
                return stat;
-       if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
-               BUG();
+       if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+               WARN_ON_ONCE(1);
+               return stat;
+       }
        /* copy out mic... */
        if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
-               BUG();
+               return stat;
        if (mic.len > RPC_MAX_AUTH_SIZE)
                return stat;
        mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
        BUG_ON(integ_len % 4);
        *p++ = htonl(integ_len);
        *p++ = htonl(gc->gc_seq);
-       if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
-               BUG();
+       if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+               WARN_ON_ONCE(1);
+               goto out_err;
+       }
        if (resbuf->tail[0].iov_base == NULL) {
                if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
                        goto out_err;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 71de77bd44236dee6bd7ea1e86b8e317aee65060..e8e0831229cfcce48b2d6802493e80a429aa108b 100644
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
        svc_xprt_received(new);
 }
 
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
-                   struct net *net, const int family,
-                   const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+                           struct net *net, const int family,
+                           const unsigned short port, int flags)
 {
        struct svc_xprt_class *xcl;
 
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
        struct svc_pool *pool;
        struct svc_rqst *rqstp = NULL;
        int cpu;
-       bool queued = false;
 
        if (!svc_xprt_has_something_to_do(xprt))
                goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 
        atomic_long_inc(&pool->sp_stats.packets);
 
-redo_search:
+       dprintk("svc: transport %p put into queue\n", xprt);
+       spin_lock_bh(&pool->sp_lock);
+       list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+       pool->sp_stats.sockets_queued++;
+       spin_unlock_bh(&pool->sp_lock);
+
        /* find a thread for this xprt */
        rcu_read_lock();
        list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-               /* Do a lockless check first */
-               if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+               if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
                        continue;
-
-               /*
-                * Once the xprt has been queued, it can only be dequeued by
-                * the task that intends to service it. All we can do at that
-                * point is to try to wake this thread back up so that it can
-                * do so.
-                */
-               if (!queued) {
-                       spin_lock_bh(&rqstp->rq_lock);
-                       if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
-                               /* already busy, move on... */
-                               spin_unlock_bh(&rqstp->rq_lock);
-                               continue;
-                       }
-
-                       /* this one will do */
-                       rqstp->rq_xprt = xprt;
-                       svc_xprt_get(xprt);
-                       spin_unlock_bh(&rqstp->rq_lock);
-               }
-               rcu_read_unlock();
-
                atomic_long_inc(&pool->sp_stats.threads_woken);
                wake_up_process(rqstp->rq_task);
-               put_cpu();
-               goto out;
-       }
-       rcu_read_unlock();
-
-       /*
-        * We didn't find an idle thread to use, so we need to queue the xprt.
-        * Do so and then search again. If we find one, we can't hook this one
-        * up to it directly but we can wake the thread up in the hopes that it
-        * will pick it up once it searches for a xprt to service.
-        */
-       if (!queued) {
-               queued = true;
-               dprintk("svc: transport %p put into queue\n", xprt);
-               spin_lock_bh(&pool->sp_lock);
-               list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-               pool->sp_stats.sockets_queued++;
-               spin_unlock_bh(&pool->sp_lock);
-               goto redo_search;
+               goto out_unlock;
        }
+       set_bit(SP_CONGESTED, &pool->sp_flags);
        rqstp = NULL;
+out_unlock:
+       rcu_read_unlock();
        put_cpu();
 out:
        trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 
 static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
-       struct svc_xprt *xprt;
        struct svc_pool         *pool = rqstp->rq_pool;
        long                    time_left = 0;
 
        /* rq_xprt should be clear on entry */
        WARN_ON_ONCE(rqstp->rq_xprt);
 
-       /* Normally we will wait up to 5 seconds for any required
-        * cache information to be provided.
-        */
-       rqstp->rq_chandle.thread_wait = 5*HZ;
-
-       xprt = svc_xprt_dequeue(pool);
-       if (xprt) {
-               rqstp->rq_xprt = xprt;
-
-               /* As there is a shortage of threads and this request
-                * had to be queued, don't allow the thread to wait so
-                * long for cache updates.
-                */
-               rqstp->rq_chandle.thread_wait = 1*HZ;
-               clear_bit(SP_TASK_PENDING, &pool->sp_flags);
-               return xprt;
-       }
+       rqstp->rq_xprt = svc_xprt_dequeue(pool);
+       if (rqstp->rq_xprt)
+               goto out_found;
 
        /*
         * We have to be able to interrupt this wait
         * to bring down the daemons ...
         */
        set_current_state(TASK_INTERRUPTIBLE);
+       smp_mb__before_atomic();
+       clear_bit(SP_CONGESTED, &pool->sp_flags);
        clear_bit(RQ_BUSY, &rqstp->rq_flags);
-       smp_mb();
+       smp_mb__after_atomic();
 
        if (likely(rqst_should_sleep(rqstp)))
                time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 
        try_to_freeze();
 
-       spin_lock_bh(&rqstp->rq_lock);
        set_bit(RQ_BUSY, &rqstp->rq_flags);
-       spin_unlock_bh(&rqstp->rq_lock);
-
-       xprt = rqstp->rq_xprt;
-       if (xprt != NULL)
-               return xprt;
+       smp_mb__after_atomic();
+       rqstp->rq_xprt = svc_xprt_dequeue(pool);
+       if (rqstp->rq_xprt)
+               goto out_found;
 
        if (!time_left)
                atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
        if (signalled() || kthread_should_stop())
                return ERR_PTR(-EINTR);
        return ERR_PTR(-EAGAIN);
+out_found:
+       /* Normally we will wait up to 5 seconds for any required
+        * cache information to be provided.
+        */
+       if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+               rqstp->rq_chandle.thread_wait = 5*HZ;
+       else
+               rqstp->rq_chandle.thread_wait = 1*HZ;
+       return rqstp->rq_xprt;
 }
 
 static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
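
The rewritten enqueue path always queues the transport first, then wakes the
first thread whose RQ_BUSY bit it wins with test_and_set_bit(); if every
thread is busy it sets SP_CONGESTED, which svc_get_next_xprt() later uses to
pick the short 1-second cache wait. A rough model of the wake-one step, with
C11 atomics standing in for the kernel bit ops:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NTHREADS 4

    static atomic_bool busy[NTHREADS];   /* one RQ_BUSY per server thread */
    static atomic_bool congested;        /* stands in for SP_CONGESTED */

    /* Returns the thread index we "woke", or -1 if all were busy. */
    static int wake_one_idle(void)
    {
            for (int i = 0; i < NTHREADS; i++) {
                    /* test_and_set: we own the thread only if the
                     * old value was clear. */
                    if (!atomic_exchange(&busy[i], true))
                            return i;
            }
            atomic_store(&congested, true);
            return -1;
    }

    int main(void)
    {
            atomic_exchange(&busy[0], true);   /* thread 0 already busy */
            printf("woke thread %d\n", wake_one_idle());   /* 1 */
            return 0;
    }
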
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 992594b7cc6b699d75614ca45bbf4631c5e1ede2..af7893501e40acdbaf678a373f721264cf398029 100644
@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
        if (ret)
                goto out_err;
 
+       /* Bump page refcnt so Send completion doesn't release
+        * the rq_buffer before all retransmits are complete.
+        */
+       get_page(virt_to_page(rqst->rq_buffer));
        ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
        if (ret)
                goto out_unmap;
@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
                return -EINVAL;
        }
 
-       /* svc_rdma_sendto releases this page */
        page = alloc_page(RPCRDMA_DEF_GFP);
        if (!page)
                return -ENOMEM;
@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
 {
        struct rpc_rqst *rqst = task->tk_rqstp;
 
+       put_page(virt_to_page(rqst->rq_buffer));
        kfree(rqst->rq_rbuffer);
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5caf8e722a118659f8b9e8c3531f60a8e738158b..46ec069150d50ff53e93a7f17b0d716fa80503a2 100644
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
                        ib_event_msg(event->event), event->event,
                        event->element.qp);
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
+               svc_xprt_enqueue(xprt);
                break;
        }
 }
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
        if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
                goto out;
-       svc_xprt_enqueue(&xprt->sc_xprt);
-       goto out;
+       goto out_enqueue;
 
 flushed:
        if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ flushed:
        set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
        svc_rdma_put_context(ctxt, 1);
 
+out_enqueue:
+       svc_xprt_enqueue(&xprt->sc_xprt);
 out:
        svc_xprt_put(&xprt->sc_xprt);
 }
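
Common thread in these svcrdma fixes: XPT_CLOSE only marks the transport
dead; actual teardown happens when a server thread dequeues it, so every
handler that sets the flag must also call svc_xprt_enqueue() or shutdown can
stall. A hedged sketch of the mark-then-kick pattern (userspace stand-ins,
not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    #define XPT_CLOSE (1 << 2)

    static atomic_int xpt_flags;         /* stands in for xprt->xpt_flags */

    static void svc_xprt_enqueue_model(void)
    {
            /* In the kernel this queues the xprt and wakes a server
             * thread, which sees XPT_CLOSE and deletes the transport. */
            printf("transport queued for teardown\n");
    }

    static void completion_error(void)
    {
            atomic_fetch_or(&xpt_flags, XPT_CLOSE);   /* mark... */
            svc_xprt_enqueue_model();                 /* ...and always kick */
    }

    int main(void)
    {
            completion_error();
            return 0;
    }
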
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 
        if (unlikely(wc->status != IB_WC_SUCCESS)) {
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+               svc_xprt_enqueue(&xprt->sc_xprt);
                if (wc->status != IB_WC_WR_FLUSH_ERR)
                        pr_err("svcrdma: Send: %s (%u/0x%x)\n",
                               ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
                        xprt, cma_id);
-               if (xprt)
+               if (xprt) {
                        set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+                       svc_xprt_enqueue(&xprt->sc_xprt);
+               }
                break;
 
        default: