git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - net/netfilter/nf_conntrack_core.c
netfilter: merge ctinfo into nfct pointer storage area
index 9934b0c93c1e4513dc58cb2bf0154bfe89642a56..47c4ea53daa614fa76293883842195733783f2e3 100644
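The subject line describes folding the per-packet conntrack info into the conntrack pointer itself: since every nf_conn is kept 8-byte aligned (note the NFCT_INFOMASK + 1 slab alignment added near the end of this diff), the low bits of the pointer are free to carry the ip_conntrack_info value inside a single skb->_nfct word, read and written through nf_ct_get()/nf_ct_set(). A minimal userspace sketch of that packing scheme follows; the ex_* names and helper bodies are illustrative assumptions, not the kernel's definitions.

#include <assert.h>
#include <stdint.h>

#define EX_INFOMASK 7UL	/* low 3 bits carry the ctinfo value (mirrors NFCT_INFOMASK) */

/* Pack an 8-byte-aligned object pointer and a small info value into one word. */
static inline uintptr_t ex_nfct_pack(const void *ct, unsigned int ctinfo)
{
	assert(((uintptr_t)ct & EX_INFOMASK) == 0);	/* alignment frees the low bits */
	assert(ctinfo <= EX_INFOMASK);
	return (uintptr_t)ct | ctinfo;
}

/* Recover the pointer and the info value from the packed word. */
static inline const void *ex_nfct_unpack(uintptr_t nfct, unsigned int *ctinfo)
{
	*ctinfo = nfct & EX_INFOMASK;
	return (const void *)(nfct & ~EX_INFOMASK);
}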
@@ -72,12 +72,27 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
+struct conntrack_gc_work {
+       struct delayed_work     dwork;
+       u32                     last_bucket;
+       bool                    exiting;
+       long                    next_gc_run;
+};
+
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
+#define GC_MAX_BUCKETS_DIV     64u
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX                (2 * HZ)
+/* maximum conntracks to evict per gc run */
+#define GC_MAX_EVICTS          256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
        spin_lock(lock);
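These limits keep each gc pass cheap. A rough worked example (the table size is illustrative, not taken from this diff): with nf_conntrack_htable_size = 65536, one run of the worker scans at most 65536 / GC_MAX_BUCKETS_DIV = 1024 buckets, stops early once GC_MAX_EVICTS = 256 expired entries have been reaped, and successive runs are scheduled at most GC_INTERVAL_MAX = 2 * HZ apart.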
@@ -164,7 +179,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
 
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -335,16 +350,31 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
        spin_unlock(&pcpu->lock);
 }
 
+#define NFCT_ALIGN(len)        (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
+
 /* Released via destroy_conntrack() */
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                 const struct nf_conntrack_zone *zone,
                                 gfp_t flags)
 {
-       struct nf_conn *tmpl;
+       struct nf_conn *tmpl, *p;
 
-       tmpl = kzalloc(sizeof(*tmpl), flags);
-       if (tmpl == NULL)
-               return NULL;
+       if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) {
+               tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags);
+               if (!tmpl)
+                       return NULL;
+
+               p = tmpl;
+               tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
+               if (tmpl != p) {
+                       tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
+                       tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p;
+               }
+       } else {
+               tmpl = kzalloc(sizeof(*tmpl), flags);
+               if (!tmpl)
+                       return NULL;
+       }
 
        tmpl->status = IPS_TEMPLATE;
        write_pnet(&tmpl->ct_net, net);
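Templates come from kzalloc() rather than from the 8-byte-aligned nf_conntrack slab, so the block above over-allocates by NFCT_INFOMASK bytes, rounds the pointer up to the required alignment, and records the pad in tmpl_padto so that nf_ct_tmpl_free() can hand the original pointer back to kfree(). A self-contained sketch of the same align-and-remember-the-pad idea (all ex_* names are illustrative):

#include <stdint.h>
#include <stdlib.h>

#define EX_ALIGN_MASK 7UL	/* mirrors NFCT_INFOMASK: align objects to 8 bytes */

struct ex_tmpl {
	unsigned char pad_to_orig;	/* plays the role of proto.tmpl_padto */
	/* ... rest of the object ... */
};

static struct ex_tmpl *ex_tmpl_alloc(void)
{
	void *raw = calloc(1, sizeof(struct ex_tmpl) + EX_ALIGN_MASK);
	struct ex_tmpl *tmpl;

	if (!raw)
		return NULL;

	/* round up to the next 8-byte boundary and record the offset */
	tmpl = (struct ex_tmpl *)(((uintptr_t)raw + EX_ALIGN_MASK) & ~EX_ALIGN_MASK);
	tmpl->pad_to_orig = (unsigned char)((char *)tmpl - (char *)raw);
	return tmpl;
}

static void ex_tmpl_free(struct ex_tmpl *tmpl)
{
	free((char *)tmpl - tmpl->pad_to_orig);	/* recover the calloc() pointer */
}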
@@ -359,7 +389,11 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl)
 {
        nf_ct_ext_destroy(tmpl);
        nf_ct_ext_free(tmpl);
-       kfree(tmpl);
+
+       if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
+               kfree((char *)tmpl - tmpl->proto.tmpl_padto);
+       else
+               kfree(tmpl);
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 
@@ -367,12 +401,10 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
        struct nf_conn *ct = (struct nf_conn *)nfct;
-       struct net *net = nf_ct_net(ct);
        struct nf_conntrack_l4proto *l4proto;
 
        pr_debug("destroy_conntrack(%p)\n", ct);
        NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
-       NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
        if (unlikely(nf_ct_is_template(ct))) {
                nf_ct_tmpl_free(ct);
@@ -395,7 +427,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
        nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-       NF_CT_STAT_INC(net, delete);
        local_bh_enable();
 
        if (ct->master)
@@ -427,7 +458,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
        nf_ct_add_to_dying_list(ct);
 
-       NF_CT_STAT_INC(net, delete_list);
        local_bh_enable();
 }
 
@@ -435,35 +465,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 {
        struct nf_conn_tstamp *tstamp;
 
+       if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+               return false;
+
        tstamp = nf_conn_tstamp_find(ct);
        if (tstamp && tstamp->stop == 0)
                tstamp->stop = ktime_get_real_ns();
 
-       if (nf_ct_is_dying(ct))
-               goto delete;
-
        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
                                    portid, report) < 0) {
-               /* destroy event was not delivered */
+               /* destroy event was not delivered. nf_ct_put will
+                * be done by event cache worker on redelivery.
+                */
                nf_ct_delete_from_lists(ct);
                nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
                return false;
        }
 
        nf_conntrack_ecache_work(nf_ct_net(ct));
-       set_bit(IPS_DYING_BIT, &ct->status);
- delete:
        nf_ct_delete_from_lists(ct);
        nf_ct_put(ct);
        return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_delete);
 
-static void death_by_timeout(unsigned long ul_conntrack)
-{
-       nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
                const struct nf_conntrack_tuple *tuple,
@@ -481,22 +506,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
               net_eq(net, nf_ct_net(ct));
 }
 
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
 {
-       struct hlist_nulls_head *hptr;
-       unsigned int sequence, hsz;
+       if (!atomic_inc_not_zero(&ct->ct_general.use))
+               return;
 
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               hsz = nf_conntrack_htable_size;
-               hptr = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+       if (nf_ct_should_gc(ct))
+               nf_ct_kill(ct);
 
-       *hash = hptr;
-       *hsize = hsz;
+       nf_ct_put(ct);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
 
 /*
  * Warning :
@@ -510,21 +530,26 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_head *ct_hash;
        struct hlist_nulls_node *n;
-       unsigned int bucket, sequence;
+       unsigned int bucket, hsize;
 
 begin:
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               bucket = scale_hash(hash);
-               ct_hash = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+       nf_conntrack_get_ht(&ct_hash, &hsize);
+       bucket = reciprocal_scale(hash, hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
-               if (nf_ct_key_equal(h, tuple, zone, net)) {
-                       NF_CT_STAT_INC_ATOMIC(net, found);
-                       return h;
+               struct nf_conn *ct;
+
+               ct = nf_ct_tuplehash_to_ctrack(h);
+               if (nf_ct_is_expired(ct)) {
+                       nf_ct_gc_expired(ct);
+                       continue;
                }
-               NF_CT_STAT_INC_ATOMIC(net, searched);
+
+               if (nf_ct_is_dying(ct))
+                       continue;
+
+               if (nf_ct_key_equal(h, tuple, zone, net))
+                       return h;
        }
        /*
         * if the nulls value we got at the end of this lookup is
@@ -618,7 +643,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
                                    zone, net))
                        goto out;
 
-       add_timer(&ct->timeout);
        smp_wmb();
        /* The caller holds a reference to this object */
        atomic_set(&ct->ct_general.use, 2);
@@ -681,12 +705,12 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
            !nfct_nat(ct) &&
            !nf_ct_is_dying(ct) &&
            atomic_inc_not_zero(&ct->ct_general.use)) {
-               nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
-               nf_conntrack_put(skb->nfct);
-               /* Assign conntrack already in hashes to this skbuff. Don't
-                * modify skb->nfctinfo to ensure consistent stateful filtering.
-                */
-               skb->nfct = &ct->ct_general;
+               enum ip_conntrack_info oldinfo;
+               struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
+
+               nf_ct_acct_merge(ct, ctinfo, loser_ct);
+               nf_conntrack_put(&loser_ct->ct_general);
+               nf_ct_set(skb, ct, oldinfo);
                return NF_ACCEPT;
        }
        NF_CT_STAT_INC(net, drop);
@@ -771,15 +795,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        /* Timer relative to confirmation time, not original
           setting time, otherwise we'd get timer wrap in
           weird delay cases. */
-       ct->timeout.expires += jiffies;
-       add_timer(&ct->timeout);
+       ct->timeout += nfct_time_stamp;
        atomic_inc(&ct->ct_general.use);
        ct->status |= IPS_CONFIRMED;
 
        /* set conntrack timestamp, if enabled. */
        tstamp = nf_conn_tstamp_find(ct);
        if (tstamp) {
-               if (skb->tstamp.tv64 == 0)
+               if (skb->tstamp == 0)
                        __net_timestamp(skb);
 
                tstamp->start = ktime_to_ns(skb->tstamp);
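With the per-conntrack timer removed, ct->timeout is now just an absolute 32-bit jiffies stamp: the relative value chosen at setup becomes absolute here at confirmation by adding nfct_time_stamp, and stale entries are noticed lazily, during lookups and by the gc worker, via nf_ct_is_expired(). That helper lives in the headers and is presumably the usual wrap-safe signed comparison, roughly:

#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe "has this absolute 32-bit jiffies stamp passed?" test; an assumed
 * sketch of what nf_ct_is_expired() computes against nfct_time_stamp.
 */
static inline bool ex_stamp_expired(uint32_t timeout, uint32_t now)
{
	return (int32_t)(timeout - now) <= 0;
}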
@@ -791,7 +814,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         */
        __nf_conntrack_hash_insert(ct, hash, reply_hash);
        nf_conntrack_double_unlock(hash, reply_hash);
-       NF_CT_STAT_INC(net, insert);
        local_bh_enable();
 
        help = nfct_help(ct);
@@ -823,29 +845,40 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
        const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_head *ct_hash;
-       unsigned int hash, sequence;
+       unsigned int hash, hsize;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;
 
        zone = nf_ct_zone(ignored_conntrack);
 
        rcu_read_lock();
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               hash = hash_conntrack(net, tuple);
-               ct_hash = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+       nf_conntrack_get_ht(&ct_hash, &hsize);
+       hash = __hash_conntrack(net, tuple, hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
-               if (ct != ignored_conntrack &&
-                   nf_ct_key_equal(h, tuple, zone, net)) {
+
+               if (ct == ignored_conntrack)
+                       continue;
+
+               if (nf_ct_is_expired(ct)) {
+                       nf_ct_gc_expired(ct);
+                       continue;
+               }
+
+               if (nf_ct_key_equal(h, tuple, zone, net)) {
                        NF_CT_STAT_INC_ATOMIC(net, found);
                        rcu_read_unlock();
                        return 1;
                }
-               NF_CT_STAT_INC_ATOMIC(net, searched);
        }
+
+       if (get_nulls_value(n) != hash) {
+               NF_CT_STAT_INC_ATOMIC(net, search_restart);
+               goto begin;
+       }
+
        rcu_read_unlock();
 
        return 0;
@@ -867,6 +900,11 @@ static unsigned int early_drop_list(struct net *net,
        hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
                tmp = nf_ct_tuplehash_to_ctrack(h);
 
+               if (nf_ct_is_expired(tmp)) {
+                       nf_ct_gc_expired(tmp);
+                       continue;
+               }
+
                if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
                    !net_eq(nf_ct_net(tmp), net) ||
                    nf_ct_is_dying(tmp))
@@ -884,7 +922,6 @@ static unsigned int early_drop_list(struct net *net,
                 */
                if (net_eq(nf_ct_net(tmp), net) &&
                    nf_ct_is_confirmed(tmp) &&
-                   del_timer(&tmp->timeout) &&
                    nf_ct_delete(tmp, 0, 0))
                        drops++;
 
@@ -900,14 +937,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 
        for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
                struct hlist_nulls_head *ct_hash;
-               unsigned hash, sequence, drops;
+               unsigned int hash, hsize, drops;
 
                rcu_read_lock();
-               do {
-                       sequence = read_seqcount_begin(&nf_conntrack_generation);
-                       hash = scale_hash(_hash++);
-                       ct_hash = nf_conntrack_hash;
-               } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+               nf_conntrack_get_ht(&ct_hash, &hsize);
+               hash = reciprocal_scale(_hash++, hsize);
 
                drops = early_drop_list(net, &ct_hash[hash]);
                rcu_read_unlock();
@@ -921,6 +955,99 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
        return false;
 }
 
+static void gc_worker(struct work_struct *work)
+{
+       unsigned int i, goal, buckets = 0, expired_count = 0;
+       struct conntrack_gc_work *gc_work;
+       unsigned int ratio, scanned = 0;
+       unsigned long next_run;
+
+       gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+       goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
+       i = gc_work->last_bucket;
+
+       do {
+               struct nf_conntrack_tuple_hash *h;
+               struct hlist_nulls_head *ct_hash;
+               struct hlist_nulls_node *n;
+               unsigned int hashsz;
+               struct nf_conn *tmp;
+
+               i++;
+               rcu_read_lock();
+
+               nf_conntrack_get_ht(&ct_hash, &hashsz);
+               if (i >= hashsz)
+                       i = 0;
+
+               hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+                       tmp = nf_ct_tuplehash_to_ctrack(h);
+
+                       scanned++;
+                       if (nf_ct_is_expired(tmp)) {
+                               nf_ct_gc_expired(tmp);
+                               expired_count++;
+                               continue;
+                       }
+               }
+
+               /* could check get_nulls_value() here and restart if ct
+                * was moved to another chain.  But given gc is best-effort
+                * we will just continue with next hash slot.
+                */
+               rcu_read_unlock();
+               cond_resched_rcu_qs();
+       } while (++buckets < goal &&
+                expired_count < GC_MAX_EVICTS);
+
+       if (gc_work->exiting)
+               return;
+
+       /*
+        * Eviction will normally happen from the packet path, and not
+        * from this gc worker.
+        *
+        * This worker is only here to reap expired entries when system went
+        * idle after a busy period.
+        *
+        * The heuristics below are supposed to balance conflicting goals:
+        *
+        * 1. Minimize time until we notice a stale entry
+        * 2. Maximize scan intervals to not waste cycles
+        *
+        * Normally, expired_count will be 0, this increases the next_run time
+        * to prioritize 2) above.
+        *
+        * As soon as a timed-out entry is found, move towards 1) and increase
+        * the scan frequency.
+        * In case we have lots of evictions next scan is done immediately.
+        */
+       ratio = scanned ? expired_count * 100 / scanned : 0;
+       if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+               gc_work->next_gc_run = 0;
+               next_run = 0;
+       } else if (expired_count) {
+               gc_work->next_gc_run /= 2U;
+               next_run = msecs_to_jiffies(1);
+       } else {
+               if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+                       gc_work->next_gc_run += msecs_to_jiffies(1);
+
+               next_run = gc_work->next_gc_run;
+       }
+
+       gc_work->last_bucket = i;
+       queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+       INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+       gc_work->next_gc_run = GC_INTERVAL_MAX;
+       gc_work->exiting = false;
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(struct net *net,
                     const struct nf_conntrack_zone *zone,
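The heuristic at the end of gc_worker() turns the observed expiry ratio into the next delay. Worked through with illustrative numbers: scanning 1000 entries and finding 950 expired gives ratio = 95, so the next run is queued immediately; finding only a few expired entries halves next_gc_run and requeues after roughly one millisecond; finding none grows the interval by a millisecond per run until it saturates at GC_INTERVAL_MAX (2 * HZ).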
@@ -957,8 +1084,6 @@ __nf_conntrack_alloc(struct net *net,
        /* save hash for reusing when confirming */
        *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
        ct->status = 0;
-       /* Don't set timer yet: wait for confirmation */
-       setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
        write_pnet(&ct->ct_net, net);
        memset(&ct->__nfct_init_offset[0], 0,
               offsetof(struct nf_conn, proto) -
@@ -1096,10 +1221,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
                }
                spin_unlock(&nf_conntrack_expect_lock);
        }
-       if (!exp) {
+       if (!exp)
                __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
-               NF_CT_STAT_INC(net, new);
-       }
 
        /* Now it is inserted into the unconfirmed list, bump refcount */
        nf_conntrack_get(&ct->ct_general);
@@ -1116,7 +1239,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
 }
 
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
+/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
 static inline struct nf_conn *
 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                  struct sk_buff *skb,
@@ -1175,8 +1298,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                }
                *set_reply = 0;
        }
-       skb->nfct = &ct->ct_general;
-       skb->nfctinfo = *ctinfo;
+       nf_ct_set(skb, ct, *ctinfo);
        return ct;
 }
 
@@ -1184,7 +1306,7 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                struct sk_buff *skb)
 {
-       struct nf_conn *ct, *tmpl = NULL;
+       struct nf_conn *ct, *tmpl;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_l4proto *l4proto;
@@ -1194,17 +1316,17 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        int set_reply = 0;
        int ret;
 
-       if (skb->nfct) {
+       tmpl = nf_ct_get(skb, &ctinfo);
+       if (tmpl) {
                /* Previously seen (loopback or untracked)?  Ignore. */
-               tmpl = (struct nf_conn *)skb->nfct;
                if (!nf_ct_is_template(tmpl)) {
                        NF_CT_STAT_INC_ATOMIC(net, ignore);
                        return NF_ACCEPT;
                }
-               skb->nfct = NULL;
+               skb->_nfct = 0;
        }
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        l3proto = __nf_ct_l3proto_find(pf);
        ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
                                   &dataoff, &protonum);
@@ -1222,8 +1344,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
         * inverse of the return code tells to the netfilter
         * core what to do with the packet. */
        if (l4proto->error != NULL) {
-               ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
-                                    pf, hooknum);
+               ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
                if (ret <= 0) {
                        NF_CT_STAT_INC_ATOMIC(net, error);
                        NF_CT_STAT_INC_ATOMIC(net, invalid);
@@ -1231,10 +1352,10 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                        goto out;
                }
                /* ICMP[v6] protocol trackers may assign one conntrack. */
-               if (skb->nfct)
+               if (skb->_nfct)
                        goto out;
        }
-
+repeat:
        ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
                               l3proto, l4proto, &set_reply, &ctinfo);
        if (!ct) {
@@ -1251,7 +1372,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                goto out;
        }
 
-       NF_CT_ASSERT(skb->nfct);
+       NF_CT_ASSERT(skb_nfct(skb));
 
        /* Decide what timeout policy we want to apply to this flow. */
        timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1261,11 +1382,17 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                pr_debug("nf_conntrack_in: Can't track with proto module\n");
-               nf_conntrack_put(skb->nfct);
-               skb->nfct = NULL;
+               nf_conntrack_put(&ct->ct_general);
+               skb->_nfct = 0;
                NF_CT_STAT_INC_ATOMIC(net, invalid);
                if (ret == -NF_DROP)
                        NF_CT_STAT_INC_ATOMIC(net, drop);
+               /* Special case: TCP tracker reports an attempt to reopen a
+                * closed/aborted connection. We have to go back and create a
+                * fresh conntrack.
+                */
+               if (ret == -NF_REPEAT)
+                       goto repeat;
                ret = -ret;
                goto out;
        }
@@ -1273,15 +1400,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                nf_conntrack_event_cache(IPCT_REPLY, ct);
 out:
-       if (tmpl) {
-               /* Special case: we have to repeat this hook, assign the
-                * template again to this packet. We assume that this packet
-                * has no conntrack assigned. This is used by nf_ct_tcp. */
-               if (ret == NF_REPEAT)
-                       skb->nfct = (struct nf_conntrack *)tmpl;
-               else
-                       nf_ct_put(tmpl);
-       }
+       if (tmpl)
+               nf_ct_put(tmpl);
 
        return ret;
 }
@@ -1332,7 +1452,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                          unsigned long extra_jiffies,
                          int do_acct)
 {
-       NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
        NF_CT_ASSERT(skb);
 
        /* Only update if this is not a fixed timeout */
@@ -1340,39 +1459,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                goto acct;
 
        /* If not in hash table, timer will not be active yet */
-       if (!nf_ct_is_confirmed(ct)) {
-               ct->timeout.expires = extra_jiffies;
-       } else {
-               unsigned long newtime = jiffies + extra_jiffies;
-
-               /* Only update the timeout if the new timeout is at least
-                  HZ jiffies from the old timeout. Need del_timer for race
-                  avoidance (may already be dying). */
-               if (newtime - ct->timeout.expires >= HZ)
-                       mod_timer_pending(&ct->timeout, newtime);
-       }
+       if (nf_ct_is_confirmed(ct))
+               extra_jiffies += nfct_time_stamp;
 
+       ct->timeout = extra_jiffies;
 acct:
        if (do_acct)
                nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
-bool __nf_ct_kill_acct(struct nf_conn *ct,
-                      enum ip_conntrack_info ctinfo,
-                      const struct sk_buff *skb,
-                      int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+                    enum ip_conntrack_info ctinfo,
+                    const struct sk_buff *skb)
 {
-       if (do_acct)
-               nf_ct_acct_update(ct, ctinfo, skb->len);
+       nf_ct_acct_update(ct, ctinfo, skb->len);
 
-       if (del_timer(&ct->timeout)) {
-               ct->timeout.function((unsigned long)ct);
-               return true;
-       }
-       return false;
+       return nf_ct_delete(ct, 0, 0);
 }
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 
@@ -1436,9 +1541,8 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
                ctinfo = IP_CT_RELATED;
 
        /* Attach to new skbuff, and increment count */
-       nskb->nfct = &ct->ct_general;
-       nskb->nfctinfo = ctinfo;
-       nf_conntrack_get(nskb->nfct);
+       nf_ct_set(nskb, ct, ctinfo);
+       nf_conntrack_get(skb_nfct(nskb));
 }
 
 /* Bring out ya dead! */
@@ -1505,11 +1609,8 @@ void nf_ct_iterate_cleanup(struct net *net,
 
        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
                /* Time to push up daises... */
-               if (del_timer(&ct->timeout))
-                       nf_ct_delete(ct, portid, report);
-
-               /* ... else the timer will get him soon. */
 
+               nf_ct_delete(ct, portid, report);
                nf_ct_put(ct);
                cond_resched();
        }
@@ -1545,6 +1646,7 @@ static int untrack_refs(void)
 
 void nf_conntrack_cleanup_start(void)
 {
+       conntrack_gc_work.exiting = true;
        RCU_INIT_POINTER(ip_ct_attach, NULL);
 }
 
@@ -1554,6 +1656,7 @@ void nf_conntrack_cleanup_end(void)
        while (untrack_refs() > 0)
                schedule();
 
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
        nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 
        nf_conntrack_proto_fini();
@@ -1775,7 +1878,8 @@ int nf_conntrack_init_start(void)
        nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
        nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
-                                               sizeof(struct nf_conn), 0,
+                                               sizeof(struct nf_conn),
+                                               NFCT_INFOMASK + 1,
                                                SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
        if (!nf_conntrack_cachep)
                goto err_cachep;
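Passing NFCT_INFOMASK + 1 as the slab alignment guarantees that every nf_conn starts on a boundary that keeps the low ctinfo bits of the pointer clear, which is what allows ctinfo to be packed into skb->_nfct as sketched at the top of this diff.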
@@ -1828,6 +1932,10 @@ int nf_conntrack_init_start(void)
        }
        /*  - and look it like as a confirmed connection */
        nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+       conntrack_gc_work_init(&conntrack_gc_work);
+       queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+
        return 0;
 
 err_proto: