diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index e5458b99e09cb4d4226a1ef49a7ec81f2e20d94a..18e752439f6f42a944a96f704105ea6e98707284 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -86,6 +86,7 @@ struct fq_sched_data {
 
        struct rb_root  delayed;        /* for rate limited flows */
        u64             time_next_delayed_flow;
+       unsigned long   unthrottle_latency_ns;
 
        struct fq_flow  internal;       /* for non classified or high prio packets */
        u32             quantum;
@@ -94,6 +95,7 @@ struct fq_sched_data {
        u32             flow_max_rate;  /* optional max rate per flow */
        u32             flow_plimit;    /* max packets per flow */
        u32             orphan_mask;    /* mask for orphaned skb */
+       u32             low_rate_threshold;
        struct rb_root  *fq_root;
        u8              rate_enable;
        u8              fq_trees_log;
@@ -407,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+       unsigned long sample;
        struct rb_node *p;
 
        if (q->time_next_delayed_flow > now)
                return;
 
+       /* Update unthrottle latency EWMA.
+        * This is cheap and can help diagnose timer/latency problems.
+        */
+       sample = (unsigned long)(now - q->time_next_delayed_flow);
+       q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+       q->unthrottle_latency_ns += sample >> 3;
+
        q->time_next_delayed_flow = ~0ULL;
        while ((p = rb_first(&q->delayed)) != NULL) {
                struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
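A side note on the EWMA added above: with a shift of 3, each update keeps 7/8 of the previous average and mixes in 1/8 of the new sample, i.e. avg = avg - avg/8 + sample/8 in integer arithmetic. A minimal userspace sketch of that update (ewma_shift3() is a hypothetical name used only for illustration, not part of sch_fq.c):

    static unsigned long ewma_shift3(unsigned long avg, unsigned long sample)
    {
            avg -= avg >> 3;        /* drop 1/8 of the old average, keep 7/8 */
            avg += sample >> 3;     /* mix in 1/8 of the new sample */
            return avg;             /* ~= 7/8 * avg + 1/8 * sample */
    }

Fed a constant sample, the average converges to that sample; a single outlier only moves it by sample/8, which is why the update is cheap yet still useful for spotting hrtimer latency problems.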
@@ -433,7 +443,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
        struct fq_flow_head *head;
        struct sk_buff *skb;
        struct fq_flow *f;
-       u32 rate;
+       u32 rate, plen;
 
        skb = fq_dequeue_head(sch, &q->internal);
        if (skb)
@@ -482,7 +492,7 @@ begin:
        prefetch(&skb->end);
        f->credit -= qdisc_pkt_len(skb);
 
-       if (f->credit > 0 || !q->rate_enable)
+       if (!q->rate_enable)
                goto out;
 
        /* Do not pace locally generated ack packets */
@@ -493,8 +503,15 @@ begin:
        if (skb->sk)
                rate = min(skb->sk->sk_pacing_rate, rate);
 
+       if (rate <= q->low_rate_threshold) {
+               f->credit = 0;
+               plen = qdisc_pkt_len(skb);
+       } else {
+               plen = max(qdisc_pkt_len(skb), q->quantum);
+               if (f->credit > 0)
+                       goto out;
+       }
        if (rate != ~0U) {
-               u32 plen = max(qdisc_pkt_len(skb), q->quantum);
                u64 len = (u64)plen * NSEC_PER_SEC;
 
                if (likely(rate))
@@ -507,7 +524,12 @@ begin:
                        len = NSEC_PER_SEC;
                        q->stat_pkts_too_long++;
                }
-
+               /* Account for scheduling/timer drift.
+                * f->time_next_packet was set when the prior packet was sent,
+                * and the current time (@now) can be too late by tens of usec.
+                */
+               if (f->time_next_packet)
+                       len -= min(len/2, now - f->time_next_packet);
                f->time_next_packet = now + len;
        }
 out:
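To follow the pacing arithmetic above in isolation, here is a hedged userspace sketch (next_tx_time() is a hypothetical helper, not kernel code). It assumes now >= prev_deadline, which holds in fq_dequeue() because a flow whose deadline lies in the future is throttled instead of dequeued:

    #include <stdint.h>

    #define NSEC_PER_SEC    1000000000ULL

    static uint64_t next_tx_time(uint64_t now, uint64_t prev_deadline,
                                 uint32_t plen, uint32_t rate)
    {
            uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

            if (rate)
                    len /= rate;            /* ns needed to send plen bytes at rate */
            if (len > NSEC_PER_SEC)
                    len = NSEC_PER_SEC;     /* never delay a flow by more than 1 sec */
            if (prev_deadline) {
                    /* this dequeue may run tens of usec after the previous
                     * deadline; give back at most half of the new delay
                     */
                    uint64_t late = now - prev_deadline;

                    len -= late < len / 2 ? late : len / 2;
            }
            return now + len;
    }

For example, a 1500 byte packet at rate = 125000 bytes/sec gives len = 12 ms; if this dequeue ran 50 usec after the previous deadline, the next deadline is pulled in by those 50 usec.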
@@ -662,6 +684,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
        [TCA_FQ_FLOW_MAX_RATE]          = { .type = NLA_U32 },
        [TCA_FQ_BUCKETS_LOG]            = { .type = NLA_U32 },
        [TCA_FQ_FLOW_REFILL_DELAY]      = { .type = NLA_U32 },
+       [TCA_FQ_LOW_RATE_THRESHOLD]     = { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +739,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
        if (tb[TCA_FQ_FLOW_MAX_RATE])
                q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+       if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
+               q->low_rate_threshold =
+                       nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+
        if (tb[TCA_FQ_RATE_ENABLE]) {
                u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
@@ -774,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
        q->initial_quantum      = 10 * psched_mtu(qdisc_dev(sch));
        q->flow_refill_delay    = msecs_to_jiffies(40);
        q->flow_max_rate        = ~0U;
+       q->time_next_delayed_flow = ~0ULL;
        q->rate_enable          = 1;
        q->new_flows.first      = NULL;
        q->old_flows.first      = NULL;
@@ -781,6 +809,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
        q->fq_root              = NULL;
        q->fq_trees_log         = ilog2(1024);
        q->orphan_mask          = 1024 - 1;
+       q->low_rate_threshold   = 550000 / 8;
        qdisc_watchdog_init(&q->watchdog, sch);
 
        if (opt)
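For reference, rates here are in bytes per second (matching sk_pacing_rate and flow_max_rate), so the default low_rate_threshold of 550000 / 8 = 68750 bytes/sec corresponds to 550 kbit/s. Flows paced at or below that rate take the f->credit = 0 branch in the dequeue hunk above and are scheduled packet by packet instead of in quantum-sized bursts.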
@@ -811,6 +840,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
            nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
                        jiffies_to_usecs(q->flow_refill_delay)) ||
            nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+           nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
+                       q->low_rate_threshold) ||
            nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
                goto nla_put_failure;
 
@@ -823,20 +854,24 @@ nla_put_failure:
 static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
        struct fq_sched_data *q = qdisc_priv(sch);
-       u64 now = ktime_get_ns();
-       struct tc_fq_qd_stats st = {
-               .gc_flows               = q->stat_gc_flows,
-               .highprio_packets       = q->stat_internal_packets,
-               .tcp_retrans            = q->stat_tcp_retrans,
-               .throttled              = q->stat_throttled,
-               .flows_plimit           = q->stat_flows_plimit,
-               .pkts_too_long          = q->stat_pkts_too_long,
-               .allocation_errors      = q->stat_allocation_errors,
-               .flows                  = q->flows,
-               .inactive_flows         = q->inactive_flows,
-               .throttled_flows        = q->throttled_flows,
-               .time_next_delayed_flow = q->time_next_delayed_flow - now,
-       };
+       struct tc_fq_qd_stats st;
+
+       sch_tree_lock(sch);
+
+       st.gc_flows               = q->stat_gc_flows;
+       st.highprio_packets       = q->stat_internal_packets;
+       st.tcp_retrans            = q->stat_tcp_retrans;
+       st.throttled              = q->stat_throttled;
+       st.flows_plimit           = q->stat_flows_plimit;
+       st.pkts_too_long          = q->stat_pkts_too_long;
+       st.allocation_errors      = q->stat_allocation_errors;
+       st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+       st.flows                  = q->flows;
+       st.inactive_flows         = q->inactive_flows;
+       st.throttled_flows        = q->throttled_flows;
+       st.unthrottle_latency_ns  = min_t(unsigned long,
+                                         q->unthrottle_latency_ns, ~0U);
+       sch_tree_unlock(sch);
 
        return gnet_stats_copy_app(d, &st, sizeof(st));
 }
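A note on the stats export above: q->unthrottle_latency_ns is an unsigned long, and the min_t(..., ~0U) clamp reflects that the field copied to userspace is only 32 bits wide, so an EWMA above roughly 4.29 seconds is reported as the saturated maximum. Wrapping the copy in sch_tree_lock()/sch_tree_unlock() reads the counters under the qdisc tree lock, so the exported snapshot is internally consistent.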