diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index e5458b99e09cb4d4226a1ef49a7ec81f2e20d94a..18e752439f6f42a944a96f704105ea6e98707284 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -86,6 +86,7 @@ struct fq_sched_data {
 
        struct rb_root  delayed;        /* for rate limited flows */
        u64             time_next_delayed_flow;
+       unsigned long   unthrottle_latency_ns;
 
        struct fq_flow  internal;       /* for non classified or high prio packets */
        u32             quantum;
@@ -94,6 +95,7 @@ struct fq_sched_data {
        u32             flow_max_rate;  /* optional max rate per flow */
        u32             flow_plimit;    /* max packets per flow */
        u32             orphan_mask;    /* mask for orphaned skb */
+       u32             low_rate_threshold;
        struct rb_root  *fq_root;
        u8              rate_enable;
        u8              fq_trees_log;
@@ -407,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+       unsigned long sample;
        struct rb_node *p;
 
        if (q->time_next_delayed_flow > now)
                return;
 
+       /* Update unthrottle latency EWMA.
+        * This is cheap and can help diagnose timer/latency problems.
+        */
+       sample = (unsigned long)(now - q->time_next_delayed_flow);
+       q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+       q->unthrottle_latency_ns += sample >> 3;
+
        q->time_next_delayed_flow = ~0ULL;
        while ((p = rb_first(&q->delayed)) != NULL) {
                struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
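A side note on the EWMA added above: with a shift of 3, each update keeps 7/8 of the previous average and mixes in 1/8 of the new sample, i.e. avg = avg - avg/8 + sample/8 in integer arithmetic. A minimal userspace sketch of that update (ewma_shift3() is a hypothetical name used only for illustration, not part of sch_fq.c):

    static unsigned long ewma_shift3(unsigned long avg, unsigned long sample)
    {
            avg -= avg >> 3;        /* drop 1/8 of the old average, keep 7/8 */
            avg += sample >> 3;     /* mix in 1/8 of the new sample */
            return avg;             /* ~= 7/8 * avg + 1/8 * sample */
    }

Fed a constant sample, the average converges to that sample; a single outlier only moves it by sample/8, which is why the update is cheap yet still useful for spotting hrtimer latency problems.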
@@ -433,7 +443,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
        struct fq_flow_head *head;
        struct sk_buff *skb;
        struct fq_flow *f;
-       u32 rate;
+       u32 rate, plen;
 
        skb = fq_dequeue_head(sch, &q->internal);
        if (skb)
@@ -482,7 +492,7 @@ begin:
        prefetch(&skb->end);
        f->credit -= qdisc_pkt_len(skb);
 
-       if (f->credit > 0 || !q->rate_enable)
+       if (!q->rate_enable)
                goto out;
 
        /* Do not pace locally generated ack packets */
@@ -493,8 +503,15 @@ begin:
        if (skb->sk)
                rate = min(skb->sk->sk_pacing_rate, rate);
 
+       if (rate <= q->low_rate_threshold) {
+               f->credit = 0;
+               plen = qdisc_pkt_len(skb);
+       } else {
+               plen = max(qdisc_pkt_len(skb), q->quantum);
+               if (f->credit > 0)
+                       goto out;
+       }
        if (rate != ~0U) {
-               u32 plen = max(qdisc_pkt_len(skb), q->quantum);
                u64 len = (u64)plen * NSEC_PER_SEC;
 
                if (likely(rate))
@@ -507,7 +524,12 @@ begin:
                        len = NSEC_PER_SEC;
                        q->stat_pkts_too_long++;
                }
-
+               /* Account for scheduling/timer drift.
+                * f->time_next_packet was set when the prior packet was sent,
+                * and the current time (@now) can be too late by tens of usec.
+                */
+               if (f->time_next_packet)
+                       len -= min(len/2, now - f->time_next_packet);
                f->time_next_packet = now + len;
        }
 out:
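To follow the pacing arithmetic above in isolation, here is a hedged userspace sketch (next_tx_time() is a hypothetical helper, not kernel code). It assumes now >= prev_deadline, which holds in fq_dequeue() because a flow whose deadline lies in the future is throttled instead of dequeued:

    #include <stdint.h>

    #define NSEC_PER_SEC    1000000000ULL

    static uint64_t next_tx_time(uint64_t now, uint64_t prev_deadline,
                                 uint32_t plen, uint32_t rate)
    {
            uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

            if (rate)
                    len /= rate;            /* ns needed to send plen bytes at rate */
            if (len > NSEC_PER_SEC)
                    len = NSEC_PER_SEC;     /* never delay a flow by more than 1 sec */
            if (prev_deadline) {
                    /* this dequeue may run tens of usec after the previous
                     * deadline; give back at most half of the new delay
                     */
                    uint64_t late = now - prev_deadline;

                    len -= late < len / 2 ? late : len / 2;
            }
            return now + len;
    }

For example, a 1500 byte packet at rate = 125000 bytes/sec gives len = 12 ms; if this dequeue ran 50 usec after the previous deadline, the next deadline is pulled in by those 50 usec.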
@@ -662,6 +684,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
        [TCA_FQ_FLOW_MAX_RATE]          = { .type = NLA_U32 },
        [TCA_FQ_BUCKETS_LOG]            = { .type = NLA_U32 },
        [TCA_FQ_FLOW_REFILL_DELAY]      = { .type = NLA_U32 },
+       [TCA_FQ_LOW_RATE_THRESHOLD]     = { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +739,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
        if (tb[TCA_FQ_FLOW_MAX_RATE])
                q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+       if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
+               q->low_rate_threshold =
+                       nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+
        if (tb[TCA_FQ_RATE_ENABLE]) {
                u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
@@ -774,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
        q->initial_quantum      = 10 * psched_mtu(qdisc_dev(sch));
        q->flow_refill_delay    = msecs_to_jiffies(40);
        q->flow_max_rate        = ~0U;
+       q->time_next_delayed_flow = ~0ULL;
        q->rate_enable          = 1;
        q->new_flows.first      = NULL;
        q->old_flows.first      = NULL;
@@ -781,6 +809,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
        q->fq_root              = NULL;
        q->fq_trees_log         = ilog2(1024);
        q->orphan_mask          = 1024 - 1;
+       q->low_rate_threshold   = 550000 / 8;
        qdisc_watchdog_init(&q->watchdog, sch);
 
        if (opt)
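For reference, rates here are in bytes per second (matching sk_pacing_rate and flow_max_rate), so the default low_rate_threshold of 550000 / 8 = 68750 bytes/sec corresponds to 550 kbit/s. Flows paced at or below that rate take the f->credit = 0 branch in the dequeue hunk above and are scheduled packet by packet instead of in quantum-sized bursts.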
@@ -811,6 +840,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
            nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
                        jiffies_to_usecs(q->flow_refill_delay)) ||
            nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+           nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
+                       q->low_rate_threshold) ||
            nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
                goto nla_put_failure;
 
@@ -823,20 +854,24 @@ nla_put_failure:
 static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
        struct fq_sched_data *q = qdisc_priv(sch);
-       u64 now = ktime_get_ns();
-       struct tc_fq_qd_stats st = {
-               .gc_flows               = q->stat_gc_flows,
-               .highprio_packets       = q->stat_internal_packets,
-               .tcp_retrans            = q->stat_tcp_retrans,
-               .throttled              = q->stat_throttled,
-               .flows_plimit           = q->stat_flows_plimit,
-               .pkts_too_long          = q->stat_pkts_too_long,
-               .allocation_errors      = q->stat_allocation_errors,
-               .flows                  = q->flows,
-               .inactive_flows         = q->inactive_flows,
-               .throttled_flows        = q->throttled_flows,
-               .time_next_delayed_flow = q->time_next_delayed_flow - now,
-       };
+       struct tc_fq_qd_stats st;
+
+       sch_tree_lock(sch);
+
+       st.gc_flows               = q->stat_gc_flows;
+       st.highprio_packets       = q->stat_internal_packets;
+       st.tcp_retrans            = q->stat_tcp_retrans;
+       st.throttled              = q->stat_throttled;
+       st.flows_plimit           = q->stat_flows_plimit;
+       st.pkts_too_long          = q->stat_pkts_too_long;
+       st.allocation_errors      = q->stat_allocation_errors;
+       st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+       st.flows                  = q->flows;
+       st.inactive_flows         = q->inactive_flows;
+       st.throttled_flows        = q->throttled_flows;
+       st.unthrottle_latency_ns  = min_t(unsigned long,
+                                         q->unthrottle_latency_ns, ~0U);
+       sch_tree_unlock(sch);
 
        return gnet_stats_copy_app(d, &st, sizeof(st));
 }
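A note on the stats export above: q->unthrottle_latency_ns is an unsigned long, and the min_t(..., ~0U) clamp reflects that the field copied to userspace is only 32 bits wide, so an EWMA above roughly 4.29 seconds is reported as the saturated maximum. Wrapping the copy in sch_tree_lock()/sch_tree_unlock() reads the counters under the qdisc tree lock, so the exported snapshot is internally consistent.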