sched/fair: Scale bandwidth quota and period without losing quota/period ratio precision

author Xuewei Zhang <xueweiz@google.com>

Fri, 4 Oct 2019 00:12:43 +0000 (17:12 -0700)

committer Marcelo Henrique Cerri <marcelo.cerri@canonical.com>

Fri, 17 Jan 2020 17:22:24 +0000 (14:22 -0300)
author Xuewei Zhang <xueweiz@google.com>
Fri, 4 Oct 2019 00:12:43 +0000 (17:12 -0700)
committer Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Fri, 17 Jan 2020 17:22:24 +0000 (14:22 -0300)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a50a874772a50958fc79ac5a937f347d1671d67..b6f560d086ffcd22e8984ca90cb9b083a6e5ccf9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4993,20 +4993,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
                 if (++count > 3) {
                         u64 new, old = ktime_to_ns(cfs_b->period);
  
-                       new = (old * 147) / 128; /* ~115% */
-                       new = min(new, max_cfs_quota_period);
-
-                       cfs_b->period = ns_to_ktime(new);
-
-                       /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
-                       cfs_b->quota *= new;
-                       cfs_b->quota = div64_u64(cfs_b->quota, old);
-
-                       pr_warn_ratelimited(
-        "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
-                               smp_processor_id(),
-                               div_u64(new, NSEC_PER_USEC),
-                                div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       /*
+                        * Grow period by a factor of 2 to avoid losing precision.
+                        * Precision loss in the quota/period ratio can cause __cfs_schedulable
+                        * to fail.
+                        */
+                       new = old * 2;
+                       if (new < max_cfs_quota_period) {
+                               cfs_b->period = ns_to_ktime(new);
+                               cfs_b->quota *= 2;
+
+                               pr_warn_ratelimited(
+       "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+                                       smp_processor_id(),
+                                       div_u64(new, NSEC_PER_USEC),
+                                       div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       } else {
+                               pr_warn_ratelimited(
+       "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+                                       smp_processor_id(),
+                                       div_u64(old, NSEC_PER_USEC),
+                                       div_u64(cfs_b->quota, NSEC_PER_USEC));
+                       }
  
                         /* reset count so we don't come right back in here */
                         count = 0;
author	Xuewei Zhang <xueweiz@google.com>
	Fri, 4 Oct 2019 00:12:43 +0000 (17:12 -0700)
committer	Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
	Fri, 17 Jan 2020 17:22:24 +0000 (14:22 -0300)