sched: Optimize rq_lockp() usage
[mirror_ubuntu-kernels.git] kernel/sched/fair.c
index 1d75af1ecfb480de847d88a0b169e133d2f53a44..18960d00708a40265f87913bb631a17f67140a22 100644
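Most hunks below mechanically convert direct use of the per-runqueue lock (&rq->lock) to the wrapper helpers introduced alongside core scheduling, so that rq_lockp()/__rq_lockp() can decide which lock actually covers the rq. As orientation only, here is a simplified sketch of what those wrappers look like; the real definitions live in kernel/sched/sched.h and additionally handle the core-scheduling case where SMT siblings share one lock:

	/* Sketch only: rq->lock has been renamed rq->__lock and is accessed via helpers. */
	static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
	{
		return &rq->__lock;			/* always the rq's own lock */
	}

	#define lockdep_assert_rq_held(rq)	lockdep_assert_held(__rq_lockp(rq))

	static inline void raw_spin_rq_lock(struct rq *rq)
	{
		raw_spin_lock(__rq_lockp(rq));		/* simplified: no core-sched redirect */
	}

	static inline void raw_spin_rq_unlock(struct rq *rq)
	{
		raw_spin_unlock(__rq_lockp(rq));
	}

	#define raw_spin_rq_lock_irqsave(rq, flags)		\
	do {							\
		local_irq_save(flags);				\
		raw_spin_rq_lock(rq);				\
	} while (0)

	#define raw_spin_rq_unlock_irqrestore(rq, flags)	\
	do {							\
		raw_spin_rq_unlock(rq);				\
		local_irq_restore(flags);			\
	} while (0)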
@@ -1107,7 +1107,7 @@ struct numa_group {
 static struct numa_group *deref_task_numa_group(struct task_struct *p)
 {
        return rcu_dereference_check(p->numa_group, p == current ||
-               (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+               (lockdep_is_held(__rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
 }
 
 static struct numa_group *deref_curr_numa_group(struct task_struct *p)
@@ -5328,7 +5328,7 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
        struct task_group *tg;
 
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        rcu_read_lock();
        list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -5347,7 +5347,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
        struct task_group *tg;
 
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        rcu_read_lock();
        list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
        }
 
        if (has_idle_core)
-               set_idle_cores(this, false);
+               set_idle_cores(target, false);
 
        if (sched_feat(SIS_PROP) && !has_idle_core) {
                time = cpu_clock(this) - time;
@@ -6288,6 +6288,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
                task_util = uclamp_task_util(p);
        }
 
+       /*
+        * per-cpu select_idle_mask usage
+        */
+       lockdep_assert_irqs_disabled();
+
        if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
            asym_fits_capacity(task_util, target))
                return target;
@@ -6661,15 +6666,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 {
        unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       int cpu, best_energy_cpu = prev_cpu, target = -1;
        unsigned long cpu_cap, util, base_energy = 0;
-       int cpu, best_energy_cpu = prev_cpu;
        struct sched_domain *sd;
        struct perf_domain *pd;
 
        rcu_read_lock();
        pd = rcu_dereference(rd->pd);
        if (!pd || READ_ONCE(rd->overutilized))
-               goto fail;
+               goto unlock;
 
        /*
         * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6679,7 +6684,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
        while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
                sd = sd->parent;
        if (!sd)
-               goto fail;
+               goto unlock;
+
+       target = prev_cpu;
 
        sync_entity_load_avg(&p->se);
        if (!task_util_est(p))
@@ -6687,13 +6694,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
        for (; pd; pd = pd->next) {
                unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+               bool compute_prev_delta = false;
                unsigned long base_energy_pd;
                int max_spare_cap_cpu = -1;
 
-               /* Compute the 'base' energy of the pd, without @p */
-               base_energy_pd = compute_energy(p, -1, pd);
-               base_energy += base_energy_pd;
-
                for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
@@ -6714,26 +6718,40 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
-                       /* Always use prev_cpu as a candidate. */
                        if (cpu == prev_cpu) {
-                               prev_delta = compute_energy(p, prev_cpu, pd);
-                               prev_delta -= base_energy_pd;
-                               best_delta = min(best_delta, prev_delta);
-                       }
-
-                       /*
-                        * Find the CPU with the maximum spare capacity in
-                        * the performance domain
-                        */
-                       if (spare_cap > max_spare_cap) {
+                               /* Always use prev_cpu as a candidate. */
+                               compute_prev_delta = true;
+                       } else if (spare_cap > max_spare_cap) {
+                               /*
+                                * Find the CPU with the maximum spare capacity
+                                * in the performance domain.
+                                */
                                max_spare_cap = spare_cap;
                                max_spare_cap_cpu = cpu;
                        }
                }
 
-               /* Evaluate the energy impact of using this CPU. */
-               if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+               if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+                       continue;
+
+               /* Compute the 'base' energy of the pd, without @p */
+               base_energy_pd = compute_energy(p, -1, pd);
+               base_energy += base_energy_pd;
+
+               /* Evaluate the energy impact of using prev_cpu. */
+               if (compute_prev_delta) {
+                       prev_delta = compute_energy(p, prev_cpu, pd);
+                       if (prev_delta < base_energy_pd)
+                               goto unlock;
+                       prev_delta -= base_energy_pd;
+                       best_delta = min(best_delta, prev_delta);
+               }
+
+               /* Evaluate the energy impact of using max_spare_cap_cpu. */
+               if (max_spare_cap_cpu >= 0) {
                        cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+                       if (cur_delta < base_energy_pd)
+                               goto unlock;
                        cur_delta -= base_energy_pd;
                        if (cur_delta < best_delta) {
                                best_delta = cur_delta;
@@ -6741,25 +6759,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        }
                }
        }
-unlock:
        rcu_read_unlock();
 
        /*
         * Pick the best CPU if prev_cpu cannot be used, or if it saves at
         * least 6% of the energy used by prev_cpu.
         */
-       if (prev_delta == ULONG_MAX)
-               return best_energy_cpu;
-
-       if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
-               return best_energy_cpu;
+       if ((prev_delta == ULONG_MAX) ||
+           (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+               target = best_energy_cpu;
 
-       return prev_cpu;
+       return target;
 
-fail:
+unlock:
        rcu_read_unlock();
 
-       return -1;
+       return target;
 }
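For reference on the comparison retained above: the margin ((prev_delta + base_energy) >> 4) is 1/16, roughly 6.25%, of the estimated total energy with the task left on prev_cpu, which is where the "6%" in the comment comes from. A worked example with assumed, purely illustrative numbers:

	/* Illustrative values only (assumed, not taken from the kernel or hardware). */
	unsigned long prev_delta  = 1200;	/* extra energy if p stays on prev_cpu      */
	unsigned long best_delta  =  100;	/* extra energy on the best alternative CPU */
	unsigned long base_energy = 14800;	/* summed pd energy without p               */

	/* margin = (1200 + 14800) >> 4 = 1000, ~6.25% of the estimated total */
	if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
		target = best_energy_cpu;	/* 1100 > 1000: worth leaving prev_cpu */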
 
 /*
@@ -6771,8 +6786,6 @@ fail:
  * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
  *
  * Returns the target CPU number.
- *
- * preempt must be disabled.
  */
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
@@ -6785,6 +6798,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
        /* SD_flags and WF_flags share the first nibble */
        int sd_flag = wake_flags & 0xF;
 
+       /*
+        * required for stable ->cpus_allowed
+        */
+       lockdep_assert_held(&p->pi_lock);
        if (wake_flags & WF_TTWU) {
                record_wakee(p);
 
@@ -6874,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
                 * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
                 * rq->lock and can modify state directly.
                 */
-               lockdep_assert_held(&task_rq(p)->lock);
+               lockdep_assert_rq_held(task_rq(p));
                detach_entity_cfs_rq(&p->se);
 
        } else {
@@ -7501,7 +7518,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 {
        s64 delta;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        if (p->sched_class != &fair_sched_class)
                return 0;
@@ -7599,7 +7616,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
        int tsk_cache_hot;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        /*
         * We do not migrate tasks that are:
@@ -7688,7 +7705,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  */
 static void detach_task(struct task_struct *p, struct lb_env *env)
 {
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, env->dst_cpu);
@@ -7704,7 +7721,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 {
        struct task_struct *p;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        list_for_each_entry_reverse(p,
                        &env->src_rq->cfs_tasks, se.group_node) {
@@ -7740,7 +7757,7 @@ static int detach_tasks(struct lb_env *env)
        struct task_struct *p;
        int detached = 0;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        /*
         * Source run queue has been emptied by another CPU, clear
@@ -7870,7 +7887,7 @@ next:
  */
 static void attach_task(struct rq *rq, struct task_struct *p)
 {
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        BUG_ON(task_rq(p) != rq);
        activate_task(rq, p, ENQUEUE_NOCLOCK);
@@ -9781,7 +9798,7 @@ more_balance:
                if (need_active_balance(&env)) {
                        unsigned long flags;
 
-                       raw_spin_lock_irqsave(&busiest->lock, flags);
+                       raw_spin_rq_lock_irqsave(busiest, flags);
 
                        /*
                         * Don't kick the active_load_balance_cpu_stop,
@@ -9789,8 +9806,7 @@ more_balance:
                         * moved to this_cpu:
                         */
                        if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
-                               raw_spin_unlock_irqrestore(&busiest->lock,
-                                                           flags);
+                               raw_spin_rq_unlock_irqrestore(busiest, flags);
                                goto out_one_pinned;
                        }
 
@@ -9807,7 +9823,7 @@ more_balance:
                                busiest->push_cpu = this_cpu;
                                active_balance = 1;
                        }
-                       raw_spin_unlock_irqrestore(&busiest->lock, flags);
+                       raw_spin_rq_unlock_irqrestore(busiest, flags);
 
                        if (active_balance) {
                                stop_one_cpu_nowait(cpu_of(busiest),
@@ -10592,6 +10608,14 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
        u64 curr_cost = 0;
 
        update_misfit_status(NULL, this_rq);
+
+       /*
+        * There is a task waiting to run. No need to search for one.
+        * Return 0; the task will be enqueued when switching to idle.
+        */
+       if (this_rq->ttwu_pending)
+               return 0;
+
        /*
         * We must set idle_stamp _before_ calling idle_balance(), such that we
         * measure the duration of idle_balance() as idle time.
@@ -10624,7 +10648,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                goto out;
        }
 
-       raw_spin_unlock(&this_rq->lock);
+       raw_spin_rq_unlock(this_rq);
 
        update_blocked_averages(this_cpu);
        rcu_read_lock();
@@ -10657,12 +10681,13 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                 * Stop searching for tasks to pull if there are
                 * now runnable tasks on this rq.
                 */
-               if (pulled_task || this_rq->nr_running > 0)
+               if (pulled_task || this_rq->nr_running > 0 ||
+                   this_rq->ttwu_pending)
                        break;
        }
        rcu_read_unlock();
 
-       raw_spin_lock(&this_rq->lock);
+       raw_spin_rq_lock(this_rq);
 
        if (curr_cost > this_rq->max_idle_balance_cost)
                this_rq->max_idle_balance_cost = curr_cost;
@@ -10878,16 +10903,22 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
 {
        struct cfs_rq *cfs_rq;
 
+       list_add_leaf_cfs_rq(cfs_rq_of(se));
+
        /* Start to propagate at parent */
        se = se->parent;
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
 
-               if (cfs_rq_throttled(cfs_rq))
-                       break;
+               if (!cfs_rq_throttled(cfs_rq)) {
+                       update_load_avg(cfs_rq, se, UPDATE_TG);
+                       list_add_leaf_cfs_rq(cfs_rq);
+                       continue;
+               }
 
-               update_load_avg(cfs_rq, se, UPDATE_TG);
+               if (list_add_leaf_cfs_rq(cfs_rq))
+                       break;
        }
 }
 #else
@@ -11143,9 +11174,9 @@ void unregister_fair_sched_group(struct task_group *tg)
 
                rq = cpu_rq(cpu);
 
-               raw_spin_lock_irqsave(&rq->lock, flags);
+               raw_spin_rq_lock_irqsave(rq, flags);
                list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
+               raw_spin_rq_unlock_irqrestore(rq, flags);
        }
 }