sched: Optimize rq_lockp() usage
[mirror_ubuntu-kernels.git] kernel/sched/fair.c
index 1d75af1ecfb480de847d88a0b169e133d2f53a44..18960d00708a40265f87913bb631a17f67140a22 100644
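Most hunks below mechanically convert direct use of the per-runqueue lock (&rq->lock) to the wrapper helpers introduced alongside core scheduling, so that rq_lockp()/__rq_lockp() can decide which lock actually covers the rq. As orientation only, here is a simplified sketch of what those wrappers look like; the real definitions live in kernel/sched/sched.h and additionally handle the core-scheduling case where SMT siblings share one lock:

	/* Sketch only: rq->lock has been renamed rq->__lock and is accessed via helpers. */
	static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
	{
		return &rq->__lock;			/* always the rq's own lock */
	}

	#define lockdep_assert_rq_held(rq)	lockdep_assert_held(__rq_lockp(rq))

	static inline void raw_spin_rq_lock(struct rq *rq)
	{
		raw_spin_lock(__rq_lockp(rq));		/* simplified: no core-sched redirect */
	}

	static inline void raw_spin_rq_unlock(struct rq *rq)
	{
		raw_spin_unlock(__rq_lockp(rq));
	}

	#define raw_spin_rq_lock_irqsave(rq, flags)		\
	do {							\
		local_irq_save(flags);				\
		raw_spin_rq_lock(rq);				\
	} while (0)

	#define raw_spin_rq_unlock_irqrestore(rq, flags)	\
	do {							\
		raw_spin_rq_unlock(rq);				\
		local_irq_restore(flags);			\
	} while (0)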
@@ -1107,7 +1107,7 @@ struct numa_group {
 static struct numa_group *deref_task_numa_group(struct task_struct *p)
 {
        return rcu_dereference_check(p->numa_group, p == current ||
-               (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+               (lockdep_is_held(__rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
 }
 
 static struct numa_group *deref_curr_numa_group(struct task_struct *p)
@@ -5328,7 +5328,7 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
        struct task_group *tg;
 
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        rcu_read_lock();
        list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -5347,7 +5347,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
        struct task_group *tg;
 
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        rcu_read_lock();
        list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
        }
 
        if (has_idle_core)
-               set_idle_cores(this, false);
+               set_idle_cores(target, false);
 
        if (sched_feat(SIS_PROP) && !has_idle_core) {
                time = cpu_clock(this) - time;
@@ -6288,6 +6288,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
                task_util = uclamp_task_util(p);
        }
 
+       /*
+        * per-cpu select_idle_mask usage
+        */
+       lockdep_assert_irqs_disabled();
+
        if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
            asym_fits_capacity(task_util, target))
                return target;
@@ -6661,15 +6666,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 {
        unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       int cpu, best_energy_cpu = prev_cpu, target = -1;
        unsigned long cpu_cap, util, base_energy = 0;
-       int cpu, best_energy_cpu = prev_cpu;
        struct sched_domain *sd;
        struct perf_domain *pd;
 
        rcu_read_lock();
        pd = rcu_dereference(rd->pd);
        if (!pd || READ_ONCE(rd->overutilized))
-               goto fail;
+               goto unlock;
 
        /*
         * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6679,7 +6684,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
        while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
                sd = sd->parent;
        if (!sd)
-               goto fail;
+               goto unlock;
+
+       target = prev_cpu;
 
        sync_entity_load_avg(&p->se);
        if (!task_util_est(p))
@@ -6687,13 +6694,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
        for (; pd; pd = pd->next) {
                unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+               bool compute_prev_delta = false;
                unsigned long base_energy_pd;
                int max_spare_cap_cpu = -1;
 
-               /* Compute the 'base' energy of the pd, without @p */
-               base_energy_pd = compute_energy(p, -1, pd);
-               base_energy += base_energy_pd;
-
                for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
@@ -6714,26 +6718,40 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
-                       /* Always use prev_cpu as a candidate. */
                        if (cpu == prev_cpu) {
-                               prev_delta = compute_energy(p, prev_cpu, pd);
-                               prev_delta -= base_energy_pd;
-                               best_delta = min(best_delta, prev_delta);
-                       }
-
-                       /*
-                        * Find the CPU with the maximum spare capacity in
-                        * the performance domain
-                        */
-                       if (spare_cap > max_spare_cap) {
+                               /* Always use prev_cpu as a candidate. */
+                               compute_prev_delta = true;
+                       } else if (spare_cap > max_spare_cap) {
+                               /*
+                                * Find the CPU with the maximum spare capacity
+                                * in the performance domain.
+                                */
                                max_spare_cap = spare_cap;
                                max_spare_cap_cpu = cpu;
                        }
                }
 
-               /* Evaluate the energy impact of using this CPU. */
-               if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+               if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+                       continue;
+
+               /* Compute the 'base' energy of the pd, without @p */
+               base_energy_pd = compute_energy(p, -1, pd);
+               base_energy += base_energy_pd;
+
+               /* Evaluate the energy impact of using prev_cpu. */
+               if (compute_prev_delta) {
+                       prev_delta = compute_energy(p, prev_cpu, pd);
+                       if (prev_delta < base_energy_pd)
+                               goto unlock;
+                       prev_delta -= base_energy_pd;
+                       best_delta = min(best_delta, prev_delta);
+               }
+
+               /* Evaluate the energy impact of using max_spare_cap_cpu. */
+               if (max_spare_cap_cpu >= 0) {
                        cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+                       if (cur_delta < base_energy_pd)
+                               goto unlock;
                        cur_delta -= base_energy_pd;
                        if (cur_delta < best_delta) {
                                best_delta = cur_delta;
@@ -6741,25 +6759,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        }
                }
        }
-unlock:
        rcu_read_unlock();
 
        /*
         * Pick the best CPU if prev_cpu cannot be used, or if it saves at
         * least 6% of the energy used by prev_cpu.
         */
-       if (prev_delta == ULONG_MAX)
-               return best_energy_cpu;
-
-       if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
-               return best_energy_cpu;
+       if ((prev_delta == ULONG_MAX) ||
+           (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+               target = best_energy_cpu;
 
-       return prev_cpu;
+       return target;
 
-fail:
+unlock:
        rcu_read_unlock();
 
-       return -1;
+       return target;
 }
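For reference on the comparison retained above: the margin ((prev_delta + base_energy) >> 4) is 1/16, roughly 6.25%, of the estimated total energy with the task left on prev_cpu, which is where the "6%" in the comment comes from. A worked example with assumed, purely illustrative numbers:

	/* Illustrative values only (assumed, not taken from the kernel or hardware). */
	unsigned long prev_delta  = 1200;	/* extra energy if p stays on prev_cpu      */
	unsigned long best_delta  =  100;	/* extra energy on the best alternative CPU */
	unsigned long base_energy = 14800;	/* summed pd energy without p               */

	/* margin = (1200 + 14800) >> 4 = 1000, ~6.25% of the estimated total */
	if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
		target = best_energy_cpu;	/* 1100 > 1000: worth leaving prev_cpu */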
 
 /*
@@ -6771,8 +6786,6 @@ fail:
  * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
  *
  * Returns the target CPU number.
- *
- * preempt must be disabled.
  */
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
@@ -6785,6 +6798,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
        /* SD_flags and WF_flags share the first nibble */
        int sd_flag = wake_flags & 0xF;
 
+       /*
+        * required for stable ->cpus_allowed
+        */
+       lockdep_assert_held(&p->pi_lock);
        if (wake_flags & WF_TTWU) {
                record_wakee(p);
 
@@ -6874,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
                 * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
                 * rq->lock and can modify state directly.
                 */
-               lockdep_assert_held(&task_rq(p)->lock);
+               lockdep_assert_rq_held(task_rq(p));
                detach_entity_cfs_rq(&p->se);
 
        } else {
@@ -7501,7 +7518,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 {
        s64 delta;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        if (p->sched_class != &fair_sched_class)
                return 0;
@@ -7599,7 +7616,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
        int tsk_cache_hot;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        /*
         * We do not migrate tasks that are:
@@ -7688,7 +7705,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  */
 static void detach_task(struct task_struct *p, struct lb_env *env)
 {
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, env->dst_cpu);
@@ -7704,7 +7721,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 {
        struct task_struct *p;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        list_for_each_entry_reverse(p,
                        &env->src_rq->cfs_tasks, se.group_node) {
@@ -7740,7 +7757,7 @@ static int detach_tasks(struct lb_env *env)
        struct task_struct *p;
        int detached = 0;
 
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
 
        /*
         * Source run queue has been emptied by another CPU, clear
@@ -7870,7 +7887,7 @@ next:
  */
 static void attach_task(struct rq *rq, struct task_struct *p)
 {
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
 
        BUG_ON(task_rq(p) != rq);
        activate_task(rq, p, ENQUEUE_NOCLOCK);
@@ -9781,7 +9798,7 @@ more_balance:
                if (need_active_balance(&env)) {
                        unsigned long flags;
 
-                       raw_spin_lock_irqsave(&busiest->lock, flags);
+                       raw_spin_rq_lock_irqsave(busiest, flags);
 
                        /*
                         * Don't kick the active_load_balance_cpu_stop,
@@ -9789,8 +9806,7 @@ more_balance:
                         * moved to this_cpu:
                         */
                        if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
-                               raw_spin_unlock_irqrestore(&busiest->lock,
-                                                           flags);
+                               raw_spin_rq_unlock_irqrestore(busiest, flags);
                                goto out_one_pinned;
                        }
 
@@ -9807,7 +9823,7 @@ more_balance:
                                busiest->push_cpu = this_cpu;
                                active_balance = 1;
                        }
-                       raw_spin_unlock_irqrestore(&busiest->lock, flags);
+                       raw_spin_rq_unlock_irqrestore(busiest, flags);
 
                        if (active_balance) {
                                stop_one_cpu_nowait(cpu_of(busiest),
@@ -10592,6 +10608,14 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
        u64 curr_cost = 0;
 
        update_misfit_status(NULL, this_rq);
+
+       /*
+        * There is a task waiting to run. No need to search for one.
+        * Return 0; the task will be enqueued when switching to idle.
+        */
+       if (this_rq->ttwu_pending)
+               return 0;
+
        /*
         * We must set idle_stamp _before_ calling idle_balance(), such that we
         * measure the duration of idle_balance() as idle time.
@@ -10624,7 +10648,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                goto out;
        }
 
-       raw_spin_unlock(&this_rq->lock);
+       raw_spin_rq_unlock(this_rq);
 
        update_blocked_averages(this_cpu);
        rcu_read_lock();
@@ -10657,12 +10681,13 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                 * Stop searching for tasks to pull if there are
                 * now runnable tasks on this rq.
                 */
-               if (pulled_task || this_rq->nr_running > 0)
+               if (pulled_task || this_rq->nr_running > 0 ||
+                   this_rq->ttwu_pending)
                        break;
        }
        rcu_read_unlock();
 
-       raw_spin_lock(&this_rq->lock);
+       raw_spin_rq_lock(this_rq);
 
        if (curr_cost > this_rq->max_idle_balance_cost)
                this_rq->max_idle_balance_cost = curr_cost;
@@ -10878,16 +10903,22 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
 {
        struct cfs_rq *cfs_rq;
 
+       list_add_leaf_cfs_rq(cfs_rq_of(se));
+
        /* Start to propagate at parent */
        se = se->parent;
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
 
-               if (cfs_rq_throttled(cfs_rq))
-                       break;
+               if (!cfs_rq_throttled(cfs_rq)) {
+                       update_load_avg(cfs_rq, se, UPDATE_TG);
+                       list_add_leaf_cfs_rq(cfs_rq);
+                       continue;
+               }
 
-               update_load_avg(cfs_rq, se, UPDATE_TG);
+               if (list_add_leaf_cfs_rq(cfs_rq))
+                       break;
        }
 }
 #else
@@ -11143,9 +11174,9 @@ void unregister_fair_sched_group(struct task_group *tg)
 
                rq = cpu_rq(cpu);
 
-               raw_spin_lock_irqsave(&rq->lock, flags);
+               raw_spin_rq_lock_irqsave(rq, flags);
                list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
+               raw_spin_rq_unlock_irqrestore(rq, flags);
        }
 }