sched/deadline: Always enqueue on previous rq when dl_task_timer() fires
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13049aac05a6242e44a59b9ec424f1004448b3d4..4c49e75ca24dfdc1c4777cbe5960cbd5e08e3cde 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,66 +306,6 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       lockdep_assert_held(&p->pi_lock);
-
-       for (;;) {
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
-       __acquires(p->pi_lock)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       for (;;) {
-               raw_spin_lock_irqsave(&p->pi_lock, *flags);
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-static void __task_rq_unlock(struct rq *rq)
-       __releases(rq->lock)
-{
-       raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
-       __releases(rq->lock)
-       __releases(p->pi_lock)
-{
-       raw_spin_unlock(&rq->lock);
-       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
-
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
@@ -749,6 +689,23 @@ static inline bool got_nohz_idle_kick(void)
 #ifdef CONFIG_NO_HZ_FULL
 bool sched_can_stop_tick(void)
 {
+       /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
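The SCHED_RR test added above asks whether current is the only entity queued
at its realtime priority: on the circular run_list a node whose prev and next
point at the same element can only be linked head <-> node <-> head, so any
second task at that priority makes the test fail and keeps the tick running
for round-robin slicing. A minimal user-space sketch of that list invariant
(a toy list modelled on the kernel's list_head; not kernel code):

#include <assert.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h)
{
        h->prev = h->next = h;
}

static void list_add_tail(struct list_head *n, struct list_head *h)
{
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
}

int main(void)
{
        struct list_head queue, a, b;

        list_init(&queue);

        list_add_tail(&a, &queue);
        assert(a.prev == a.next);       /* alone on the list: both are &queue */

        list_add_tail(&b, &queue);
        assert(a.prev != a.next);       /* a second entry breaks the test */

        return 0;
}
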
@@ -2899,7 +2856,7 @@ void __sched schedule_preempt_disabled(void)
        preempt_disable();
 }
 
-static void preempt_schedule_common(void)
+static void __sched notrace preempt_schedule_common(void)
 {
        do {
                __preempt_count_add(PREEMPT_ACTIVE);
@@ -3094,6 +3051,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
+               if (rt_prio(oldprio))
+                       p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
 
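The added p->rt.timeout = 0 clears the realtime watchdog count when a
PI-deboosted task drops back to the fair class; that counter is what the RT
class compares against the task's RLIMIT_RTTIME budget, so a stale value
could otherwise trigger a spurious SIGXCPU the next time the task is boosted.
For reference, a small user-space sketch of setting that budget (values are
in microseconds):

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl = {
                .rlim_cur = 500000,     /* soft limit: 500 ms of RT CPU time, then SIGXCPU */
                .rlim_max = 1000000,    /* hard limit: 1 s, then SIGKILL */
        };

        if (setrlimit(RLIMIT_RTTIME, &rl) != 0) {
                perror("setrlimit(RLIMIT_RTTIME)");
                return 1;
        }
        printf("RLIMIT_RTTIME: soft=%llu us, hard=%llu us\n",
               (unsigned long long)rl.rlim_cur,
               (unsigned long long)rl.rlim_max);
        return 0;
}
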
@@ -4418,36 +4377,29 @@ EXPORT_SYMBOL_GPL(yield_to);
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
  */
-void __sched io_schedule(void)
-{
-       struct rq *rq = raw_rq();
-
-       delayacct_blkio_start();
-       atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
-       schedule();
-       current->in_iowait = 0;
-       atomic_dec(&rq->nr_iowait);
-       delayacct_blkio_end();
-}
-EXPORT_SYMBOL(io_schedule);
-
 long __sched io_schedule_timeout(long timeout)
 {
-       struct rq *rq = raw_rq();
+       int old_iowait = current->in_iowait;
+       struct rq *rq;
        long ret;
 
+       current->in_iowait = 1;
+       if (old_iowait)
+               blk_schedule_flush_plug(current);
+       else
+               blk_flush_plug(current);
+
        delayacct_blkio_start();
+       rq = raw_rq();
        atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
        ret = schedule_timeout(timeout);
-       current->in_iowait = 0;
+       current->in_iowait = old_iowait;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
+
        return ret;
 }
+EXPORT_SYMBOL(io_schedule_timeout);
 
 /**
  * sys_sched_get_priority_max - return maximum RT priority.
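io_schedule() disappears from core.c in this hunk; io_schedule_timeout() now
saves and restores in_iowait instead of forcing it back to zero, skips the
blocking plug flush when the task was already in iowait, and only picks up
the runqueue after the flush (which may sleep and migrate the task), so the
nr_iowait accounting lands on the runqueue the task actually sleeps on. A
sketch of how the old call can then be expressed in terms of the surviving
one (assumption: the tree moves io_schedule() into a header as a static
inline; shown only to illustrate the relationship between the two calls):

static inline void io_schedule(void)
{
        io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}
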
@@ -5495,17 +5447,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5989,7 +5930,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6300,6 +6240,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -7223,8 +7164,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7264,7 +7205,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7642,6 +7583,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
        struct task_struct *g, *p;
 
+       /*
+        * Autogroups do not have RT tasks; see autogroup_create().
+        */
+       if (task_group_is_autogroup(tg))
+               return 0;
+
        for_each_process_thread(g, p) {
                if (rt_task(p) && task_group(p) == tg)
                        return 1;
@@ -7734,6 +7681,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
 {
        int i, err = 0;
 
+       /*
+        * Disallowing the root group RT runtime is BAD, it would disallow the
+        * kernel creating (and or operating) RT threads.
+        */
+       if (tg == &root_task_group && rt_runtime == 0)
+               return -EINVAL;
+
+       /* No period doesn't make any sense. */
+       if (rt_period == 0)
+               return -EINVAL;
+
        mutex_lock(&rt_constraints_mutex);
        read_lock(&tasklist_lock);
        err = __rt_schedulable(tg, rt_period, rt_runtime);
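The two new checks reject a zero period outright and a zero runtime on the
root group, which would leave the kernel unable to run its own realtime
kthreads; the zero-period check that used to live only in
sched_group_set_rt_period() is removed further down because it is now
enforced here for both callers. With CONFIG_RT_GROUP_SCHED these values are
what cgroup v1 exposes as cpu.rt_period_us and cpu.rt_runtime_us. A
user-space sketch (the /sys/fs/cgroup/cpu mount point and the "rtgroup"
name are assumptions):

#include <stdio.h>

static int write_val(const char *path, long val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fprintf(f, "%ld\n", val);
        return fclose(f);
}

int main(void)
{
        /* Give the group 300 ms of realtime runtime per 1 s period. */
        if (write_val("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_period_us", 1000000) ||
            write_val("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_runtime_us", 300000)) {
                perror("set rt bandwidth");
                return 1;
        }
        return 0;
}
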
@@ -7790,9 +7748,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
        rt_period = (u64)rt_period_us * NSEC_PER_USEC;
        rt_runtime = tg->rt_bandwidth.rt_runtime;
 
-       if (rt_period == 0)
-               return -EINVAL;
-
        return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
@@ -7849,7 +7804,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7950,11 +7905,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;
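The rename makes the ordering explicit: sched_rt_handler() first checks that
the deadline-class bandwidth still fits under the new global
rt_period/rt_runtime via sched_dl_global_validate(), and only then applies
the RT constraints; on any failure it jumps to undo and restores the old
sysctl values. This path runs when the global bandwidth knobs are written,
for example (the runtime default is 950000 us per sysctl_sched_rt_runtime
near the top of the file, against a 1000000 us period):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "w");

        if (!f) {
                perror("open sched_rt_runtime_us");
                return 1;
        }
        /* Let RT tasks use 900 ms of every 1 s period. */
        fprintf(f, "%d\n", 900000);
        if (fclose(f) != 0) {
                perror("write sched_rt_runtime_us");
                return 1;
        }
        return 0;
}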