]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blobdiff - kernel/sched.c
sched: add /proc/sys/kernel/sched_compat_yield
[mirror_ubuntu-jammy-kernel.git] / kernel / sched.c
index 48e7586168ef45635c1244ffc7dc4c07a213f65c..63e0971c8fbb644b21d77a7393da6dc5b4c41df1 100644 (file)
@@ -668,7 +668,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -684,10 +684,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
         * Check whether we'd overflow the 64-bit multiplication:
         */
        if (unlikely(tmp > WMULT_CONST))
-               tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+               tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
                        WMULT_SHIFT/2);
        else
-               tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+               tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
        return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -858,7 +858,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-       task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
        p->se.wait_runtime = 0;
 
        if (task_has_rt_policy(p)) {
@@ -1587,6 +1586,7 @@ static void __sched_fork(struct task_struct *p)
        p->se.wait_start_fair           = 0;
        p->se.exec_start                = 0;
        p->se.sum_exec_runtime          = 0;
+       p->se.prev_sum_exec_runtime     = 0;
        p->se.delta_exec                = 0;
        p->se.delta_fair_run            = 0;
        p->se.delta_fair_sleep          = 0;
@@ -2180,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
        if (task_running(rq, p))
                return 0;
 
-       /*
-        * Aggressive migration if too many balance attempts have failed:
-        */
-       if (sd->nr_balance_failed > sd->cache_nice_tries)
-               return 1;
-
        return 1;
 }
 
@@ -2517,7 +2511,7 @@ group_next:
         * a think about bumping its value to force at least one task to be
         * moved
         */
-       if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+       if (*imbalance < busiest_load_per_task) {
                unsigned long tmp, pwr_now, pwr_move;
                unsigned int imbn;
 
@@ -2569,10 +2563,8 @@ small_imbalance:
                pwr_move /= SCHED_LOAD_SCALE;
 
                /* Move if we gain throughput */
-               if (pwr_move <= pwr_now)
-                       goto out_balanced;
-
-               *imbalance = busiest_load_per_task;
+               if (pwr_move > pwr_now)
+                       *imbalance = busiest_load_per_task;
        }
 
        return busiest;
@@ -3043,6 +3035,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
        struct sched_domain *sd;
        /* Earliest time when we have to do rebalance again */
        unsigned long next_balance = jiffies + 60*HZ;
+       int update_next_balance = 0;
 
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3079,8 +3072,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
                if (sd->flags & SD_SERIALIZE)
                        spin_unlock(&balancing);
 out:
-               if (time_after(next_balance, sd->last_balance + interval))
+               if (time_after(next_balance, sd->last_balance + interval)) {
                        next_balance = sd->last_balance + interval;
+                       update_next_balance = 1;
+               }
 
                /*
                 * Stop the load balance at this level. There is another
@@ -3090,7 +3085,14 @@ out:
                if (!balance)
                        break;
        }
-       rq->next_balance = next_balance;
+
+       /*
+        * next_balance will be updated only when there is a need.
+        * When the cpu is attached to null domain for ex, it will not be
+        * updated.
+        */
+       if (likely(update_next_balance))
+               rq->next_balance = next_balance;
 }
 
 /*
@@ -4548,10 +4550,7 @@ asmlinkage long sys_sched_yield(void)
        struct rq *rq = this_rq_lock();
 
        schedstat_inc(rq, yld_cnt);
-       if (unlikely(rq->nr_running == 1))
-               schedstat_inc(rq, yld_act_empty);
-       else
-               current->sched_class->yield_task(rq, current);
+       current->sched_class->yield_task(rq, current);
 
        /*
         * Since we are going to call schedule() anyway, there's
@@ -4907,14 +4906,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
        unsigned int factor = 1 + ilog2(num_online_cpus());
-       const unsigned long gran_limit = 100000000;
+       const unsigned long limit = 100000000;
+
+       sysctl_sched_min_granularity *= factor;
+       if (sysctl_sched_min_granularity > limit)
+               sysctl_sched_min_granularity = limit;
 
-       sysctl_sched_granularity *= factor;
-       if (sysctl_sched_granularity > gran_limit)
-               sysctl_sched_granularity = gran_limit;
+       sysctl_sched_latency *= factor;
+       if (sysctl_sched_latency > limit)
+               sysctl_sched_latency = limit;
 
-       sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-       sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+       sysctl_sched_runtime_limit = sysctl_sched_latency;
+       sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
@@ -5257,15 +5260,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
        {
                .procname       = "sched_domain",
-               .mode           = 0755,
+               .mode           = 0555,
        },
        {0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
        {
+               .ctl_name       = CTL_KERN,
                .procname       = "kernel",
-               .mode           = 0755,
+               .mode           = 0555,
                .child          = sd_ctl_dir,
        },
        {0,},
@@ -5341,7 +5345,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
        for_each_domain(cpu, sd) {
                snprintf(buf, 32, "domain%d", i);
                entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0755;
+               entry->mode = 0555;
                entry->child = sd_alloc_ctl_domain_table(sd);
                entry++;
                i++;
@@ -5361,7 +5365,7 @@ static void init_sched_domain_sysctl(void)
        for (i = 0; i < cpu_num; i++, entry++) {
                snprintf(buf, 32, "cpu%d", i);
                entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0755;
+               entry->mode = 0555;
                entry->child = sd_alloc_ctl_cpu_table(i);
        }
        sd_sysctl_header = register_sysctl_table(sd_ctl_root);