X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=kernel%2Fsched.c;h=72bb9483d9492422eef9a0b4f88bd49e00cfe857;hb=6cfb0d5d06bea2b8791f32145eae539d524e5f6c;hp=0559665a3a0b14e485edccf09282511e0b1fa60a;hpb=af09f1e4b3214569de93bc9309c35014e5c8a3d0;p=mirror_ubuntu-kernels.git diff --git a/kernel/sched.c b/kernel/sched.c index 0559665a3a0b..72bb9483d949 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -263,8 +264,6 @@ struct rq { unsigned int clock_warps, clock_overflows; unsigned int clock_unstable_events; - struct sched_class *load_balance_class; - atomic_t nr_iowait; #ifdef CONFIG_SMP @@ -301,7 +300,7 @@ struct rq { struct lock_class_key rq_lock_key; }; -static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); static DEFINE_MUTEX(sched_hotcpu_mutex); static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) @@ -379,6 +378,22 @@ static inline unsigned long long rq_clock(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) +/* + * For kernel-internal use: high-speed (but slightly incorrect) per-cpu + * clock constructed from sched_clock(): + */ +unsigned long long cpu_clock(int cpu) +{ + unsigned long long now; + unsigned long flags; + + local_irq_save(flags); + now = rq_clock(cpu_rq(cpu)); + local_irq_restore(flags); + + return now; +} + #ifdef CONFIG_FAIR_GROUP_SCHED /* Change a task's ->cfs_rq if it moves across CPUs */ static inline void set_task_cfs_rq(struct task_struct *p) @@ -622,7 +637,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) #define WMULT_SHIFT 32 -static inline unsigned long +static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) { @@ -642,7 +657,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; } - return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit); + return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } static inline unsigned long @@ -663,6 +678,86 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec) lw->inv_weight = 0; } +/* + * To aid in avoiding the subversion of "niceness" due to uneven distribution + * of tasks with abnormal "nice" values across CPUs the contribution that + * each task makes to its run queue's load is weighted according to its + * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a + * scaled version of the new time slice allocation that they receive on time + * slice expiry etc. + */ + +#define WEIGHT_IDLEPRIO 2 +#define WMULT_IDLEPRIO (1 << 31) + +/* + * Nice levels are multiplicative, with a gentle 10% change for every + * nice level changed. I.e. when a CPU-bound task goes from nice 0 to + * nice 1, it will get ~10% less CPU time than another CPU-bound task + * that remained on nice 0. + * + * The "10% effect" is relative and cumulative: from _any_ nice level, + * if you go up 1 level, it's -10% CPU usage, if you go down 1 level + * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. + * If a task goes up by ~10% and another task goes down by ~10% then + * the relative distance between them is ~25%.) 
+ */ +static const int prio_to_weight[40] = { +/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, +/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, +/* 0 */ NICE_0_LOAD /* 1024 */, +/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, +/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, +}; + +/* + * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated. + * + * In cases where the weight does not change often, we can use the + * precalculated inverse to speed up arithmetics by turning divisions + * into multiplications: + */ +static const u32 prio_to_wmult[40] = { +/* -20 */ 48356, 60446, 75558, 94446, 118058, +/* -15 */ 147573, 184467, 230589, 288233, 360285, +/* -10 */ 450347, 562979, 703746, 879575, 1099582, +/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, +/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, +/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, +/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, +/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, +}; + +static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); + +/* + * runqueue iterator, to support SMP load-balancing between different + * scheduling classes, without having to expose their internal data + * structures to the load-balancing proper: + */ +struct rq_iterator { + void *arg; + struct task_struct *(*start)(void *); + struct task_struct *(*next)(void *); +}; + +static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, + unsigned long max_nr_move, unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, unsigned long *load_moved, + int this_best_prio, int best_prio, int best_prio_seen, + struct rq_iterator *iterator); + +#include "sched_stats.h" +#include "sched_rt.c" +#include "sched_fair.c" +#include "sched_idletask.c" +#ifdef CONFIG_SCHED_DEBUG +# include "sched_debug.c" +#endif + +#define sched_class_highest (&rt_sched_class) + static void __update_curr_load(struct rq *rq, struct load_stat *ls) { if (rq->curr != rq->idle && ls->load.weight) { @@ -703,60 +798,6 @@ static void update_curr_load(struct rq *rq, u64 now) __update_curr_load(rq, ls); } -/* - * To aid in avoiding the subversion of "niceness" due to uneven distribution - * of tasks with abnormal "nice" values across CPUs the contribution that - * each task makes to its run queue's load is weighted according to its - * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a - * scaled version of the new time slice allocation that they receive on time - * slice expiry etc. - */ - -/* - * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE - * If static_prio_timeslice() is ever changed to break this assumption then - * this code will need modification - */ -#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE -#define load_weight(lp) \ - (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO) -#define PRIO_TO_LOAD_WEIGHT(prio) \ - load_weight(static_prio_timeslice(prio)) -#define RTPRIO_TO_LOAD_WEIGHT(rp) \ - (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp)) - -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) - -/* - * Nice levels are multiplicative, with a gentle 10% change for every - * nice level changed. I.e. when a CPU-bound task goes from nice 0 to - * nice 1, it will get ~10% less CPU time than another CPU-bound task - * that remained on nice 0. 
- *
- * The "10% effect" is relative and cumulative: from _any_ nice level,
- * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage.
- */
-static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
-};
-
-static const u32 prio_to_wmult[40] = {
- 48356, 60446, 75558, 94446, 118058, 147573,
- 184467, 230589, 288233, 360285, 450347,
- 562979, 703746, 879575, 1099582, 1374389,
- 1717986, 2147483, 2684354, 3355443, 4194304,
- 5244160, 6557201, 8196502, 10250518, 12782640,
- 16025997, 19976592, 24970740, 31350126, 39045157,
- 49367440, 61356675, 76695844, 95443717, 119304647,
- 148102320, 186737708, 238609294, 286331153,
-};
-
 static inline void
 inc_load(struct rq *rq, const struct task_struct *p, u64 now)
 {
@@ -771,48 +812,18 @@ dec_load(struct rq *rq, const struct task_struct *p, u64 now)
 update_load_sub(&rq->ls.load, p->se.load.weight);
 }
 
-static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
 {
 rq->nr_running++;
 inc_load(rq, p, now);
 }
 
-static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
 {
 rq->nr_running--;
 dec_load(rq, p, now);
 }
 
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
-
-/*
- * runqueue iterator, to support SMP load-balancing between different
- * scheduling classes, without having to expose their internal data
- * structures to the load-balancing proper:
- */
-struct rq_iterator {
- void *arg;
- struct task_struct *(*start)(void *);
- struct task_struct *(*next)(void *);
-};
-
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
- struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator);
-
-#include "sched_stats.h"
-#include "sched_rt.c"
-#include "sched_fair.c"
-#include "sched_idletask.c"
-#ifdef CONFIG_SCHED_DEBUG
-# include "sched_debug.c"
-#endif
-
-#define sched_class_highest (&rt_sched_class)
-
 static void set_load_weight(struct task_struct *p)
 {
 task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
@@ -972,18 +983,21 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 u64 clock_offset, fair_clock_offset;
 
 clock_offset = old_rq->clock - new_rq->clock;
- fair_clock_offset = old_rq->cfs.fair_clock -
- new_rq->cfs.fair_clock;
- if (p->se.wait_start)
- p->se.wait_start -= clock_offset;
+ fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock;
+
 if (p->se.wait_start_fair)
 p->se.wait_start_fair -= fair_clock_offset;
+ if (p->se.sleep_start_fair)
+ p->se.sleep_start_fair -= fair_clock_offset;
+
+#ifdef CONFIG_SCHEDSTATS
+ if (p->se.wait_start)
+ p->se.wait_start -= clock_offset;
 if (p->se.sleep_start)
 p->se.sleep_start -= clock_offset;
 if (p->se.block_start)
 p->se.block_start -= clock_offset;
- if (p->se.sleep_start_fair)
- p->se.sleep_start_fair -= fair_clock_offset;
+#endif
 
 __set_task_cpu(p, new_cpu);
 }
@@ -1544,17 +1558,19 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 static void
__sched_fork(struct task_struct *p) { p->se.wait_start_fair = 0; - p->se.wait_start = 0; p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.delta_exec = 0; p->se.delta_fair_run = 0; p->se.delta_fair_sleep = 0; p->se.wait_runtime = 0; + p->se.sleep_start_fair = 0; + +#ifdef CONFIG_SCHEDSTATS + p->se.wait_start = 0; p->se.sum_wait_runtime = 0; p->se.sum_sleep_runtime = 0; p->se.sleep_start = 0; - p->se.sleep_start_fair = 0; p->se.block_start = 0; p->se.sleep_max = 0; p->se.block_max = 0; @@ -1562,10 +1578,15 @@ static void __sched_fork(struct task_struct *p) p->se.wait_max = 0; p->se.wait_runtime_overruns = 0; p->se.wait_runtime_underruns = 0; +#endif INIT_LIST_HEAD(&p->run_list); p->se.on_rq = 0; +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); +#endif + /* * We mark the process as running here, but have not actually * inserted it onto the runqueue yet. This guarantees that @@ -1626,30 +1647,94 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) unsigned long flags; struct rq *rq; int this_cpu; + u64 now; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ + now = rq_clock(rq); p->prio = effective_prio(p); - if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) || - task_cpu(p) != this_cpu || !current->se.on_rq) { + if (!p->sched_class->task_new || !sysctl_sched_child_runs_first || + (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu || + !current->se.on_rq) { + activate_task(rq, p, 0); } else { /* * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p); + p->sched_class->task_new(rq, p, now); + inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); task_rq_unlock(rq, &flags); } +#ifdef CONFIG_PREEMPT_NOTIFIERS + +/** + * preempt_notifier_register - tell me when current is being being preempted & rescheduled + * @notifier: notifier struct to register + */ +void preempt_notifier_register(struct preempt_notifier *notifier) +{ + hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); +} +EXPORT_SYMBOL_GPL(preempt_notifier_register); + +/** + * preempt_notifier_unregister - no longer interested in preemption notifications + * @notifier: notifier struct to unregister + * + * This is safe to call from within a preemption notifier. + */ +void preempt_notifier_unregister(struct preempt_notifier *notifier) +{ + hlist_del(¬ifier->link); +} +EXPORT_SYMBOL_GPL(preempt_notifier_unregister); + +static void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ + struct preempt_notifier *notifier; + struct hlist_node *node; + + hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + notifier->ops->sched_in(notifier, raw_smp_processor_id()); +} + +static void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ + struct preempt_notifier *notifier; + struct hlist_node *node; + + hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + notifier->ops->sched_out(notifier, next); +} + +#else + +static void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ +} + +static void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ +} + +#endif + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch + * @prev: the current task that is being switched out * @next: the task we are going to switch to. * * This is called with the rq lock held and interrupts off. 
It must @@ -1659,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * prepare_task_switch sets up locking and calls architecture specific * hooks. */ -static inline void prepare_task_switch(struct rq *rq, struct task_struct *next) +static inline void +prepare_task_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) { + fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); } @@ -1702,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); + fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { @@ -1742,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev, { struct mm_struct *mm, *oldmm; - prepare_task_switch(rq, next); + prepare_task_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* @@ -2226,7 +2315,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, rq = cpu_rq(i); - if (*sd_idle && !idle_cpu(i)) + if (*sd_idle && rq->nr_running) *sd_idle = 0; /* Bias balancing toward cpus of our domain */ @@ -2248,9 +2337,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, /* * First idle cpu or the first cpu(busiest) in this sched group * is eligible for doing load balancing at this and above - * domains. + * domains. In the newly idle case, we will allow all the cpu's + * to do the newly idle load balance. */ - if (local_group && balance_cpu != this_cpu && balance) { + if (idle != CPU_NEWLY_IDLE && local_group && + balance_cpu != this_cpu && balance) { *balance = 0; goto ret; } @@ -2668,6 +2759,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) unsigned long imbalance; int nr_moved = 0; int sd_idle = 0; + int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; /* @@ -2706,10 +2798,11 @@ redo: double_lock_balance(this_rq, busiest); nr_moved = move_tasks(this_rq, this_cpu, busiest, minus_1_or_zero(busiest->nr_running), - imbalance, sd, CPU_NEWLY_IDLE, NULL); + imbalance, sd, CPU_NEWLY_IDLE, + &all_pinned); spin_unlock(&busiest->lock); - if (!nr_moved) { + if (unlikely(all_pinned)) { cpu_clear(cpu_of(busiest), cpus); if (!cpus_empty(cpus)) goto redo; @@ -2813,8 +2906,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) schedstat_inc(sd, alb_cnt); if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE, - NULL)) + ULONG_MAX, sd, CPU_IDLE, NULL)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -4903,8 +4995,6 @@ static int migration_thread(void *data) struct migration_req *req; struct list_head *head; - try_to_freeze(); - spin_lock_irq(&rq->lock); if (cpu_is_offline(cpu)) { @@ -5112,10 +5202,127 @@ static void migrate_dead_tasks(unsigned int dead_cpu) if (!next) break; migrate_dead(dead_cpu, next); + } } #endif /* CONFIG_HOTPLUG_CPU */ +#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) + +static struct ctl_table sd_ctl_dir[] = { + {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, + {0,}, +}; + +static struct ctl_table sd_ctl_root[] = { + {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, + {0,}, +}; + +static struct ctl_table *sd_alloc_ctl_entry(int n) +{ + struct ctl_table *entry = + kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL); + + BUG_ON(!entry); + memset(entry, 0, n * sizeof(struct ctl_table)); + + return entry; +} + +static void 
+set_table_entry(struct ctl_table *entry, int ctl_name, + const char *procname, void *data, int maxlen, + mode_t mode, proc_handler *proc_handler) +{ + entry->ctl_name = ctl_name; + entry->procname = procname; + entry->data = data; + entry->maxlen = maxlen; + entry->mode = mode; + entry->proc_handler = proc_handler; +} + +static struct ctl_table * +sd_alloc_ctl_domain_table(struct sched_domain *sd) +{ + struct ctl_table *table = sd_alloc_ctl_entry(14); + + set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, + sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, + sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[10], 11, "cache_nice_tries", + &sd->cache_nice_tries, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[12], 13, "flags", &sd->flags, + sizeof(int), 0644, proc_dointvec_minmax); + + return table; +} + +static ctl_table *sd_alloc_ctl_cpu_table(int cpu) +{ + struct ctl_table *entry, *table; + struct sched_domain *sd; + int domain_num = 0, i; + char buf[32]; + + for_each_domain(cpu, sd) + domain_num++; + entry = table = sd_alloc_ctl_entry(domain_num + 1); + + i = 0; + for_each_domain(cpu, sd) { + snprintf(buf, 32, "domain%d", i); + entry->ctl_name = i + 1; + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0755; + entry->child = sd_alloc_ctl_domain_table(sd); + entry++; + i++; + } + return table; +} + +static struct ctl_table_header *sd_sysctl_header; +static void init_sched_domain_sysctl(void) +{ + int i, cpu_num = num_online_cpus(); + struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); + char buf[32]; + + sd_ctl_dir[0].child = entry; + + for (i = 0; i < cpu_num; i++, entry++) { + snprintf(buf, 32, "cpu%d", i); + entry->ctl_name = i + 1; + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0755; + entry->child = sd_alloc_ctl_cpu_table(i); + } + sd_sysctl_header = register_sysctl_table(sd_ctl_root); +} +#else +static void init_sched_domain_sysctl(void) +{ +} +#endif + /* * migration_call - callback that gets triggered when a CPU is added. * Here we can start up the necessary migration thread for the new CPU. @@ -5138,7 +5345,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); if (IS_ERR(p)) return NOTIFY_BAD; - p->flags |= PF_NOFREEZE; kthread_bind(p, cpu); /* Must be high prio: stop_machine expects to yield to it. 
*/ rq = task_rq_lock(p, &flags); @@ -6222,6 +6428,8 @@ void __init sched_init_smp(void) /* XXX: Theoretical race here - CPU may be hotplugged now */ hotcpu_notifier(update_sched_domains, 0); + init_sched_domain_sysctl(); + /* Move init over to a non-isolated CPU */ if (set_cpus_allowed(current, non_isolated_cpus) < 0) BUG(); @@ -6308,6 +6516,10 @@ void __init sched_init(void) set_load_weight(&init_task); +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&init_task.preempt_notifiers); +#endif + #ifdef CONFIG_SMP nr_cpu_ids = highest_cpu + 1; open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); @@ -6373,12 +6585,14 @@ void normalize_rt_tasks(void) do_each_thread(g, p) { p->se.fair_key = 0; p->se.wait_runtime = 0; + p->se.exec_start = 0; p->se.wait_start_fair = 0; + p->se.sleep_start_fair = 0; +#ifdef CONFIG_SCHEDSTATS p->se.wait_start = 0; - p->se.exec_start = 0; p->se.sleep_start = 0; - p->se.sleep_start_fair = 0; p->se.block_start = 0; +#endif task_rq(p)->cfs.fair_clock = 0; task_rq(p)->clock = 0;
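
The weighting arithmetic that the hunks above introduce (prio_to_weight[], prio_to_wmult[] and the WMULT_SHIFT fixed-point multiply in calc_delta_mine()) can be illustrated with a small userspace sketch. This is not part of the patch: scale_delta() is a hypothetical stand-in for calc_delta_mine(), it computes the 2^32/weight inverse at runtime instead of reading it from prio_to_wmult[], it omits the overflow handling the kernel version has, and NICE_0_LOAD is assumed to be 1024 as the comment in the prio_to_weight[] table indicates.

/*
 * Standalone userspace sketch (not from the patch): scale a nanosecond
 * runtime delta by NICE_0_LOAD/weight the way calc_delta_mine() does,
 * using a 2^32/weight fixed-point inverse instead of a division.
 */
#include <stdio.h>
#include <stdint.h>

#define WMULT_SHIFT	32
#define NICE_0_LOAD	1024UL	/* assumed value, per the prio_to_weight[] comment */

/* weights copied from the prio_to_weight[] table added by the patch */
static const int prio_to_weight[40] = {
/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
/* -10 */  9537,  7629,  6103,  4883,  3906,  3125,  2500,  2000,  1600,  1280,
/*   0 */  1024,   819,   655,   524,   419,   336,   268,   215,   172,   137,
/*  10 */   110,    87,    70,    56,    45,    36,    29,    23,    18,    15,
};

/*
 * Hypothetical stand-in for calc_delta_mine(): the division below computes
 * the same inverse that the kernel precalculates in prio_to_wmult[].
 */
static unsigned long scale_delta(unsigned long delta_exec, unsigned long weight)
{
	uint64_t inv_weight = ((uint64_t)1 << WMULT_SHIFT) / weight;
	uint64_t tmp = (uint64_t)delta_exec * NICE_0_LOAD;

	/* small demo values: tmp * inv_weight cannot overflow 64 bits here */
	return (unsigned long)((tmp * inv_weight) >> WMULT_SHIFT);
}

int main(void)
{
	unsigned long delta = 1000000;	/* 1 ms of runtime, in ns */
	int nice;

	for (nice = -2; nice <= 2; nice++) {
		int weight = prio_to_weight[nice + 20];

		printf("nice %2d: weight %5d -> weighted delta %lu\n",
		       nice, weight, scale_delta(delta, weight));
	}
	return 0;
}

Running this shows the weighted delta growing by roughly 25% per nice level, which is the cumulative "10% effect" described in the comment above; precalculating 2^32/weight lets the scheduler perform this scaling with a multiply and a shift rather than a 64-bit division, which pays off because a task's weight only changes when its nice level does.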