[PATCH] sched: reduce debug code

[mirror_ubuntu-kernels.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 1c8076676eb1c99bb3ea325a4ac10b18a2caeeee..72bb9483d9492422eef9a0b4f88bd49e00cfe857 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -53,6 +53,7 @@
  #include <linux/percpu.h>
  #include <linux/kthread.h>
  #include <linux/seq_file.h>
+#include <linux/sysctl.h>
  #include <linux/syscalls.h>
  #include <linux/times.h>
  #include <linux/tsacct_kern.h>
@@ -263,8 +264,6 @@ struct rq {
         unsigned int clock_warps, clock_overflows;
         unsigned int clock_unstable_events;
  
-       struct sched_class *load_balance_class;
-
         atomic_t nr_iowait;
  
  #ifdef CONFIG_SMP
@@ -301,7 +300,7 @@ struct rq {
         struct lock_class_key rq_lock_key;
  };
  
-static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  static DEFINE_MUTEX(sched_hotcpu_mutex);
  
  static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
@@ -379,6 +378,22 @@ static inline unsigned long long rq_clock(struct rq *rq)
  #define task_rq(p)             cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
  
+/*
+ * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
+ * clock constructed from sched_clock():
+ */
+unsigned long long cpu_clock(int cpu)
+{
+       unsigned long long now;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       now = rq_clock(cpu_rq(cpu));
+       local_irq_restore(flags);
+
+       return now;
+}
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
  /* Change a task's ->cfs_rq if it moves across CPUs */
  static inline void set_task_cfs_rq(struct task_struct *p)
@@ -622,7 +637,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
  
  #define WMULT_SHIFT    32
  
-static inline unsigned long
+static unsigned long
  calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                 struct load_weight *lw)
  {
@@ -642,7 +657,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                 tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
         }
  
-       return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit);
+       return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
  }
  
  static inline unsigned long
@@ -663,46 +678,6 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
         lw->inv_weight = 0;
  }
  
-static void __update_curr_load(struct rq *rq, struct load_stat *ls)
-{
-       if (rq->curr != rq->idle && ls->load.weight) {
-               ls->delta_exec += ls->delta_stat;
-               ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
-               ls->delta_stat = 0;
-       }
-}
-
-/*
- * Update delta_exec, delta_fair fields for rq.
- *
- * delta_fair clock advances at a rate inversely proportional to
- * total load (rq->ls.load.weight) on the runqueue, while
- * delta_exec advances at the same rate as wall-clock (provided
- * cpu is not idle).
- *
- * delta_exec / delta_fair is a measure of the (smoothened) load on this
- * runqueue over any given interval. This (smoothened) load is used
- * during load balance.
- *
- * This function is called /before/ updating rq->ls.load
- * and when switching tasks.
- */
-static void update_curr_load(struct rq *rq, u64 now)
-{
-       struct load_stat *ls = &rq->ls;
-       u64 start;
-
-       start = ls->load_update_start;
-       ls->load_update_start = now;
-       ls->delta_stat += now - start;
-       /*
-        * Stagger updates to ls->delta_fair. Very frequent updates
-        * can be expensive.
-        */
-       if (ls->delta_stat >= sysctl_sched_stat_granularity)
-               __update_curr_load(rq, ls);
-}
-
  /*
   * To aid in avoiding the subversion of "niceness" due to uneven distribution
   * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -712,19 +687,6 @@ static void update_curr_load(struct rq *rq, u64 now)
   * slice expiry etc.
   */
  
-/*
- * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE
- * If static_prio_timeslice() is ever changed to break this assumption then
- * this code will need modification
- */
-#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
-#define load_weight(lp) \
-       (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
-#define PRIO_TO_LOAD_WEIGHT(prio) \
-       load_weight(static_prio_timeslice(prio))
-#define RTPRIO_TO_LOAD_WEIGHT(rp) \
-       (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp))
-
  #define WEIGHT_IDLEPRIO                2
  #define WMULT_IDLEPRIO         (1 << 31)
  
@@ -766,32 +728,6 @@ static const u32 prio_to_wmult[40] = {
  /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
  };
  
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
-{
-       update_curr_load(rq, now);
-       update_load_add(&rq->ls.load, p->se.load.weight);
-}
-
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
-{
-       update_curr_load(rq, now);
-       update_load_sub(&rq->ls.load, p->se.load.weight);
-}
-
-static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
-{
-       rq->nr_running++;
-       inc_load(rq, p, now);
-}
-
-static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
-{
-       rq->nr_running--;
-       dec_load(rq, p, now);
-}
-
  static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
  
  /*
@@ -822,6 +758,72 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  
  #define sched_class_highest (&rt_sched_class)
  
+static void __update_curr_load(struct rq *rq, struct load_stat *ls)
+{
+       if (rq->curr != rq->idle && ls->load.weight) {
+               ls->delta_exec += ls->delta_stat;
+               ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
+               ls->delta_stat = 0;
+       }
+}
+
+/*
+ * Update delta_exec, delta_fair fields for rq.
+ *
+ * delta_fair clock advances at a rate inversely proportional to
+ * total load (rq->ls.load.weight) on the runqueue, while
+ * delta_exec advances at the same rate as wall-clock (provided
+ * cpu is not idle).
+ *
+ * delta_exec / delta_fair is a measure of the (smoothened) load on this
+ * runqueue over any given interval. This (smoothened) load is used
+ * during load balance.
+ *
+ * This function is called /before/ updating rq->ls.load
+ * and when switching tasks.
+ */
+static void update_curr_load(struct rq *rq, u64 now)
+{
+       struct load_stat *ls = &rq->ls;
+       u64 start;
+
+       start = ls->load_update_start;
+       ls->load_update_start = now;
+       ls->delta_stat += now - start;
+       /*
+        * Stagger updates to ls->delta_fair. Very frequent updates
+        * can be expensive.
+        */
+       if (ls->delta_stat >= sysctl_sched_stat_granularity)
+               __update_curr_load(rq, ls);
+}
+
+static inline void
+inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+{
+       update_curr_load(rq, now);
+       update_load_add(&rq->ls.load, p->se.load.weight);
+}
+
+static inline void
+dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+{
+       update_curr_load(rq, now);
+       update_load_sub(&rq->ls.load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+{
+       rq->nr_running++;
+       inc_load(rq, p, now);
+}
+
+static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+{
+       rq->nr_running--;
+       dec_load(rq, p, now);
+}
+
  static void set_load_weight(struct task_struct *p)
  {
         task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
@@ -981,18 +983,21 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         u64 clock_offset, fair_clock_offset;
  
         clock_offset = old_rq->clock - new_rq->clock;
-       fair_clock_offset = old_rq->cfs.fair_clock -
-                                                new_rq->cfs.fair_clock;
-       if (p->se.wait_start)
-               p->se.wait_start -= clock_offset;
+       fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock;
+
         if (p->se.wait_start_fair)
                 p->se.wait_start_fair -= fair_clock_offset;
+       if (p->se.sleep_start_fair)
+               p->se.sleep_start_fair -= fair_clock_offset;
+
+#ifdef CONFIG_SCHEDSTATS
+       if (p->se.wait_start)
+               p->se.wait_start -= clock_offset;
         if (p->se.sleep_start)
                 p->se.sleep_start -= clock_offset;
         if (p->se.block_start)
                 p->se.block_start -= clock_offset;
-       if (p->se.sleep_start_fair)
-               p->se.sleep_start_fair -= fair_clock_offset;
+#endif
  
         __set_task_cpu(p, new_cpu);
  }
@@ -1553,17 +1558,19 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
  static void __sched_fork(struct task_struct *p)
  {
         p->se.wait_start_fair           = 0;
-       p->se.wait_start                = 0;
         p->se.exec_start                = 0;
         p->se.sum_exec_runtime          = 0;
         p->se.delta_exec                = 0;
         p->se.delta_fair_run            = 0;
         p->se.delta_fair_sleep          = 0;
         p->se.wait_runtime              = 0;
+       p->se.sleep_start_fair          = 0;
+
+#ifdef CONFIG_SCHEDSTATS
+       p->se.wait_start                = 0;
         p->se.sum_wait_runtime          = 0;
         p->se.sum_sleep_runtime         = 0;
         p->se.sleep_start               = 0;
-       p->se.sleep_start_fair          = 0;
         p->se.block_start               = 0;
         p->se.sleep_max                 = 0;
         p->se.block_max                 = 0;
@@ -1571,10 +1578,15 @@ static void __sched_fork(struct task_struct *p)
         p->se.wait_max                  = 0;
         p->se.wait_runtime_overruns     = 0;
         p->se.wait_runtime_underruns    = 0;
+#endif
  
         INIT_LIST_HEAD(&p->run_list);
         p->se.on_rq = 0;
  
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+       INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
         /*
          * We mark the process as running here, but have not actually
          * inserted it onto the runqueue yet. This guarantees that
@@ -1635,30 +1647,94 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         unsigned long flags;
         struct rq *rq;
         int this_cpu;
+       u64 now;
  
         rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_RUNNING);
         this_cpu = smp_processor_id(); /* parent's CPU */
+       now = rq_clock(rq);
  
         p->prio = effective_prio(p);
  
-       if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) ||
-                       task_cpu(p) != this_cpu || !current->se.on_rq) {
+       if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
+                       (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
+                       !current->se.on_rq) {
+
                 activate_task(rq, p, 0);
         } else {
                 /*
                  * Let the scheduling class do new task startup
                  * management (if any):
                  */
-               p->sched_class->task_new(rq, p);
+               p->sched_class->task_new(rq, p, now);
+               inc_nr_running(p, rq, now);
         }
         check_preempt_curr(rq, p);
         task_rq_unlock(rq, &flags);
  }
  
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being preempted & rescheduled
+ * @notifier: notifier struct to register
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+       hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ * @notifier: notifier struct to unregister
+ *
+ * This is *not* safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+       hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+       struct preempt_notifier *notifier;
+       struct hlist_node *node;
+
+       hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+               notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                struct task_struct *next)
+{
+       struct preempt_notifier *notifier;
+       struct hlist_node *node;
+
+       hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+               notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+                                struct task_struct *next)
+{
+}
+
+#endif
+
  /**
   * prepare_task_switch - prepare to switch tasks
   * @rq: the runqueue preparing to switch
+ * @prev: the current task that is being switched out
   * @next: the task we are going to switch to.
   *
   * This is called with the rq lock held and interrupts off. It must
@@ -1668,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
   * prepare_task_switch sets up locking and calls architecture specific
   * hooks.
   */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+                   struct task_struct *next)
  {
+       fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
  }
@@ -1711,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
         prev_state = prev->state;
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
+       fire_sched_in_preempt_notifiers(current);
         if (mm)
                 mmdrop(mm);
         if (unlikely(prev_state == TASK_DEAD)) {
@@ -1751,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
  {
         struct mm_struct *mm, *oldmm;
  
-       prepare_task_switch(rq, next);
+       prepare_task_switch(rq, prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@ -2235,7 +2315,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
  
                         rq = cpu_rq(i);
  
-                       if (*sd_idle && !idle_cpu(i))
+                       if (*sd_idle && rq->nr_running)
                                 *sd_idle = 0;
  
                         /* Bias balancing toward cpus of our domain */
@@ -2257,9 +2337,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 /*
                  * First idle cpu or the first cpu(busiest) in this sched group
                  * is eligible for doing load balancing at this and above
-                * domains.
+                * domains. In the newly idle case, we will allow all the cpus
+                * to do the newly idle load balance.
                  */
-               if (local_group && balance_cpu != this_cpu && balance) {
+               if (idle != CPU_NEWLY_IDLE && local_group &&
+                   balance_cpu != this_cpu && balance) {
                         *balance = 0;
                         goto ret;
                 }
@@ -2677,6 +2759,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
         unsigned long imbalance;
         int nr_moved = 0;
         int sd_idle = 0;
+       int all_pinned = 0;
         cpumask_t cpus = CPU_MASK_ALL;
  
         /*
@@ -2715,10 +2798,11 @@ redo:
                 double_lock_balance(this_rq, busiest);
                 nr_moved = move_tasks(this_rq, this_cpu, busiest,
                                         minus_1_or_zero(busiest->nr_running),
-                                       imbalance, sd, CPU_NEWLY_IDLE, NULL);
+                                       imbalance, sd, CPU_NEWLY_IDLE,
+                                       &all_pinned);
                 spin_unlock(&busiest->lock);
  
-               if (!nr_moved) {
+               if (unlikely(all_pinned)) {
                         cpu_clear(cpu_of(busiest), cpus);
                         if (!cpus_empty(cpus))
                                 goto redo;
@@ -2822,8 +2906,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
                 schedstat_inc(sd, alb_cnt);
  
                 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-                              RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE,
-                              NULL))
+                              ULONG_MAX, sd, CPU_IDLE, NULL))
                         schedstat_inc(sd, alb_pushed);
                 else
                         schedstat_inc(sd, alb_failed);
@@ -4912,8 +4995,6 @@ static int migration_thread(void *data)
                 struct migration_req *req;
                 struct list_head *head;
  
-               try_to_freeze();
-
                 spin_lock_irq(&rq->lock);
  
                 if (cpu_is_offline(cpu)) {
@@ -5121,10 +5202,127 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
                 if (!next)
                         break;
                 migrate_dead(dead_cpu, next);
+
         }
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+
+static struct ctl_table sd_ctl_dir[] = {
+       {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+       {0,},
+};
+
+static struct ctl_table sd_ctl_root[] = {
+       {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+       {0,},
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+       struct ctl_table *entry =
+               kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
+
+       BUG_ON(!entry);
+       memset(entry, 0, n * sizeof(struct ctl_table));
+
+       return entry;
+}
+
+static void
+set_table_entry(struct ctl_table *entry, int ctl_name,
+               const char *procname, void *data, int maxlen,
+               mode_t mode, proc_handler *proc_handler)
+{
+       entry->ctl_name = ctl_name;
+       entry->procname = procname;
+       entry->data = data;
+       entry->maxlen = maxlen;
+       entry->mode = mode;
+       entry->proc_handler = proc_handler;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+       struct ctl_table *table = sd_alloc_ctl_entry(12);
+       /* Fill slots contiguously: a zeroed slot terminates the table. */
+       set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+               sizeof(long), 0644, proc_doulongvec_minmax);
+       set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+               sizeof(long), 0644, proc_doulongvec_minmax);
+       set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[9], 11, "cache_nice_tries",
+               &sd->cache_nice_tries,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       set_table_entry(&table[10], 13, "flags", &sd->flags,
+               sizeof(int), 0644, proc_dointvec_minmax);
+       /* &table[11] stays zeroed and serves as the terminator */
+       return table;
+}
+
+static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+       struct ctl_table *entry, *table;
+       struct sched_domain *sd;
+       int domain_num = 0, i;
+       char buf[32];
+
+       for_each_domain(cpu, sd)
+               domain_num++;
+       entry = table = sd_alloc_ctl_entry(domain_num + 1);
+
+       i = 0;
+       for_each_domain(cpu, sd) {
+               snprintf(buf, 32, "domain%d", i);
+               entry->ctl_name = i + 1;
+               entry->procname = kstrdup(buf, GFP_KERNEL);
+               entry->mode = 0755;
+               entry->child = sd_alloc_ctl_domain_table(sd);
+               entry++;
+               i++;
+       }
+       return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+static void init_sched_domain_sysctl(void)
+{
+       int i, cpu_num = num_online_cpus();
+       struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+       char buf[32];
+
+       sd_ctl_dir[0].child = entry;
+
+       for (i = 0; i < cpu_num; i++, entry++) {
+               snprintf(buf, 32, "cpu%d", i);
+               entry->ctl_name = i + 1;
+               entry->procname = kstrdup(buf, GFP_KERNEL);
+               entry->mode = 0755;
+               entry->child = sd_alloc_ctl_cpu_table(i);
+       }
+       sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+#else
+static void init_sched_domain_sysctl(void)
+{
+}
+#endif
+
  /*
   * migration_call - callback that gets triggered when a CPU is added.
   * Here we can start up the necessary migration thread for the new CPU.
@@ -5147,7 +5345,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
                 if (IS_ERR(p))
                         return NOTIFY_BAD;
-               p->flags |= PF_NOFREEZE;
                 kthread_bind(p, cpu);
                 /* Must be high prio: stop_machine expects to yield to it. */
                 rq = task_rq_lock(p, &flags);
@@ -6231,6 +6428,8 @@ void __init sched_init_smp(void)
         /* XXX: Theoretical race here - CPU may be hotplugged now */
         hotcpu_notifier(update_sched_domains, 0);
  
+       init_sched_domain_sysctl();
+
         /* Move init over to a non-isolated CPU */
         if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                 BUG();
@@ -6317,6 +6516,10 @@ void __init sched_init(void)
  
         set_load_weight(&init_task);
  
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+       INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
  #ifdef CONFIG_SMP
         nr_cpu_ids = highest_cpu + 1;
         open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
@@ -6382,12 +6585,14 @@ void normalize_rt_tasks(void)
         do_each_thread(g, p) {
                 p->se.fair_key                  = 0;
                 p->se.wait_runtime              = 0;
+               p->se.exec_start                = 0;
                 p->se.wait_start_fair           = 0;
+               p->se.sleep_start_fair          = 0;
+#ifdef CONFIG_SCHEDSTATS
                 p->se.wait_start                = 0;
-               p->se.exec_start                = 0;
                 p->se.sleep_start               = 0;
-               p->se.sleep_start_fair          = 0;
                 p->se.block_start               = 0;
+#endif
                 task_rq(p)->cfs.fair_clock      = 0;
                 task_rq(p)->clock               = 0;