Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)
diff --combined drivers/cpuidle/cpuidle.c

index 48b7228563ad7b024b17d49dc8ff9b675049f587,a5d9f2e470ea1fc9f94536f230d60c0633933dda..33253930247f758a9db032632ff14e68e41a1990
--- 1/drivers/cpuidle/cpuidle.c
--- 2/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@@ -112,25 -112,22 +112,27 @@@ int cpuidle_find_deepest_state(struct c
   static void enter_freeze_proper(struct cpuidle_driver *drv,
                                 struct cpuidle_device *dev, int index)
   {
- -      tick_freeze();
+ +      /*
+ +       * trace_suspend_resume() called by tick_freeze() for the last CPU
+ +       * executing it contains RCU usage regarded as invalid in the idle
+ +       * context, so tell RCU about that.
+ +       */
+ +      RCU_NONIDLE(tick_freeze());
         /*
          * The state used here cannot be a "coupled" one, because the "coupled"
          * cpuidle mechanism enables interrupts and doing that with timekeeping
          * suspended is generally unsafe.
          */
+       stop_critical_timings();
         drv->states[index].enter_freeze(dev, drv, index);
         WARN_ON(!irqs_disabled());
         /*
          * timekeeping_resume() that will be called by tick_unfreeze() for the
- -       * last CPU executing it calls functions containing RCU read-side
+ +       * first CPU executing it calls functions containing RCU read-side
          * critical sections, so tell RCU about that.
          */
         RCU_NONIDLE(tick_unfreeze());
+       start_critical_timings();
   }
   
   /**
@@@ -195,7 -192,9 +197,9 @@@ int cpuidle_enter_state(struct cpuidle_
         trace_cpu_idle_rcuidle(index, dev->cpu);
         time_start = ktime_get();
   
+       stop_critical_timings();
         entered_state = target_state->enter(dev, drv, index);
+       start_critical_timings();
   
         time_end = ktime_get();
         trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
diff --combined include/linux/sched.h

index 04b5ada460b44e4cf8cfdd918ec8572ea8683a8d,81bb4577274becf86a1bd487995d6e5130f8e839..119823decc4631eb26842df9fd7a9a1e63709577
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -530,39 -530,49 +530,49 @@@ struct cpu_itimer 
   };
   
   /**
-  * struct cputime - snaphsot of system and user cputime
+  * struct prev_cputime - snapshot of system and user cputime
    * @utime: time spent in user mode
    * @stime: time spent in system mode
+  * @lock: protects the above two fields
    *
-  * Gathers a generic snapshot of user and system time.
+  * Stores previous user/system time values such that we can guarantee
+  * monotonicity.
    */
- struct cputime {
+ struct prev_cputime {
+ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
         cputime_t utime;
         cputime_t stime;
+       raw_spinlock_t lock;
+ #endif
   };
   
+ static inline void prev_cputime_init(struct prev_cputime *prev)
+ {
+ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+       prev->utime = prev->stime = 0;
+       raw_spin_lock_init(&prev->lock);
+ #endif
+ }
+ 
   /**
    * struct task_cputime - collected CPU time counts
    * @utime:            time spent in user mode, in &cputime_t units
    * @stime:            time spent in kernel mode, in &cputime_t units
    * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
    *
-  * This is an extension of struct cputime that includes the total runtime
-  * spent by the task from the scheduler point of view.
-  *
-  * As a result, this structure groups together three kinds of CPU time
-  * that are tracked for threads and thread groups.  Most things considering
-  * CPU time want to group these counts together and treat all three
-  * of them in parallel.
+  * This structure groups together three kinds of CPU time that are tracked for
+  * threads and thread groups.  Most things considering CPU time want to group
+  * these counts together and treat all three of them in parallel.
    */
   struct task_cputime {
         cputime_t utime;
         cputime_t stime;
         unsigned long long sum_exec_runtime;
   };
+ 
   /* Alternate field names when used to cache expirations. */
- #define prof_exp      stime
   #define virt_exp      utime
+ #define prof_exp      stime
   #define sched_exp     sum_exec_runtime
   
   #define INIT_CPUTIME  \
@@@ -715,9 -725,7 +725,7 @@@ struct signal_struct 
         cputime_t utime, stime, cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
- #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-       struct cputime prev_cputime;
- #endif
+       struct prev_cputime prev_cputime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
         unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
         unsigned long inblock, oublock, cinblock, coublock;
@@@ -1167,29 -1175,24 +1175,24 @@@ struct load_weight 
         u32 inv_weight;
   };
   
+ /*
+  * The load_avg/util_avg accumulates an infinite geometric series.
+  * 1) load_avg factors the amount of time that a sched_entity is
+  * runnable on a rq into its weight. For cfs_rq, it is the aggregated
+  * such weights of all runnable and blocked sched_entities.
+  * 2) util_avg factors frequency scaling into the amount of time
+  * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
+  * For cfs_rq, it is the aggregated such times of all runnable and
+  * blocked sched_entities.
+  * The 64 bit load_sum can:
+  * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
+  * the highest weight (=88761) always runnable, we should not overflow
+  * 2) for entity, support any load.weight always runnable
+  */
   struct sched_avg {
-       u64 last_runnable_update;
-       s64 decay_count;
-       /*
-        * utilization_avg_contrib describes the amount of time that a
-        * sched_entity is running on a CPU. It is based on running_avg_sum
-        * and is scaled in the range [0..SCHED_LOAD_SCALE].
-        * load_avg_contrib described the amount of time that a sched_entity
-        * is runnable on a rq. It is based on both runnable_avg_sum and the
-        * weight of the task.
-        */
-       unsigned long load_avg_contrib, utilization_avg_contrib;
-       /*
-        * These sums represent an infinite geometric series and so are bound
-        * above by 1024/(1-y).  Thus we only need a u32 to store them for all
-        * choices of y < 1-2^(-32)*1024.
-        * running_avg_sum reflects the time that the sched_entity is
-        * effectively running on the CPU.
-        * runnable_avg_sum represents the amount of time a sched_entity is on
-        * a runqueue which includes the running time that is monitored by
-        * running_avg_sum.
-        */
-       u32 runnable_avg_sum, avg_period, running_avg_sum;
+       u64 last_update_time, load_sum;
+       u32 util_sum, period_contrib;
+       unsigned long load_avg, util_avg;
   };
   
   #ifdef CONFIG_SCHEDSTATS
@@@ -1255,7 -1258,7 +1258,7 @@@ struct sched_entity 
   #endif
   
   #ifdef CONFIG_SMP
-       /* Per-entity load-tracking */
+       /* Per entity load average tracking */
         struct sched_avg        avg;
   #endif
   };
@@@ -1351,9 -1354,9 +1354,9 @@@ struct task_struct 
   #ifdef CONFIG_SMP
         struct llist_node wake_entry;
         int on_cpu;
-       struct task_struct *last_wakee;
-       unsigned long wakee_flips;
+       unsigned int wakee_flips;
         unsigned long wakee_flip_decay_ts;
+       struct task_struct *last_wakee;
   
         int wake_cpu;
   #endif
@@@ -1481,9 -1484,7 +1484,7 @@@
   
         cputime_t utime, stime, utimescaled, stimescaled;
         cputime_t gtime;
- #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-       struct cputime prev_cputime;
- #endif
+       struct prev_cputime prev_cputime;
   #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
         seqlock_t vtime_seqlock;
         unsigned long long vtime_snap;
@@@ -1522,6 -1523,8 +1523,6 @@@
   /* hung task detection */
         unsigned long last_switch_count;
   #endif
- -/* CPU-specific state of this task */
- -      struct thread_struct thread;
   /* filesystem information */
         struct fs_struct *fs;
   /* open file information */
@@@ -1776,22 -1779,8 +1777,22 @@@
         unsigned long   task_state_change;
   #endif
         int pagefault_disabled;
+ +/* CPU-specific state of this task */
+ +      struct thread_struct thread;
+ +/*
+ + * WARNING: on x86, 'thread_struct' contains a variable-sized
+ + * structure.  It *MUST* be at the end of 'task_struct'.
+ + *
+ + * Do not put anything below here!
+ + */
   };
   
+ +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ +extern int arch_task_struct_size __read_mostly;
+ +#else
+ +# define arch_task_struct_size (sizeof(struct task_struct))
+ +#endif
+ +
   /* Future-safe accessor for struct task_struct's cpus_allowed. */
   #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
   
@@@ -2214,13 -2203,6 +2215,6 @@@ static inline void calc_load_enter_idle
   static inline void calc_load_exit_idle(void) { }
   #endif /* CONFIG_NO_HZ_COMMON */
   
- #ifndef CONFIG_CPUMASK_OFFSTACK
- static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
- {
-       return set_cpus_allowed_ptr(p, &new_mask);
- }
- #endif
- 
   /*
    * Do not use outside of architecture code which knows its limitations.
    *
@@@ -2897,12 -2879,6 +2891,6 @@@ extern int _cond_resched(void)
   
   extern int __cond_resched_lock(spinlock_t *lock);
   
- #ifdef CONFIG_PREEMPT_COUNT
- #define PREEMPT_LOCK_OFFSET   PREEMPT_OFFSET
- #else
- #define PREEMPT_LOCK_OFFSET   0
- #endif
- 
   #define cond_resched_lock(lock) ({                            \
         ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
         __cond_resched_lock(lock);                              \
diff --combined kernel/cpu.c

index 3c91a3fdfce58681ce33d311da9a876a126539c6,664ce5299334fe25e93dad7688670104bf01ebcf..82cf9dff4295eaa82305fe04a43cecb0c8a4c27e
--- 1/kernel/cpu.c
--- 2/kernel/cpu.c
+++ b/kernel/cpu.c
@@@ -21,7 -21,6 +21,7 @@@
   #include <linux/suspend.h>
   #include <linux/lockdep.h>
   #include <linux/tick.h>
+ +#include <linux/irq.h>
   #include <trace/events/power.h>
   
   #include "smpboot.h"
@@@ -191,22 -190,21 +191,22 @@@ void cpu_hotplug_done(void
   void cpu_hotplug_disable(void)
   {
         cpu_maps_update_begin();
- -      cpu_hotplug_disabled = 1;
+ +      cpu_hotplug_disabled++;
         cpu_maps_update_done();
   }
+ +EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
   
   void cpu_hotplug_enable(void)
   {
         cpu_maps_update_begin();
- -      cpu_hotplug_disabled = 0;
+ +      WARN_ON(--cpu_hotplug_disabled < 0);
         cpu_maps_update_done();
   }
- -
+ +EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
   #endif        /* CONFIG_HOTPLUG_CPU */
   
   /* Need to know about CPUs going up/down? */
- -int __ref register_cpu_notifier(struct notifier_block *nb)
+ +int register_cpu_notifier(struct notifier_block *nb)
   {
         int ret;
         cpu_maps_update_begin();
@@@ -215,7 -213,7 +215,7 @@@
         return ret;
   }
   
- -int __ref __register_cpu_notifier(struct notifier_block *nb)
+ +int __register_cpu_notifier(struct notifier_block *nb)
   {
         return raw_notifier_chain_register(&cpu_chain, nb);
   }
@@@ -245,7 -243,7 +245,7 @@@ static void cpu_notify_nofail(unsigned 
   EXPORT_SYMBOL(register_cpu_notifier);
   EXPORT_SYMBOL(__register_cpu_notifier);
   
- -void __ref unregister_cpu_notifier(struct notifier_block *nb)
+ +void unregister_cpu_notifier(struct notifier_block *nb)
   {
         cpu_maps_update_begin();
         raw_notifier_chain_unregister(&cpu_chain, nb);
@@@ -253,7 -251,7 +253,7 @@@
   }
   EXPORT_SYMBOL(unregister_cpu_notifier);
   
- -void __ref __unregister_cpu_notifier(struct notifier_block *nb)
+ +void __unregister_cpu_notifier(struct notifier_block *nb)
   {
         raw_notifier_chain_unregister(&cpu_chain, nb);
   }
@@@ -330,7 -328,7 +330,7 @@@ struct take_cpu_down_param 
   };
   
   /* Take this CPU down. */
- -static int __ref take_cpu_down(void *_param)
+ +static int take_cpu_down(void *_param)
   {
         struct take_cpu_down_param *param = _param;
         int err;
@@@ -349,7 -347,7 +349,7 @@@
   }
   
   /* Requires cpu_add_remove_lock to be held */
- -static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ +static int _cpu_down(unsigned int cpu, int tasks_frozen)
   {
         int err, nr_calls = 0;
         void *hcpu = (void *)(long)cpu;
@@@ -382,31 -380,25 +382,31 @@@
          * will observe it.
          *
          * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
- -       * not imply sync_sched(), so explicitly call both.
+ +       * not imply sync_sched(), so wait for both.
          *
          * Do sync before park smpboot threads to take care the rcu boost case.
          */
- -#ifdef CONFIG_PREEMPT
- -      synchronize_sched();
- -#endif
- -      synchronize_rcu();
+ +      if (IS_ENABLED(CONFIG_PREEMPT))
+ +              synchronize_rcu_mult(call_rcu, call_rcu_sched);
+ +      else
+ +              synchronize_rcu();
   
         smpboot_park_threads(cpu);
   
         /*
- -       * So now all preempt/rcu users must observe !cpu_active().
+ +       * Prevent irq alloc/free while the dying cpu reorganizes the
+ +       * interrupt affinities.
          */
+ +      irq_lock_sparse();
   
-       err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ +      /*
+ +       * So now all preempt/rcu users must observe !cpu_active().
+ +       */
+       err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
         if (err) {
                 /* CPU didn't die: tell everyone.  Can't complain. */
                 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+ +              irq_unlock_sparse();
                 goto out_release;
         }
         BUG_ON(cpu_online(cpu));
@@@ -423,9 -415,6 +423,9 @@@
         smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
         per_cpu(cpu_dead_idle, cpu) = false;
   
+ +      /* Interrupts are moved away from the dying cpu, reenable alloc/free */
+ +      irq_unlock_sparse();
+ +
         hotplug_cpu__broadcast_tick_pull(cpu);
         /* This actually kills the CPU. */
         __cpu_die(cpu);
@@@ -443,7 -432,7 +443,7 @@@ out_release
         return err;
   }
   
- -int __ref cpu_down(unsigned int cpu)
+ +int cpu_down(unsigned int cpu)
   {
         int err;
   
@@@ -530,7 -519,6 +530,7 @@@ static int _cpu_up(unsigned int cpu, in
   
         /* Arch-specific enabling code. */
         ret = __cpu_up(cpu, idle);
+ +
         if (ret != 0)
                 goto out_notify;
         BUG_ON(!cpu_online(cpu));
@@@ -609,18 -597,13 +609,18 @@@ int disable_nonboot_cpus(void
                 }
         }
   
- -      if (!error) {
+ +      if (!error)
                 BUG_ON(num_online_cpus() > 1);
- -              /* Make sure the CPUs won't be enabled by someone else */
- -              cpu_hotplug_disabled = 1;
- -      } else {
+ +      else
                 pr_err("Non-boot CPUs are not disabled\n");
- -      }
+ +
+ +      /*
+ +       * Make sure the CPUs won't be enabled by someone else. We need to do
+ +       * this even in case of failure as all disable_nonboot_cpus() users are
+ +       * supposed to do enable_nonboot_cpus() on the failure path.
+ +       */
+ +      cpu_hotplug_disabled++;
+ +
         cpu_maps_update_done();
         return error;
   }
@@@ -633,13 -616,13 +633,13 @@@ void __weak arch_enable_nonboot_cpus_en
   {
   }
   
- -void __ref enable_nonboot_cpus(void)
+ +void enable_nonboot_cpus(void)
   {
         int cpu, error;
   
         /* Allow everyone to use the CPU hotplug again */
         cpu_maps_update_begin();
- -      cpu_hotplug_disabled = 0;
+ +      WARN_ON(--cpu_hotplug_disabled < 0);
         if (cpumask_empty(frozen_cpus))
                 goto out;
   
diff --combined kernel/fork.c

index dbd9b8d7b7cc2baa8c425bf35191ce8dfd117ecf,6e8f807c57169e57f928b717d8c1a7f5ad9ec6c5..0d93b4d0617b006007e782d6dfdc6afc05c65d20
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -287,11 -287,6 +287,11 @@@ static void set_max_threads(unsigned in
         max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
   }
   
+ +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ +/* Initialized by the architecture: */
+ +int arch_task_struct_size __read_mostly;
+ +#endif
+ +
   void __init fork_init(void)
   {
   #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
@@@ -300,7 -295,7 +300,7 @@@
   #endif
         /* create a slab on which task_structs can be allocated */
         task_struct_cachep =
- -              kmem_cache_create("task_struct", sizeof(struct task_struct),
+ +              kmem_cache_create("task_struct", arch_task_struct_size,
                         ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
   #endif
   
@@@ -1072,6 -1067,7 +1072,7 @@@ static int copy_sighand(unsigned long c
         rcu_assign_pointer(tsk->sighand, sig);
         if (!sig)
                 return -ENOMEM;
+ 
         atomic_set(&sig->count, 1);
         memcpy(sig->action, current->sighand->action, sizeof(sig->action));
         return 0;
@@@ -1133,6 -1129,7 +1134,7 @@@ static int copy_signal(unsigned long cl
         init_sigpending(&sig->shared_pending);
         INIT_LIST_HEAD(&sig->posix_timers);
         seqlock_init(&sig->stats_lock);
+       prev_cputime_init(&sig->prev_cputime);
   
         hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         sig->real_timer.function = it_real_fn;
@@@ -1340,9 -1337,8 +1342,8 @@@ static struct task_struct *copy_process
   
         p->utime = p->stime = p->gtime = 0;
         p->utimescaled = p->stimescaled = 0;
- #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-       p->prev_cputime.utime = p->prev_cputime.stime = 0;
- #endif
+       prev_cputime_init(&p->prev_cputime);
+ 
   #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
         seqlock_init(&p->vtime_seqlock);
         p->vtime_snap = 0;
diff --combined kernel/kthread.c

index fdea0bee7b5a4d5e2fcf43ee3b92e1a37dea6c71,7c40a189becc5ed2579177ccace6cabb73f39c6a..490924cc9e7c8252c447e802f142d0a3a150865d
--- 1/kernel/kthread.c
--- 2/kernel/kthread.c
+++ b/kernel/kthread.c
@@@ -97,7 -97,6 +97,7 @@@ bool kthread_should_park(void
   {
         return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
   }
+ +EXPORT_SYMBOL_GPL(kthread_should_park);
   
   /**
    * kthread_freezable_should_stop - should this freezable kthread return now?
@@@ -172,7 -171,6 +172,7 @@@ void kthread_parkme(void
   {
         __kthread_parkme(to_kthread(current));
   }
+ +EXPORT_SYMBOL_GPL(kthread_parkme);
   
   static int kthread(void *_create)
   {
@@@ -327,16 -325,30 +327,30 @@@ struct task_struct *kthread_create_on_n
   }
   EXPORT_SYMBOL(kthread_create_on_node);
   
- static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+ static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
   {
-       /* Must have done schedule() in kthread() before we set_task_cpu */
+       unsigned long flags;
+ 
         if (!wait_task_inactive(p, state)) {
                 WARN_ON(1);
                 return;
         }
+ 
         /* It's safe because the task is inactive. */
-       do_set_cpus_allowed(p, cpumask_of(cpu));
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       do_set_cpus_allowed(p, mask);
         p->flags |= PF_NO_SETAFFINITY;
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ }
+ 
+ static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+ {
+       __kthread_bind_mask(p, cpumask_of(cpu), state);
+ }
+ 
+ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
+ {
+       __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
   }
   
   /**
@@@ -413,7 -425,6 +427,7 @@@ void kthread_unpark(struct task_struct 
         if (kthread)
                 __kthread_unpark(k, kthread);
   }
+ +EXPORT_SYMBOL_GPL(kthread_unpark);
   
   /**
    * kthread_park - park a thread created by kthread_create().
@@@ -444,7 -455,6 +458,7 @@@ int kthread_park(struct task_struct *k
         }
         return ret;
   }
+ +EXPORT_SYMBOL_GPL(kthread_park);
   
   /**
    * kthread_stop - stop a thread created by kthread_create().
diff --combined kernel/sched/core.c

index 5e73c79fadd001d0bc898824549df6d5c89350b0,9917c962be9952d3b0d829c649fe415fbfd9e50d..a585c7b2ccf0c8897419ac1cd3b259d433bf0b87
--- 1/kernel/sched/core.c
--- 2/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -1151,15 -1151,45 +1151,45 @@@ static int migration_cpu_stop(void *dat
         return 0;
   }
   
- void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ /*
+  * sched_class::set_cpus_allowed must do the below, but is not required to
+  * actually call this function.
+  */
+ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
   {
-       if (p->sched_class->set_cpus_allowed)
-               p->sched_class->set_cpus_allowed(p, new_mask);
- 
         cpumask_copy(&p->cpus_allowed, new_mask);
         p->nr_cpus_allowed = cpumask_weight(new_mask);
   }
   
+ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ {
+       struct rq *rq = task_rq(p);
+       bool queued, running;
+ 
+       lockdep_assert_held(&p->pi_lock);
+ 
+       queued = task_on_rq_queued(p);
+       running = task_current(rq, p);
+ 
+       if (queued) {
+               /*
+                * Because __kthread_bind() calls this on blocked tasks without
+                * holding rq->lock.
+                */
+               lockdep_assert_held(&rq->lock);
+               dequeue_task(rq, p, 0);
+       }
+       if (running)
+               put_prev_task(rq, p);
+ 
+       p->sched_class->set_cpus_allowed(p, new_mask);
+ 
+       if (running)
+               p->sched_class->set_curr_task(rq);
+       if (queued)
+               enqueue_task(rq, p, 0);
+ }
+ 
   /*
    * Change a given task's CPU affinity. Migrate the thread to a
    * proper CPU and schedule it away if the CPU it's executing on
@@@ -1169,7 -1199,8 +1199,8 @@@
    * task must not exit() & deallocate itself prematurely. The
    * call is not atomic; no spinlocks may be held.
    */
- int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ static int __set_cpus_allowed_ptr(struct task_struct *p,
+                                 const struct cpumask *new_mask, bool check)
   {
         unsigned long flags;
         struct rq *rq;
@@@ -1178,6 -1209,15 +1209,15 @@@
   
         rq = task_rq_lock(p, &flags);
   
+       /*
+        * Must re-check here, to close a race against __kthread_bind(),
+        * sched_setaffinity() is not guaranteed to observe the flag.
+        */
+       if (check && (p->flags & PF_NO_SETAFFINITY)) {
+               ret = -EINVAL;
+               goto out;
+       }
+ 
         if (cpumask_equal(&p->cpus_allowed, new_mask))
                 goto out;
   
@@@ -1214,6 -1254,11 +1254,11 @@@ out
   
         return ret;
   }
+ 
+ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ {
+       return __set_cpus_allowed_ptr(p, new_mask, false);
+ }
   EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
   
   void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@@ -1595,6 -1640,15 +1640,15 @@@ static void update_avg(u64 *avg, u64 sa
         s64 diff = sample - *avg;
         *avg += diff >> 3;
   }
+ 
+ #else
+ 
+ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+                                        const struct cpumask *new_mask, bool check)
+ {
+       return set_cpus_allowed_ptr(p, new_mask);
+ }
+ 
   #endif /* CONFIG_SMP */
   
   static void
@@@ -1654,9 -1708,9 +1708,9 @@@ static voi
   ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
   {
         check_preempt_curr(rq, p, wake_flags);
-       trace_sched_wakeup(p, true);
- 
         p->state = TASK_RUNNING;
+       trace_sched_wakeup(p);
+ 
   #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
@@@ -1874,6 -1928,8 +1928,8 @@@ try_to_wake_up(struct task_struct *p, u
         if (!(p->state & state))
                 goto out;
   
+       trace_sched_waking(p);
+ 
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
   
@@@ -1949,6 -2005,8 +2005,8 @@@ static void try_to_wake_up_local(struc
         if (!(p->state & TASK_NORMAL))
                 goto out;
   
+       trace_sched_waking(p);
+ 
         if (!task_on_rq_queued(p))
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
   
@@@ -2016,9 -2074,6 +2074,6 @@@ static void __sched_fork(unsigned long 
         p->se.prev_sum_exec_runtime     = 0;
         p->se.nr_migrations             = 0;
         p->se.vruntime                  = 0;
- #ifdef CONFIG_SMP
-       p->se.avg.decay_count           = 0;
- #endif
         INIT_LIST_HEAD(&p->se.group_node);
   
   #ifdef CONFIG_SCHEDSTATS
@@@ -2200,8 -2255,8 +2255,8 @@@ unsigned long to_ratio(u64 period, u64 
   #ifdef CONFIG_SMP
   inline struct dl_bw *dl_bw_of(int i)
   {
- -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
- -                         "sched RCU must be held");
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ +                       "sched RCU must be held");
         return &cpu_rq(i)->rd->dl_bw;
   }
   
@@@ -2210,8 -2265,8 +2265,8 @@@ static inline int dl_bw_cpus(int i
         struct root_domain *rd = cpu_rq(i)->rd;
         int cpus = 0;
   
- -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
- -                         "sched RCU must be held");
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ +                       "sched RCU must be held");
         for_each_cpu_and(i, rd->span, cpu_active_mask)
                 cpus++;
   
@@@ -2303,11 -2358,11 +2358,11 @@@ void wake_up_new_task(struct task_struc
   #endif
   
         /* Initialize new task's runnable average */
-       init_task_runnable_average(p);
+       init_entity_runnable_average(&p->se);
         rq = __task_rq_lock(p);
         activate_task(rq, p, 0);
         p->on_rq = TASK_ON_RQ_QUEUED;
-       trace_sched_wakeup_new(p, true);
+       trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_woken)
@@@ -2469,7 -2524,6 +2524,6 @@@ static struct rq *finish_task_switch(st
          */
         prev_state = prev->state;
         vtime_task_switch(prev);
-       finish_arch_switch(prev);
         perf_event_task_sched_in(prev, current);
         finish_lock_switch(rq, prev);
         finish_arch_post_lock_switch();
@@@ -4340,7 -4394,7 +4394,7 @@@ long sched_setaffinity(pid_t pid, cons
         }
   #endif
   again:
-       retval = set_cpus_allowed_ptr(p, new_mask);
+       retval = __set_cpus_allowed_ptr(p, new_mask, true);
   
         if (!retval) {
                 cpuset_cpus_allowed(p, cpus_allowed);
@@@ -4492,7 -4546,7 +4546,7 @@@ SYSCALL_DEFINE0(sched_yield
   
   int __sched _cond_resched(void)
   {
-       if (should_resched()) {
+       if (should_resched(0)) {
                 preempt_schedule_common();
                 return 1;
         }
@@@ -4510,7 -4564,7 +4564,7 @@@ EXPORT_SYMBOL(_cond_resched)
    */
   int __cond_resched_lock(spinlock_t *lock)
   {
-       int resched = should_resched();
+       int resched = should_resched(PREEMPT_LOCK_OFFSET);
         int ret = 0;
   
         lockdep_assert_held(lock);
@@@ -4532,7 -4586,7 +4586,7 @@@ int __sched __cond_resched_softirq(void
   {
         BUG_ON(!in_softirq());
   
-       if (should_resched()) {
+       if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
                 local_bh_enable();
                 preempt_schedule_common();
                 local_bh_disable();
@@@ -4865,7 -4919,8 +4919,8 @@@ void init_idle(struct task_struct *idle
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
   
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       raw_spin_lock_irqsave(&idle->pi_lock, flags);
+       raw_spin_lock(&rq->lock);
   
         __sched_fork(0, idle);
         idle->state = TASK_RUNNING;
@@@ -4891,7 -4946,8 +4946,8 @@@
   #if defined(CONFIG_SMP)
         idle->on_cpu = 1;
   #endif
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
   
         /* Set the preempt count _outside_ the spinlocks! */
         init_idle_preempt_count(idle, cpu);
@@@ -5311,8 -5367,7 +5367,7 @@@ static void register_sched_domain_sysct
   /* may be called multiple times per register */
   static void unregister_sched_domain_sysctl(void)
   {
-       if (sd_sysctl_header)
-               unregister_sysctl_table(sd_sysctl_header);
+       unregister_sysctl_table(sd_sysctl_header);
         sd_sysctl_header = NULL;
         if (sd_ctl_dir[0].child)
                 sd_free_ctl_entry(&sd_ctl_dir[0].child);
@@@ -6445,8 -6500,10 +6500,10 @@@ static void init_numa_topology_type(voi
   
         n = sched_max_numa_distance;
   
-       if (n <= 1)
+       if (sched_domains_numa_levels <= 1) {
                 sched_numa_topology_type = NUMA_DIRECT;
+               return;
+       }
   
         for_each_online_node(a) {
                 for_each_online_node(b) {
diff --combined kernel/workqueue.c

index cb91c63b4f4a3b25ac6fb27b211d341ba013bc48,f5782d5fd196964ba220544be9cb8204f294e79c..811edb77dd6da7e5a4402066d843e3997e0ca026
--- 1/kernel/workqueue.c
--- 2/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@@ -338,20 -338,20 +338,20 @@@ static void workqueue_sysfs_unregister(
   #include <trace/events/workqueue.h>
   
   #define assert_rcu_or_pool_mutex()                                    \
- -      rcu_lockdep_assert(rcu_read_lock_sched_held() ||                \
- -                         lockdep_is_held(&wq_pool_mutex),             \
- -                         "sched RCU or wq_pool_mutex should be held")
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+ +                       !lockdep_is_held(&wq_pool_mutex),              \
+ +                       "sched RCU or wq_pool_mutex should be held")
   
   #define assert_rcu_or_wq_mutex(wq)                                    \
- -      rcu_lockdep_assert(rcu_read_lock_sched_held() ||                \
- -                         lockdep_is_held(&wq->mutex),                 \
- -                         "sched RCU or wq->mutex should be held")
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+ +                       !lockdep_is_held(&wq->mutex),                  \
+ +                       "sched RCU or wq->mutex should be held")
   
   #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                      \
- -      rcu_lockdep_assert(rcu_read_lock_sched_held() ||                \
- -                         lockdep_is_held(&wq->mutex) ||               \
- -                         lockdep_is_held(&wq_pool_mutex),             \
- -                         "sched RCU, wq->mutex or wq_pool_mutex should be held")
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
+ +                       !lockdep_is_held(&wq->mutex) &&                \
+ +                       !lockdep_is_held(&wq_pool_mutex),              \
+ +                       "sched RCU, wq->mutex or wq_pool_mutex should be held")
   
   #define for_each_cpu_worker_pool(pool, cpu)                           \
         for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];               \
@@@ -1714,9 -1714,7 +1714,7 @@@ static struct worker *create_worker(str
                 goto fail;
   
         set_user_nice(worker->task, pool->attrs->nice);
- 
-       /* prevent userland from meddling with cpumask of workqueue workers */
-       worker->task->flags |= PF_NO_SETAFFINITY;
+       kthread_bind_mask(worker->task, pool->attrs->cpumask);
   
         /* successful, attach the worker to the pool */
         worker_attach_to_pool(worker, pool);
@@@ -3856,7 -3854,7 +3854,7 @@@ struct workqueue_struct *__alloc_workqu
                 }
   
                 wq->rescuer = rescuer;
-               rescuer->task->flags |= PF_NO_SETAFFINITY;
+               kthread_bind_mask(rescuer->task, cpu_possible_mask);
                 wake_up_process(rescuer->task);
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 1 Sep 2015 03:26:22 +0000 (20:26 -0700)
		1	2
drivers/cpuidle/cpuidle.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cpu.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/kthread.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/workqueue.c	patch \|	diff1 \|	diff2 \|	blob \| history