[PATCH] autofs4: pending flag not cleared on mount fail

[mirror_ubuntu-artful-kernel.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 9d42cbfc4f8ba2e3899cde4f1740db52dd555bab..155a33da7aa76ca733977c9b934042551ca2e7f0 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -238,6 +238,7 @@ struct rq {
         /* For active balancing */
         int active_balance;
         int push_cpu;
+       int cpu;                /* cpu of this runqueue */
  
         struct task_struct *migration_thread;
         struct list_head migration_queue;
@@ -267,6 +268,15 @@ struct rq {
  
  static DEFINE_PER_CPU(struct rq, runqueues);
  
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+       return rq->cpu;
+#else
+       return 0;
+#endif
+}
+
  /*
   * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
   * See detach_destroy_domains: synchronize_sched for details.
@@ -502,9 +512,36 @@ struct file_operations proc_schedstat_operations = {
         .release = single_release,
  };
  
+/*
+ * Expects the runqueue lock to be held so that the update is atomic
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq) {
+               rq->rq_sched_info.run_delay += delta_jiffies;
+               rq->rq_sched_info.pcnt++;
+       }
+}
+
+/*
+ * Expects the runqueue lock to be held so that the update is atomic
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq)
+               rq->rq_sched_info.cpu_time += delta_jiffies;
+}
  # define schedstat_inc(rq, field)      do { (rq)->field++; } while (0)
  # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
  #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
  # define schedstat_inc(rq, field)      do { } while (0)
  # define schedstat_add(rq, field, amt) do { } while (0)
  #endif
@@ -524,7 +561,7 @@ static inline struct rq *this_rq_lock(void)
         return rq;
  }
  
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
  /*
   * Called when a process is dequeued from the active array and given
   * the cpu.  We should note that with the exception of interactive
@@ -552,21 +589,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
   */
  static void sched_info_arrive(struct task_struct *t)
  {
-       unsigned long now = jiffies, diff = 0;
-       struct rq *rq = task_rq(t);
+       unsigned long now = jiffies, delta_jiffies = 0;
  
         if (t->sched_info.last_queued)
-               diff = now - t->sched_info.last_queued;
+               delta_jiffies = now - t->sched_info.last_queued;
         sched_info_dequeued(t);
-       t->sched_info.run_delay += diff;
+       t->sched_info.run_delay += delta_jiffies;
         t->sched_info.last_arrival = now;
         t->sched_info.pcnt++;
  
-       if (!rq)
-               return;
-
-       rq->rq_sched_info.run_delay += diff;
-       rq->rq_sched_info.pcnt++;
+       rq_sched_info_arrive(task_rq(t), delta_jiffies);
  }
  
  /*
@@ -586,8 +618,9 @@ static void sched_info_arrive(struct task_struct *t)
   */
  static inline void sched_info_queued(struct task_struct *t)
  {
-       if (!t->sched_info.last_queued)
-               t->sched_info.last_queued = jiffies;
+       if (unlikely(sched_info_on()))
+               if (!t->sched_info.last_queued)
+                       t->sched_info.last_queued = jiffies;
  }
  
  /*
@@ -596,13 +629,10 @@ static inline void sched_info_queued(struct task_struct *t)
   */
  static inline void sched_info_depart(struct task_struct *t)
  {
-       struct rq *rq = task_rq(t);
-       unsigned long diff = jiffies - t->sched_info.last_arrival;
+       unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
  
-       t->sched_info.cpu_time += diff;
-
-       if (rq)
-               rq->rq_sched_info.cpu_time += diff;
+       t->sched_info.cpu_time += delta_jiffies;
+       rq_sched_info_depart(task_rq(t), delta_jiffies);
  }
  
  /*
@@ -611,7 +641,7 @@ static inline void sched_info_depart(struct task_struct *t)
   * the idle task.)  We are only called when prev != next.
   */
  static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
  {
         struct rq *rq = task_rq(prev);
  
@@ -626,10 +656,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
         if (next != rq->idle)
                 sched_info_arrive(next);
  }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+       if (unlikely(sched_info_on()))
+               __sched_info_switch(prev, next);
+}
  #else
  #define sched_info_queued(t)           do { } while (0)
  #define sched_info_switch(t, next)     do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
  
  /*
   * Adding/removing a task to/from a priority array:
@@ -1531,8 +1567,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
  
         INIT_LIST_HEAD(&p->run_list);
         p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-       memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+       if (unlikely(sched_info_on()))
+               memset(&p->sched_info, 0, sizeof(p->sched_info));
  #endif
  #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
         p->oncpu = 0;
@@ -2184,7 +2221,8 @@ out:
   */
  static struct sched_group *
  find_busiest_group(struct sched_domain *sd, int this_cpu,
-                  unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+                  unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+                  cpumask_t *cpus)
  {
         struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
         unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2221,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                 sum_weighted_load = sum_nr_running = avg_load = 0;
  
                 for_each_cpu_mask(i, group->cpumask) {
-                       struct rq *rq = cpu_rq(i);
+                       struct rq *rq;
+
+                       if (!cpu_isset(i, *cpus))
+                               continue;
+
+                       rq = cpu_rq(i);
  
                         if (*sd_idle && !idle_cpu(i))
                                 *sd_idle = 0;
@@ -2439,13 +2482,17 @@ ret:
   */
  static struct rq *
  find_busiest_queue(struct sched_group *group, enum idle_type idle,
-                  unsigned long imbalance)
+                  unsigned long imbalance, cpumask_t *cpus)
  {
         struct rq *busiest = NULL, *rq;
         unsigned long max_load = 0;
         int i;
  
         for_each_cpu_mask(i, group->cpumask) {
+
+               if (!cpu_isset(i, *cpus))
+                       continue;
+
                 rq = cpu_rq(i);
  
                 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2484,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
+       cpumask_t cpus = CPU_MASK_ALL;
  
         if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
             !sched_smt_power_savings)
@@ -2491,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
  
         schedstat_inc(sd, lb_cnt[idle]);
  
-       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+                                                       &cpus);
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[idle]);
                 goto out_balanced;
         }
  
-       busiest = find_busiest_queue(group, idle, imbalance);
+       busiest = find_busiest_queue(group, idle, imbalance, &cpus);
         if (!busiest) {
                 schedstat_inc(sd, lb_nobusyq[idle]);
                 goto out_balanced;
@@ -2522,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 double_rq_unlock(this_rq, busiest);
  
                 /* All tasks on this runqueue were pinned by CPU affinity */
-               if (unlikely(all_pinned))
+               if (unlikely(all_pinned)) {
+                       cpu_clear(cpu_of(busiest), cpus);
+                       if (!cpus_empty(cpus))
+                               goto redo;
                         goto out_balanced;
+               }
         }
  
         if (!nr_moved) {
@@ -2612,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
         unsigned long imbalance;
         int nr_moved = 0;
         int sd_idle = 0;
+       cpumask_t cpus = CPU_MASK_ALL;
  
         if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
                 sd_idle = 1;
  
         schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+                               &sd_idle, &cpus);
         if (!group) {
                 schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                 goto out_balanced;
         }
  
-       busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+       busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+                               &cpus);
         if (!busiest) {
                 schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
                 goto out_balanced;
@@ -2641,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
                                         minus_1_or_zero(busiest->nr_running),
                                         imbalance, sd, NEWLY_IDLE, NULL);
                 spin_unlock(&busiest->lock);
+
+               if (!nr_moved) {
+                       cpu_clear(cpu_of(busiest), cpus);
+                       if (!cpus_empty(cpus))
+                               goto redo;
+               }
         }
  
         if (!nr_moved) {
@@ -4016,6 +4080,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
   * @p: the task in question.
   * @policy: new policy.
   * @param: structure containing the new RT priority.
+ *
+ * NOTE: the task may already be dead
   */
  int sched_setscheduler(struct task_struct *p, int policy,
                        struct sched_param *param)
@@ -4043,28 +4109,32 @@ recheck:
             (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
             (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
                 return -EINVAL;
-       if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
-                                       != (param->sched_priority == 0))
+       if (is_rt_policy(policy) != (param->sched_priority != 0))
                 return -EINVAL;
  
         /*
          * Allow unprivileged RT tasks to decrease priority:
          */
         if (!capable(CAP_SYS_NICE)) {
-               /*
-                * can't change policy, except between SCHED_NORMAL
-                * and SCHED_BATCH:
-                */
-               if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
-                       (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
-                               !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-                       return -EPERM;
-               /* can't increase priority */
-               if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
-                   param->sched_priority > p->rt_priority &&
-                   param->sched_priority >
-                               p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-                       return -EPERM;
+               if (is_rt_policy(policy)) {
+                       unsigned long rlim_rtprio;
+                       unsigned long flags;
+
+                       if (!lock_task_sighand(p, &flags))
+                               return -ESRCH;
+                       rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+                       unlock_task_sighand(p, &flags);
+
+                       /* can't set/change the rt policy */
+                       if (policy != p->policy && !rlim_rtprio)
+                               return -EPERM;
+
+                       /* can't increase priority */
+                       if (param->sched_priority > p->rt_priority &&
+                           param->sched_priority > rlim_rtprio)
+                               return -EPERM;
+               }
+
                 /* can't change other user's priorities */
                 if ((current->euid != p->euid) &&
                     (current->euid != p->uid))
@@ -4129,16 +4199,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
                 return -EINVAL;
         if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
                 return -EFAULT;
-       read_lock_irq(&tasklist_lock);
+
+       rcu_read_lock();
+       retval = -ESRCH;
         p = find_process_by_pid(pid);
-       if (!p) {
-               read_unlock_irq(&tasklist_lock);
-               return -ESRCH;
-       }
-       get_task_struct(p);
-       read_unlock_irq(&tasklist_lock);
-       retval = sched_setscheduler(p, policy, &lparam);
-       put_task_struct(p);
+       if (p != NULL)
+               retval = sched_setscheduler(p, policy, &lparam);
+       rcu_read_unlock();
  
         return retval;
  }
@@ -4429,9 +4496,9 @@ asmlinkage long sys_sched_yield(void)
         return 0;
  }
  
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
  {
-       if (unlikely(preempt_count()))
+       if (unlikely(preempt_count() != expected_preempt_count))
                 return 0;
         if (unlikely(system_state != SYSTEM_RUNNING))
                 return 0;
@@ -4457,7 +4524,7 @@ static void __cond_resched(void)
  
  int __sched cond_resched(void)
  {
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                 __cond_resched();
                 return 1;
         }
@@ -4483,7 +4550,7 @@ int cond_resched_lock(spinlock_t *lock)
                 ret = 1;
                 spin_lock(lock);
         }
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(1)) {
                 spin_release(&lock->dep_map, 1, _THIS_IP_);
                 _raw_spin_unlock(lock);
                 preempt_enable_no_resched();
@@ -4499,7 +4566,7 @@ int __sched cond_resched_softirq(void)
  {
         BUG_ON(!in_softirq());
  
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                 raw_local_irq_disable();
                 _local_bh_enable();
                 raw_local_irq_enable();
@@ -5210,9 +5277,11 @@ static struct notifier_block __cpuinitdata migration_notifier = {
  int __init migration_init(void)
  {
         void *cpu = (void *)(long)smp_processor_id();
+       int err;
  
         /* Start one for the boot CPU: */
-       migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+       err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+       BUG_ON(err == NOTIFY_BAD);
         migration_call(&migration_notifier, CPU_ONLINE, cpu);
         register_cpu_notifier(&migration_notifier);
  
@@ -6467,7 +6536,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
         for (i = 0; i < MAX_NUMNODES; i++)
                 init_numa_sched_groups_power(sched_group_nodes[i]);
  
-       init_numa_sched_groups_power(sched_group_allnodes);
+       if (sched_group_allnodes) {
+               int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+               struct sched_group *sg = &sched_group_allnodes[group];
+
+               init_numa_sched_groups_power(sg);
+       }
  #endif
  
         /* Attach the domains */
@@ -6717,6 +6791,7 @@ void __init sched_init(void)
                         rq->cpu_load[j] = 0;
                 rq->active_balance = 0;
                 rq->push_cpu = 0;
+               rq->cpu = i;
                 rq->migration_thread = NULL;
                 INIT_LIST_HEAD(&rq->migration_queue);
  #endif
@@ -6734,6 +6809,11 @@ void __init sched_init(void)
         }
  
         set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+       plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
         /*
          * The boot idle thread does lazy MMU switching as well:
          */