/* For active balancing */
int active_balance;
int push_cpu;
+ int cpu; /* cpu of this runqueue */
struct task_struct *migration_thread;
struct list_head migration_queue;
static DEFINE_PER_CPU(struct rq, runqueues);
+static inline int cpu_of(struct rq *rq) /* CPU number recorded for @rq */
+{
+#ifdef CONFIG_SMP
+ return rq->cpu; /* the runqueue's own "cpu" field */
+#else /* !CONFIG_SMP */
+ return 0; /* without CONFIG_SMP the answer is always 0 */
+#endif /* CONFIG_SMP */
+}
+
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
.release = single_release,
};
+/*
+ * Fold one task arrival into @rq's rq_sched_info: add @delta_jiffies to
+ * run_delay and increment pcnt. Does nothing when @rq is NULL.
+ *
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+ if (rq) {
+ rq->rq_sched_info.run_delay += delta_jiffies;
+ rq->rq_sched_info.pcnt++;
+ }
+}
+
+/*
+ * Add @delta_jiffies to @rq's rq_sched_info.cpu_time. Does nothing when
+ * @rq is NULL.
+ *
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+ if (rq)
+ rq->rq_sched_info.cpu_time += delta_jiffies;
+}
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
#else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{} /* !CONFIG_SCHEDSTATS: empty stub, both arguments are ignored */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{} /* !CONFIG_SCHEDSTATS: empty stub, both arguments are ignored */
# define schedstat_inc(rq, field) do { } while (0)
# define schedstat_add(rq, field, amt) do { } while (0)
#endif
return rq;
}
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
* Called when a process is dequeued from the active array and given
* the cpu. We should note that with the exception of interactive
*/
static void sched_info_arrive(struct task_struct *t)
{
- unsigned long now = jiffies, diff = 0;
- struct rq *rq = task_rq(t);
+ unsigned long now = jiffies, delta_jiffies = 0; /* delta stays 0 when last_queued is unset */
if (t->sched_info.last_queued)
- diff = now - t->sched_info.last_queued;
+ delta_jiffies = now - t->sched_info.last_queued; /* jiffies elapsed since last_queued */
sched_info_dequeued(t);
- t->sched_info.run_delay += diff;
+ t->sched_info.run_delay += delta_jiffies; /* per-task accumulated run_delay */
t->sched_info.last_arrival = now;
t->sched_info.pcnt++;
- if (!rq)
- return;
-
- rq->rq_sched_info.run_delay += diff;
- rq->rq_sched_info.pcnt++;
+ rq_sched_info_arrive(task_rq(t), delta_jiffies); /* runqueue-level helper; an empty stub when !CONFIG_SCHEDSTATS */
}
/*
*/
static inline void sched_info_queued(struct task_struct *t)
{
- if (!t->sched_info.last_queued)
- t->sched_info.last_queued = jiffies;
+ if (unlikely(sched_info_on())) /* record nothing unless sched_info_on() returns non-zero */
+ if (!t->sched_info.last_queued) /* keep a timestamp that is already set */
+ t->sched_info.last_queued = jiffies;
}
/*
*/
static inline void sched_info_depart(struct task_struct *t)
{
- struct rq *rq = task_rq(t);
- unsigned long diff = jiffies - t->sched_info.last_arrival;
+ unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; /* jiffies elapsed since last_arrival */
- t->sched_info.cpu_time += diff;
-
- if (rq)
- rq->rq_sched_info.cpu_time += diff;
+ t->sched_info.cpu_time += delta_jiffies; /* per-task accumulated cpu_time */
+ rq_sched_info_depart(task_rq(t), delta_jiffies); /* runqueue-level helper; an empty stub when !CONFIG_SCHEDSTATS */
}
/*
* the idle task.) We are only called when prev != next.
*/
static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
struct rq *rq = task_rq(prev);
if (next != rq->idle)
sched_info_arrive(next);
}
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next) /* gate in front of __sched_info_switch() */
+{
+ if (unlikely(sched_info_on())) /* forward the switch only when sched_info_on() returns non-zero */
+ __sched_info_switch(prev, next);
+}
#else
#define sched_info_queued(t) do { } while (0)
#define sched_info_switch(t, next) do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
/*
* Adding/removing a task to/from a priority array:
INIT_LIST_HEAD(&p->run_list);
p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
- memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+ if (unlikely(sched_info_on()))
+ memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
p->oncpu = 0;
*/
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
- unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+ unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+ cpumask_t *cpus)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
sum_weighted_load = sum_nr_running = avg_load = 0;
for_each_cpu_mask(i, group->cpumask) {
- struct rq *rq = cpu_rq(i);
+ struct rq *rq;
+
+ if (!cpu_isset(i, *cpus))
+ continue;
+
+ rq = cpu_rq(i);
if (*sd_idle && !idle_cpu(i))
*sd_idle = 0;
*/
static struct rq *
find_busiest_queue(struct sched_group *group, enum idle_type idle,
- unsigned long imbalance)
+ unsigned long imbalance, cpumask_t *cpus)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
int i;
for_each_cpu_mask(i, group->cpumask) {
+
+ if (!cpu_isset(i, *cpus))
+ continue;
+
rq = cpu_rq(i);
if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
+ cpumask_t cpus = CPU_MASK_ALL;
if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
!sched_smt_power_savings)
schedstat_inc(sd, lb_cnt[idle]);
- group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+ &cpus);
if (!group) {
schedstat_inc(sd, lb_nobusyg[idle]);
goto out_balanced;
}
- busiest = find_busiest_queue(group, idle, imbalance);
+ busiest = find_busiest_queue(group, idle, imbalance, &cpus);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced;
double_rq_unlock(this_rq, busiest);
/* All tasks on this runqueue were pinned by CPU affinity */
- if (unlikely(all_pinned))
+ if (unlikely(all_pinned)) {
+ cpu_clear(cpu_of(busiest), cpus);
+ if (!cpus_empty(cpus))
+ goto redo;
goto out_balanced;
+ }
}
if (!nr_moved) {
unsigned long imbalance;
int nr_moved = 0;
int sd_idle = 0;
+ cpumask_t cpus = CPU_MASK_ALL;
if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
sd_idle = 1;
schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
- group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+ group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+ &sd_idle, &cpus);
if (!group) {
schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
goto out_balanced;
}
- busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+ busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+ &cpus);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
goto out_balanced;
minus_1_or_zero(busiest->nr_running),
imbalance, sd, NEWLY_IDLE, NULL);
spin_unlock(&busiest->lock);
+
+ if (!nr_moved) {
+ cpu_clear(cpu_of(busiest), cpus);
+ if (!cpus_empty(cpus))
+ goto redo;
+ }
}
if (!nr_moved) {
* @p: the task in question.
* @policy: new policy.
* @param: structure containing the new RT priority.
+ *
+ * NOTE: the task may already be dead
*/
int sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param)
(p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
- if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
- != (param->sched_priority == 0))
+ if (is_rt_policy(policy) != (param->sched_priority != 0))
return -EINVAL;
/*
* Allow unprivileged RT tasks to decrease priority:
*/
if (!capable(CAP_SYS_NICE)) {
- /*
- * can't change policy, except between SCHED_NORMAL
- * and SCHED_BATCH:
- */
- if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
- (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
- !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
- return -EPERM;
- /* can't increase priority */
- if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
- param->sched_priority > p->rt_priority &&
- param->sched_priority >
- p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
- return -EPERM;
+ if (is_rt_policy(policy)) {
+ unsigned long rlim_rtprio;
+ unsigned long flags;
+
+ if (!lock_task_sighand(p, &flags))
+ return -ESRCH;
+ rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+ unlock_task_sighand(p, &flags);
+
+ /* can't set/change the rt policy */
+ if (policy != p->policy && !rlim_rtprio)
+ return -EPERM;
+
+ /* can't increase priority */
+ if (param->sched_priority > p->rt_priority &&
+ param->sched_priority > rlim_rtprio)
+ return -EPERM;
+ }
+
/* can't change other user's priorities */
if ((current->euid != p->euid) &&
(current->euid != p->uid))
return -EINVAL;
if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
return -EFAULT;
- read_lock_irq(&tasklist_lock);
+
+ rcu_read_lock();
+ retval = -ESRCH;
p = find_process_by_pid(pid);
- if (!p) {
- read_unlock_irq(&tasklist_lock);
- return -ESRCH;
- }
- get_task_struct(p);
- read_unlock_irq(&tasklist_lock);
- retval = sched_setscheduler(p, policy, &lparam);
- put_task_struct(p);
+ if (p != NULL)
+ retval = sched_setscheduler(p, policy, &lparam);
+ rcu_read_unlock();
return retval;
}
return 0;
}
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
{
- if (unlikely(preempt_count()))
+ if (unlikely(preempt_count() != expected_preempt_count))
return 0;
if (unlikely(system_state != SYSTEM_RUNNING))
return 0;
int __sched cond_resched(void)
{
- if (need_resched() && __resched_legal()) {
+ if (need_resched() && __resched_legal(0)) {
__cond_resched();
return 1;
}
ret = 1;
spin_lock(lock);
}
- if (need_resched() && __resched_legal()) {
+ if (need_resched() && __resched_legal(1)) {
spin_release(&lock->dep_map, 1, _THIS_IP_);
_raw_spin_unlock(lock);
preempt_enable_no_resched();
{
BUG_ON(!in_softirq());
- if (need_resched() && __resched_legal()) {
+ if (need_resched() && __resched_legal(0)) {
raw_local_irq_disable();
_local_bh_enable();
raw_local_irq_enable();
int __init migration_init(void)
{
void *cpu = (void *)(long)smp_processor_id();
+ int err;
/* Start one for the boot CPU: */
- migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+ err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+ BUG_ON(err == NOTIFY_BAD);
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
for (i = 0; i < MAX_NUMNODES; i++)
init_numa_sched_groups_power(sched_group_nodes[i]);
- init_numa_sched_groups_power(sched_group_allnodes);
+ if (sched_group_allnodes) {
+ int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+ struct sched_group *sg = &sched_group_allnodes[group];
+
+ init_numa_sched_groups_power(sg);
+ }
#endif
/* Attach the domains */
rq->cpu_load[j] = 0;
rq->active_balance = 0;
rq->push_cpu = 0;
+ rq->cpu = i;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
#endif
}
set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+ plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
/*
* The boot idle thread does lazy MMU switching as well:
*/