git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - kernel/sched_rt.c
sched: add RT task pushing

/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	u64 delta_exec;

	if (!task_has_rt_policy(curr))
		return;

	delta_exec = rq->clock - curr->se.exec_start;
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	curr->se.exec_start = rq->clock;
	cpuacct_charge(curr, delta_exec);
}

static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
{
	WARN_ON(!rt_task(p));
	rq->rt.rt_nr_running++;
#ifdef CONFIG_SMP
	if (p->prio < rq->rt.highest_prio)
		rq->rt.highest_prio = p->prio;
#endif /* CONFIG_SMP */
}

static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
{
	WARN_ON(!rt_task(p));
	WARN_ON(!rq->rt.rt_nr_running);
	rq->rt.rt_nr_running--;
#ifdef CONFIG_SMP
	if (rq->rt.rt_nr_running) {
		struct rt_prio_array *array;

		WARN_ON(p->prio < rq->rt.highest_prio);
		if (p->prio == rq->rt.highest_prio) {
			/* recalculate */
			array = &rq->rt.active;
			rq->rt.highest_prio =
				sched_find_first_bit(array->bitmap);
		} /* otherwise leave rq->highest prio alone */
	} else
		rq->rt.highest_prio = MAX_RT_PRIO;
#endif /* CONFIG_SMP */
}
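
/*
 * Note on the highest_prio bookkeeping above (a lower prio value means a
 * higher priority): inc_rt_tasks() only ever lowers the cached value,
 * while dec_rt_tasks() recomputes it from the bitmap when the departing
 * task was at the cached priority.  A made-up example: with tasks queued
 * at prio 10 and prio 40, highest_prio is 10; dequeueing the last prio-10
 * task clears bit 10 in dequeue_task_rt() before dec_rt_tasks() runs, so
 * sched_find_first_bit() yields 40.  When the last RT task leaves,
 * highest_prio is parked at MAX_RT_PRIO, which find_lock_lowest_rq()
 * below reads as "no RT task on this CPU".
 */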

static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
	struct rt_prio_array *array = &rq->rt.active;

	list_add_tail(&p->run_list, array->queue + p->prio);
	__set_bit(p->prio, array->bitmap);
	inc_cpu_load(rq, p->se.load.weight);

	inc_rt_tasks(p, rq);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
	struct rt_prio_array *array = &rq->rt.active;

	update_curr_rt(rq);

	list_del(&p->run_list);
	if (list_empty(array->queue + p->prio))
		__clear_bit(p->prio, array->bitmap);
	dec_cpu_load(rq, p->se.load.weight);

	dec_rt_tasks(p, rq);
}
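
/*
 * For reference, the priority array manipulated above is declared in
 * sched.c; at this point in the tree it looks roughly like:
 *
 *	struct rt_prio_array {
 *		DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1);
 *		struct list_head queue[MAX_RT_PRIO];
 *	};
 *
 * One FIFO list per RT priority level plus one bitmap bit per level, so
 * enqueue/dequeue and "find the highest queued priority" are all O(1).
 */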

/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
{
	struct rt_prio_array *array = &rq->rt.active;

	list_move_tail(&p->run_list, array->queue + p->prio);
}

static void
yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr);
}

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
{
	if (p->prio < rq->curr->prio)
		resched_task(rq->curr);
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct rt_prio_array *array = &rq->rt.active;
	struct task_struct *next;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	if (idx >= MAX_RT_PRIO)
		return NULL;

	queue = array->queue + idx;
	next = list_entry(queue->next, struct task_struct, run_list);

	next->se.exec_start = rq->clock;

	return next;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);
	p->se.exec_start = 0;
}

#ifdef CONFIG_SMP
/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
{
	struct rt_prio_array *array = &rq->rt.active;
	struct task_struct *next;
	struct list_head *queue;
	int idx;

	assert_spin_locked(&rq->lock);

	if (likely(rq->rt.rt_nr_running < 2))
		return NULL;

	idx = sched_find_first_bit(array->bitmap);
	if (unlikely(idx >= MAX_RT_PRIO)) {
		WARN_ON(1); /* rt_nr_running is bad */
		return NULL;
	}

	queue = array->queue + idx;
	next = list_entry(queue->next, struct task_struct, run_list);
	if (unlikely(next != rq->curr))
		return next;

	if (queue->next->next != queue) {
		/* same prio task */
		next = list_entry(queue->next->next, struct task_struct, run_list);
		return next;
	}

	/* slower, but more flexible */
	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
	if (unlikely(idx >= MAX_RT_PRIO)) {
		WARN_ON(1); /* rt_nr_running was 2 and above! */
		return NULL;
	}

	queue = array->queue + idx;
	next = list_entry(queue->next, struct task_struct, run_list);

	return next;
}
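
/*
 * Worked example for the lookup above (priorities are made up): if the
 * running task A sits at the head of queue[5] and task B is queued behind
 * it at the same prio, B is returned from the "same prio task" branch.
 * If A is alone at prio 5 and B waits at prio 20, find_next_bit() skips
 * past bit 5 and B is found via queue[20] instead.
 */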

static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task,
				      struct rq *this_rq)
{
	struct rq *lowest_rq = NULL;
	int cpu;
	int tries;
	cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);

	cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		/*
		 * Scan each rq for the lowest prio.
		 */
		for_each_cpu_mask(cpu, *cpu_mask) {
			struct rq *rq = &per_cpu(runqueues, cpu);

			if (cpu == this_rq->cpu)
				continue;

			/* We look for lowest RT prio or non-rt CPU */
			if (rq->rt.highest_prio >= MAX_RT_PRIO) {
				lowest_rq = rq;
				break;
			}

			/* no locking for now */
			if (rq->rt.highest_prio > task->prio &&
			    (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
				lowest_rq = rq;
			}
		}

		if (!lowest_rq)
			break;

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(this_rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != this_rq ||
				     !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
				     task_running(this_rq, task) ||
				     !task->se.on_rq)) {
				spin_unlock(&lowest_rq->lock);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio > task->prio)
			break;

		/* try again */
		spin_unlock(&lowest_rq->lock);
		lowest_rq = NULL;
	}

	return lowest_rq;
}
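
/*
 * Why the revalidation above is needed: double_lock_balance() lives in
 * sched.c and, roughly, does
 *
 *	if (!spin_trylock(&busiest->lock)) {
 *		if (busiest < this_rq) {
 *			spin_unlock(&this_rq->lock);
 *			spin_lock(&busiest->lock);
 *			spin_lock(&this_rq->lock);
 *			ret = 1;
 *		} else
 *			spin_lock(&busiest->lock);
 *	}
 *
 * i.e. it may drop this_rq->lock in order to take the two locks in a
 * fixed order.  A nonzero return means the lock was dropped, so the task
 * may have migrated, changed affinity or started running in the meantime.
 */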

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *this_rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;
	int paranoid = RT_MAX_TRIES;

	assert_spin_locked(&this_rq->lock);

	next_task = pick_next_highest_task_rt(this_rq);
	if (!next_task)
		return 0;

 retry:
	if (unlikely(next_task == this_rq->curr))
		return 0;

	/*
	 * It's possible that the next_task slipped in at a
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < this_rq->curr->prio)) {
		resched_task(this_rq->curr);
		return 0;
	}

	/* We might release this_rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, this_rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases this_rq->lock
		 * so it is possible that next_task has changed.
		 * If it has, then try again.
		 */
		task = pick_next_highest_task_rt(this_rq);
		if (unlikely(task != next_task) && task && paranoid--) {
			put_task_struct(next_task);
			next_task = task;
			goto retry;
		}
		goto out;
	}

	assert_spin_locked(&lowest_rq->lock);

	deactivate_task(this_rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);

	resched_task(lowest_rq->curr);

	spin_unlock(&lowest_rq->lock);

	ret = 1;
out:
	put_task_struct(next_task);

	return ret;
}
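
/*
 * Illustrative push (CPU numbers and priorities are made up): CPU0 runs
 * task A at prio 10 with task B queued at prio 20, while CPU1 only runs a
 * SCHED_NORMAL task, so its rt.highest_prio is MAX_RT_PRIO.
 * pick_next_highest_task_rt() picks B, find_lock_lowest_rq() settles on
 * CPU1's rq, and B is deactivated on CPU0, re-activated on CPU1, and
 * CPU1's current task is rescheduled so B starts running there.
 */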

/*
 * TODO: Currently we just use the second highest prio task on
 *       the queue, and stop when it can't migrate (or there's
 *       no more RT tasks).  There may be a case where a lower
 *       priority RT task has a different affinity than the
 *       higher RT task. In this case the lower RT task could
 *       possibly be able to migrate whereas the higher priority
 *       RT task could not.  We currently ignore this issue.
 *       Enhancements are welcome!
 */
static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task will return true if it moved an RT */
	while (push_rt_task(rq))
		;
}

static void schedule_tail_balance_rt(struct rq *rq)
{
	/*
	 * If we have more than one rt_task queued, then
	 * see if we can push the other rt_tasks off to other CPUS.
	 * Note we may release the rq lock, and since
	 * the lock was owned by prev, we need to release it
	 * first via finish_lock_switch and then reacquire it here.
	 */
	if (unlikely(rq->rt.rt_nr_running > 1)) {
		spin_lock_irq(&rq->lock);
		push_rt_tasks(rq);
		spin_unlock_irq(&rq->lock);
	}
}

/*
 * Load-balancing iterator. Note: while the runqueue stays locked
 * during the whole iteration, the current task might be
 * dequeued so the iterator has to be dequeue-safe. Here we
 * achieve that by always pre-iterating before returning
 * the current task:
 */
static struct task_struct *load_balance_start_rt(void *arg)
{
	struct rq *rq = arg;
	struct rt_prio_array *array = &rq->rt.active;
	struct list_head *head, *curr;
	struct task_struct *p;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	if (idx >= MAX_RT_PRIO)
		return NULL;

	head = array->queue + idx;
	curr = head->prev;

	p = list_entry(curr, struct task_struct, run_list);

	curr = curr->prev;

	rq->rt.rt_load_balance_idx = idx;
	rq->rt.rt_load_balance_head = head;
	rq->rt.rt_load_balance_curr = curr;

	return p;
}

static struct task_struct *load_balance_next_rt(void *arg)
{
	struct rq *rq = arg;
	struct rt_prio_array *array = &rq->rt.active;
	struct list_head *head, *curr;
	struct task_struct *p;
	int idx;

	idx = rq->rt.rt_load_balance_idx;
	head = rq->rt.rt_load_balance_head;
	curr = rq->rt.rt_load_balance_curr;

	/*
	 * If we arrived back to the head again then
	 * iterate to the next queue (if any):
	 */
	if (unlikely(head == curr)) {
		int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);

		if (next_idx >= MAX_RT_PRIO)
			return NULL;

		idx = next_idx;
		head = array->queue + idx;
		curr = head->prev;

		rq->rt.rt_load_balance_idx = idx;
		rq->rt.rt_load_balance_head = head;
	}

	p = list_entry(curr, struct task_struct, run_list);

	curr = curr->prev;

	rq->rt.rt_load_balance_curr = curr;

	return p;
}
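
/*
 * How the two callbacks above are consumed: balance_tasks() and
 * iter_move_one_task() in sched.c drive a struct rq_iterator roughly as
 *
 *	p = iterator->start(iterator->arg);
 *	while (p) {
 *		... decide whether to pull p over to this_rq ...
 *		p = iterator->next(iterator->arg);
 *	}
 *
 * which is why both callbacks pre-advance to the previous list entry
 * before returning: the task just handed back may be dequeued by the
 * caller without breaking the walk.
 */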

static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
		unsigned long max_load_move,
		struct sched_domain *sd, enum cpu_idle_type idle,
		int *all_pinned, int *this_best_prio)
{
	struct rq_iterator rt_rq_iterator;

	rt_rq_iterator.start = load_balance_start_rt;
	rt_rq_iterator.next = load_balance_next_rt;
	/* pass 'busiest' rq argument into
	 * load_balance_[start|next]_rt iterators
	 */
	rt_rq_iterator.arg = busiest;

	return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
			     idle, all_pinned, this_best_prio, &rt_rq_iterator);
}

static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
		 struct sched_domain *sd, enum cpu_idle_type idle)
{
	struct rq_iterator rt_rq_iterator;

	rt_rq_iterator.start = load_balance_start_rt;
	rt_rq_iterator.next = load_balance_next_rt;
	rt_rq_iterator.arg = busiest;

	return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
				  &rt_rq_iterator);
}
#else /* CONFIG_SMP */
# define schedule_tail_balance_rt(rq)	do { } while (0)
#endif /* CONFIG_SMP */

static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->time_slice)
		return;

	p->time_slice = DEF_TIMESLICE;

	/*
	 * Requeue to the end of queue if we are not the only element
	 * on the queue:
	 */
	if (p->run_list.prev != p->run_list.next) {
		requeue_task_rt(rq, p);
		set_tsk_need_resched(p);
	}
}
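
/*
 * In other words: SCHED_FIFO tasks never expire here, while a SCHED_RR
 * task is moved to the tail of its priority queue once its slice runs
 * out, so equal-priority RR peers take turns.  DEF_TIMESLICE comes from
 * sched.c and is roughly 100 ms worth of ticks; a lone RR task at its
 * priority level simply keeps its refreshed slice and continues running.
 */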

static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq->clock;
}

const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.load_balance		= load_balance_rt,
	.move_one_task		= move_one_task_rt,
#endif

	.set_curr_task		= set_curr_task_rt,
	.task_tick		= task_tick_rt,
};