]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/powerpc/platforms/cell/spufs/sched.c
[POWERPC] spusched: Dynamic timeslicing for SCHED_OTHER
[mirror_ubuntu-hirsute-kernel.git] / arch / powerpc / platforms / cell / spufs / sched.c
CommitLineData
8b3d6663
AB
1/* sched.c - SPU scheduler.
2 *
3 * Copyright (C) IBM 2005
4 * Author: Mark Nutter <mnutter@us.ibm.com>
5 *
a68cf983 6 * 2006-03-31 NUMA domains added.
8b3d6663
AB
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
3b3d22cb
AB
23#undef DEBUG
24
8b3d6663
AB
25#include <linux/module.h>
26#include <linux/errno.h>
27#include <linux/sched.h>
28#include <linux/kernel.h>
29#include <linux/mm.h>
30#include <linux/completion.h>
31#include <linux/vmalloc.h>
32#include <linux/smp.h>
8b3d6663
AB
33#include <linux/stddef.h>
34#include <linux/unistd.h>
a68cf983
MN
35#include <linux/numa.h>
36#include <linux/mutex.h>
86767277 37#include <linux/notifier.h>
37901802 38#include <linux/kthread.h>
8b3d6663
AB
39
40#include <asm/io.h>
41#include <asm/mmu_context.h>
42#include <asm/spu.h>
43#include <asm/spu_csa.h>
a91942ae 44#include <asm/spu_priv1.h>
8b3d6663
AB
45#include "spufs.h"
46
8b3d6663 47struct spu_prio_array {
72cb3608 48 DECLARE_BITMAP(bitmap, MAX_PRIO);
079cdb61
CH
49 struct list_head runq[MAX_PRIO];
50 spinlock_t runq_lock;
a68cf983
MN
51 struct list_head active_list[MAX_NUMNODES];
52 struct mutex active_mutex[MAX_NUMNODES];
8b3d6663
AB
53};
54
a68cf983 55static struct spu_prio_array *spu_prio;
37901802
CH
56static struct task_struct *spusched_task;
57static struct timer_list spusched_timer;
8b3d6663 58
fe443ef2
CH
/*
 * Priority of a normal, non-rt, non-nice'd process (aka nice level 0).
 */
#define NORMAL_PRIO		120

/*
 * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
 * tick for every 10 CPU scheduler ticks.
 */
#define SPUSCHED_TICK		(10)

/*
 * These are the 'tuning knobs' of the scheduler:
 *
 * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
 * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
 *
 * Time slices are counted in spu scheduler ticks, not in jiffies:
 * spusched_tick() decrements ctx->time_slice once per scheduler wakeup and
 * the scheduler timer fires every SPUSCHED_TICK jiffies.  The millisecond
 * values above are therefore converted with HZ / (1000 * SPUSCHED_TICK).
 */
#define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))

#define MAX_USER_PRIO		(MAX_PRIO - MAX_RT_PRIO)
/* Scale timeslice @x by priority @prio, never going below the minimum. */
#define SCALE_PRIO(x, prio) \
	max((x) * (MAX_PRIO - (prio)) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
82
83/*
84 * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
85 * [800ms ... 100ms ... 5ms]
86 *
87 * The higher a thread's priority, the bigger timeslices
88 * it gets during one round of execution. But even the lowest
89 * priority thread gets MIN_TIMESLICE worth of execution time.
90 */
91void spu_set_timeslice(struct spu_context *ctx)
92{
93 if (ctx->prio < NORMAL_PRIO)
94 ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
95 else
96 ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
97}
98
a68cf983 99static inline int node_allowed(int node)
8b3d6663 100{
a68cf983 101 cpumask_t mask;
8b3d6663 102
a68cf983
MN
103 if (!nr_cpus_node(node))
104 return 0;
105 mask = node_to_cpumask(node);
106 if (!cpus_intersects(mask, current->cpus_allowed))
107 return 0;
108 return 1;
8b3d6663
AB
109}
110
202557d2
CH
111/**
112 * spu_add_to_active_list - add spu to active list
113 * @spu: spu to add to the active list
114 */
115static void spu_add_to_active_list(struct spu *spu)
116{
117 mutex_lock(&spu_prio->active_mutex[spu->node]);
118 list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
119 mutex_unlock(&spu_prio->active_mutex[spu->node]);
120}
121
37901802
CH
122static void __spu_remove_from_active_list(struct spu *spu)
123{
124 list_del_init(&spu->list);
125}
126
202557d2
CH
127/**
128 * spu_remove_from_active_list - remove spu from active list
129 * @spu: spu to remove from the active list
202557d2 130 */
678b2ff1 131static void spu_remove_from_active_list(struct spu *spu)
202557d2
CH
132{
133 int node = spu->node;
202557d2
CH
134
135 mutex_lock(&spu_prio->active_mutex[node]);
37901802 136 __spu_remove_from_active_list(spu);
202557d2 137 mutex_unlock(&spu_prio->active_mutex[node]);
202557d2
CH
138}
139
86767277
AB
140static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
141
142static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
143{
144 blocking_notifier_call_chain(&spu_switch_notifier,
145 ctx ? ctx->object_id : 0, spu);
146}
147
148int spu_switch_event_register(struct notifier_block * n)
149{
150 return blocking_notifier_chain_register(&spu_switch_notifier, n);
151}
152
153int spu_switch_event_unregister(struct notifier_block * n)
154{
155 return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
156}
157
202557d2
CH
158/**
159 * spu_bind_context - bind spu context to physical spu
160 * @spu: physical spu to bind to
161 * @ctx: context to bind
162 */
163static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
8b3d6663 164{
a68cf983
MN
165 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
166 spu->number, spu->node);
8b3d6663
AB
167 spu->ctx = ctx;
168 spu->flags = 0;
169 ctx->spu = spu;
170 ctx->ops = &spu_hw_ops;
171 spu->pid = current->pid;
94b2a439 172 spu_associate_mm(spu, ctx->owner);
8b3d6663
AB
173 spu->ibox_callback = spufs_ibox_callback;
174 spu->wbox_callback = spufs_wbox_callback;
5110459f 175 spu->stop_callback = spufs_stop_callback;
a33a7d73 176 spu->mfc_callback = spufs_mfc_callback;
9add11da 177 spu->dma_callback = spufs_dma_callback;
8b3d6663 178 mb();
5110459f 179 spu_unmap_mappings(ctx);
8b3d6663 180 spu_restore(&ctx->csa, spu);
2a911f0b 181 spu->timestamp = jiffies;
a68cf983 182 spu_cpu_affinity_set(spu, raw_smp_processor_id());
86767277 183 spu_switch_notify(spu, ctx);
81998baf 184 ctx->state = SPU_STATE_RUNNABLE;
8b3d6663
AB
185}
186
202557d2
CH
187/**
188 * spu_unbind_context - unbind spu context from physical spu
189 * @spu: physical spu to unbind from
190 * @ctx: context to unbind
202557d2 191 */
678b2ff1 192static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
8b3d6663 193{
a68cf983
MN
194 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
195 spu->pid, spu->number, spu->node);
202557d2 196
86767277 197 spu_switch_notify(spu, NULL);
5110459f 198 spu_unmap_mappings(ctx);
8b3d6663 199 spu_save(&ctx->csa, spu);
2a911f0b 200 spu->timestamp = jiffies;
8b3d6663
AB
201 ctx->state = SPU_STATE_SAVED;
202 spu->ibox_callback = NULL;
203 spu->wbox_callback = NULL;
5110459f 204 spu->stop_callback = NULL;
a33a7d73 205 spu->mfc_callback = NULL;
9add11da 206 spu->dma_callback = NULL;
94b2a439 207 spu_associate_mm(spu, NULL);
8b3d6663 208 spu->pid = 0;
8b3d6663
AB
209 ctx->ops = &spu_backing_ops;
210 ctx->spu = NULL;
2a911f0b 211 spu->flags = 0;
8b3d6663
AB
212 spu->ctx = NULL;
213}
214
079cdb61
CH
215/**
216 * spu_add_to_rq - add a context to the runqueue
217 * @ctx: context to add
218 */
4e0f4ed0 219static void __spu_add_to_rq(struct spu_context *ctx)
8b3d6663 220{
4e0f4ed0
LB
221 int prio = ctx->prio;
222
223 list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
224 set_bit(prio, spu_prio->bitmap);
2a911f0b 225}
5110459f 226
4e0f4ed0 227static void __spu_del_from_rq(struct spu_context *ctx)
a475c2f4 228{
4e0f4ed0
LB
229 int prio = ctx->prio;
230
a475c2f4
CH
231 if (!list_empty(&ctx->rq))
232 list_del_init(&ctx->rq);
233 if (list_empty(&spu_prio->runq[prio]))
4e0f4ed0 234 clear_bit(prio, spu_prio->bitmap);
079cdb61 235}
a68cf983 236
079cdb61 237static void spu_prio_wait(struct spu_context *ctx)
8b3d6663 238{
a68cf983 239 DEFINE_WAIT(wait);
8b3d6663 240
4e0f4ed0 241 spin_lock(&spu_prio->runq_lock);
079cdb61 242 prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
a68cf983 243 if (!signal_pending(current)) {
4e0f4ed0
LB
244 __spu_add_to_rq(ctx);
245 spin_unlock(&spu_prio->runq_lock);
650f8b02 246 mutex_unlock(&ctx->state_mutex);
a68cf983 247 schedule();
650f8b02 248 mutex_lock(&ctx->state_mutex);
4e0f4ed0
LB
249 spin_lock(&spu_prio->runq_lock);
250 __spu_del_from_rq(ctx);
8b3d6663 251 }
4e0f4ed0 252 spin_unlock(&spu_prio->runq_lock);
079cdb61
CH
253 __set_current_state(TASK_RUNNING);
254 remove_wait_queue(&ctx->stop_wq, &wait);
8b3d6663
AB
255}
256
079cdb61 257static struct spu *spu_get_idle(struct spu_context *ctx)
a68cf983
MN
258{
259 struct spu *spu = NULL;
260 int node = cpu_to_node(raw_smp_processor_id());
261 int n;
262
263 for (n = 0; n < MAX_NUMNODES; n++, node++) {
264 node = (node < MAX_NUMNODES) ? node : 0;
265 if (!node_allowed(node))
266 continue;
267 spu = spu_alloc_node(node);
268 if (spu)
269 break;
270 }
271 return spu;
272}
8b3d6663 273
52f04fcf
CH
274/**
275 * find_victim - find a lower priority context to preempt
276 * @ctx: canidate context for running
277 *
278 * Returns the freed physical spu to run the new context on.
279 */
280static struct spu *find_victim(struct spu_context *ctx)
281{
282 struct spu_context *victim = NULL;
283 struct spu *spu;
284 int node, n;
285
286 /*
287 * Look for a possible preemption candidate on the local node first.
288 * If there is no candidate look at the other nodes. This isn't
289 * exactly fair, but so far the whole spu schedule tries to keep
290 * a strong node affinity. We might want to fine-tune this in
291 * the future.
292 */
293 restart:
294 node = cpu_to_node(raw_smp_processor_id());
295 for (n = 0; n < MAX_NUMNODES; n++, node++) {
296 node = (node < MAX_NUMNODES) ? node : 0;
297 if (!node_allowed(node))
298 continue;
299
300 mutex_lock(&spu_prio->active_mutex[node]);
301 list_for_each_entry(spu, &spu_prio->active_list[node], list) {
302 struct spu_context *tmp = spu->ctx;
303
fe443ef2
CH
304 if (tmp->prio > ctx->prio &&
305 (!victim || tmp->prio > victim->prio))
52f04fcf
CH
306 victim = spu->ctx;
307 }
308 mutex_unlock(&spu_prio->active_mutex[node]);
309
310 if (victim) {
311 /*
312 * This nests ctx->state_mutex, but we always lock
313 * higher priority contexts before lower priority
314 * ones, so this is safe until we introduce
315 * priority inheritance schemes.
316 */
317 if (!mutex_trylock(&victim->state_mutex)) {
318 victim = NULL;
319 goto restart;
320 }
321
322 spu = victim->spu;
323 if (!spu) {
324 /*
325 * This race can happen because we've dropped
326 * the active list mutex. No a problem, just
327 * restart the search.
328 */
329 mutex_unlock(&victim->state_mutex);
330 victim = NULL;
331 goto restart;
332 }
37901802 333 spu_remove_from_active_list(spu);
52f04fcf
CH
334 spu_unbind_context(spu, victim);
335 mutex_unlock(&victim->state_mutex);
e097b513
CH
336 /*
337 * We need to break out of the wait loop in spu_run
338 * manually to ensure this context gets put on the
339 * runqueue again ASAP.
340 */
341 wake_up(&victim->stop_wq);
52f04fcf
CH
342 return spu;
343 }
344 }
345
346 return NULL;
347}
348
079cdb61
CH
349/**
350 * spu_activate - find a free spu for a context and execute it
351 * @ctx: spu context to schedule
352 * @flags: flags (currently ignored)
353 *
08873095 354 * Tries to find a free spu to run @ctx. If no free spu is available
079cdb61
CH
355 * add the context to the runqueue so it gets woken up once an spu
356 * is available.
357 */
26bec673 358int spu_activate(struct spu_context *ctx, unsigned long flags)
8b3d6663 359{
8b3d6663 360
079cdb61
CH
361 if (ctx->spu)
362 return 0;
363
364 do {
365 struct spu *spu;
366
367 spu = spu_get_idle(ctx);
52f04fcf
CH
368 /*
369 * If this is a realtime thread we try to get it running by
370 * preempting a lower priority thread.
371 */
fe443ef2 372 if (!spu && rt_prio(ctx->prio))
52f04fcf 373 spu = find_victim(ctx);
079cdb61 374 if (spu) {
202557d2 375 spu_bind_context(spu, ctx);
37901802 376 spu_add_to_active_list(spu);
079cdb61 377 return 0;
a68cf983 378 }
079cdb61 379
50b520d4 380 spu_prio_wait(ctx);
079cdb61
CH
381 } while (!signal_pending(current));
382
383 return -ERESTARTSYS;
8b3d6663
AB
384}
385
bb5db29a
CH
386/**
387 * grab_runnable_context - try to find a runnable context
388 *
389 * Remove the highest priority context on the runqueue and return it
390 * to the caller. Returns %NULL if no runnable context was found.
391 */
392static struct spu_context *grab_runnable_context(int prio)
393{
394 struct spu_context *ctx = NULL;
395 int best;
396
397 spin_lock(&spu_prio->runq_lock);
398 best = sched_find_first_bit(spu_prio->bitmap);
399 if (best < prio) {
400 struct list_head *rq = &spu_prio->runq[best];
401
402 BUG_ON(list_empty(rq));
403
404 ctx = list_entry(rq->next, struct spu_context, rq);
405 __spu_del_from_rq(ctx);
406 }
407 spin_unlock(&spu_prio->runq_lock);
408
409 return ctx;
410}
411
412static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
413{
414 struct spu *spu = ctx->spu;
415 struct spu_context *new = NULL;
416
417 if (spu) {
418 new = grab_runnable_context(max_prio);
419 if (new || force) {
37901802 420 spu_remove_from_active_list(spu);
bb5db29a
CH
421 spu_unbind_context(spu, ctx);
422 spu_free(spu);
423 if (new)
424 wake_up(&new->stop_wq);
425 }
426
427 }
428
429 return new != NULL;
430}
431
678b2ff1
CH
432/**
433 * spu_deactivate - unbind a context from it's physical spu
434 * @ctx: spu context to unbind
435 *
436 * Unbind @ctx from the physical spu it is running on and schedule
437 * the highest priority context to run on the freed physical spu.
438 */
8b3d6663
AB
439void spu_deactivate(struct spu_context *ctx)
440{
bb5db29a 441 __spu_deactivate(ctx, 1, MAX_PRIO);
8b3d6663
AB
442}
443
ae7b4c52
CH
444/**
445 * spu_yield - yield a physical spu if others are waiting
446 * @ctx: spu context to yield
447 *
448 * Check if there is a higher priority context waiting and if yes
449 * unbind @ctx from the physical spu and schedule the highest
450 * priority context to run on the freed physical spu instead.
451 */
8b3d6663
AB
452void spu_yield(struct spu_context *ctx)
453{
e5c0b9ec
CH
454 if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
455 mutex_lock(&ctx->state_mutex);
456 __spu_deactivate(ctx, 0, MAX_PRIO);
457 mutex_unlock(&ctx->state_mutex);
458 }
bb5db29a 459}
8b3d6663 460
37901802 461static void spusched_tick(struct spu_context *ctx)
bb5db29a 462{
fe443ef2 463 if (ctx->policy == SCHED_FIFO || --ctx->time_slice)
37901802 464 return;
bb5db29a
CH
465
466 /*
37901802
CH
467 * Unfortunately active_mutex ranks outside of state_mutex, so
468 * we have to trylock here. If we fail give the context another
469 * tick and try again.
bb5db29a 470 */
37901802
CH
471 if (mutex_trylock(&ctx->state_mutex)) {
472 struct spu_context *new = grab_runnable_context(ctx->prio + 1);
473 if (new) {
474 struct spu *spu = ctx->spu;
bb5db29a 475
37901802
CH
476 __spu_remove_from_active_list(spu);
477 spu_unbind_context(spu, ctx);
478 spu_free(spu);
479 wake_up(&new->stop_wq);
480 /*
481 * We need to break out of the wait loop in
482 * spu_run manually to ensure this context
483 * gets put on the runqueue again ASAP.
484 */
485 wake_up(&ctx->stop_wq);
486 }
fe443ef2 487 spu_set_timeslice(ctx);
37901802 488 mutex_unlock(&ctx->state_mutex);
bb5db29a 489 } else {
37901802 490 ctx->time_slice++;
8b3d6663 491 }
8b3d6663
AB
492}
493
37901802
CH
494static void spusched_wake(unsigned long data)
495{
496 mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
497 wake_up_process(spusched_task);
498}
499
500static int spusched_thread(void *unused)
501{
502 struct spu *spu, *next;
503 int node;
504
505 setup_timer(&spusched_timer, spusched_wake, 0);
506 __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
507
508 while (!kthread_should_stop()) {
509 set_current_state(TASK_INTERRUPTIBLE);
510 schedule();
511 for (node = 0; node < MAX_NUMNODES; node++) {
512 mutex_lock(&spu_prio->active_mutex[node]);
513 list_for_each_entry_safe(spu, next,
514 &spu_prio->active_list[node],
515 list)
516 spusched_tick(spu->ctx);
517 mutex_unlock(&spu_prio->active_mutex[node]);
518 }
519 }
520
521 del_timer_sync(&spusched_timer);
522 return 0;
523}
524
8b3d6663
AB
525int __init spu_sched_init(void)
526{
8b3d6663
AB
527 int i;
528
a68cf983 529 spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
37901802
CH
530 if (!spu_prio)
531 return -ENOMEM;
532
8b3d6663 533 for (i = 0; i < MAX_PRIO; i++) {
079cdb61 534 INIT_LIST_HEAD(&spu_prio->runq[i]);
a68cf983 535 __clear_bit(i, spu_prio->bitmap);
8b3d6663 536 }
a68cf983
MN
537 __set_bit(MAX_PRIO, spu_prio->bitmap);
538 for (i = 0; i < MAX_NUMNODES; i++) {
539 mutex_init(&spu_prio->active_mutex[i]);
540 INIT_LIST_HEAD(&spu_prio->active_list[i]);
8b3d6663 541 }
079cdb61 542 spin_lock_init(&spu_prio->runq_lock);
37901802
CH
543
544 spusched_task = kthread_run(spusched_thread, NULL, "spusched");
545 if (IS_ERR(spusched_task)) {
546 kfree(spu_prio);
547 return PTR_ERR(spusched_task);
548 }
8b3d6663 549 return 0;
37901802 550
8b3d6663
AB
551}
552
553void __exit spu_sched_exit(void)
554{
a68cf983
MN
555 struct spu *spu, *tmp;
556 int node;
557
37901802
CH
558 kthread_stop(spusched_task);
559
a68cf983
MN
560 for (node = 0; node < MAX_NUMNODES; node++) {
561 mutex_lock(&spu_prio->active_mutex[node]);
562 list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
563 list) {
564 list_del_init(&spu->list);
565 spu_free(spu);
566 }
567 mutex_unlock(&spu_prio->active_mutex[node]);
8b3d6663 568 }
a68cf983 569 kfree(spu_prio);
8b3d6663 570}