]>
Commit | Line | Data |
---|---|---|
8b3d6663 AB |
1 | /* sched.c - SPU scheduler. |
2 | * | |
3 | * Copyright (C) IBM 2005 | |
4 | * Author: Mark Nutter <mnutter@us.ibm.com> | |
5 | * | |
a68cf983 | 6 | * 2006-03-31 NUMA domains added. |
8b3d6663 AB |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2, or (at your option) | |
11 | * any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
21 | */ | |
22 | ||
3b3d22cb AB |
23 | #undef DEBUG |
24 | ||
8b3d6663 AB |
25 | #include <linux/module.h> |
26 | #include <linux/errno.h> | |
27 | #include <linux/sched.h> | |
28 | #include <linux/kernel.h> | |
29 | #include <linux/mm.h> | |
30 | #include <linux/completion.h> | |
31 | #include <linux/vmalloc.h> | |
32 | #include <linux/smp.h> | |
33 | #include <linux/smp_lock.h> | |
34 | #include <linux/stddef.h> | |
35 | #include <linux/unistd.h> | |
a68cf983 MN |
36 | #include <linux/numa.h> |
37 | #include <linux/mutex.h> | |
86767277 | 38 | #include <linux/notifier.h> |
8b3d6663 AB |
39 | |
40 | #include <asm/io.h> | |
41 | #include <asm/mmu_context.h> | |
42 | #include <asm/spu.h> | |
43 | #include <asm/spu_csa.h> | |
a91942ae | 44 | #include <asm/spu_priv1.h> |
8b3d6663 AB |
45 | #include "spufs.h" |
46 | ||
7945a4a2 | 47 | #define SPU_MIN_TIMESLICE (100 * HZ / 1000) |
2a911f0b | 48 | |
/*
 * One extra word beyond MAX_PRIO bits so a sentinel bit at index
 * MAX_PRIO can be set (see spu_sched_init()); sched_find_first_bit()
 * then returns MAX_PRIO when no real priority level has waiters.
 */
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
	unsigned long bitmap[SPU_BITMAP_SIZE];		/* bit set => waiters at that prio */
	wait_queue_head_t waitq[MAX_PRIO];		/* one wait queue per priority */
	struct list_head active_list[MAX_NUMNODES];	/* bound SPUs, per NUMA node */
	struct mutex active_mutex[MAX_NUMNODES];	/* protects the matching active_list */
};

/* Single global scheduler state, allocated in spu_sched_init(). */
static struct spu_prio_array *spu_prio;
a68cf983 | 59 | static inline int node_allowed(int node) |
8b3d6663 | 60 | { |
a68cf983 | 61 | cpumask_t mask; |
8b3d6663 | 62 | |
a68cf983 MN |
63 | if (!nr_cpus_node(node)) |
64 | return 0; | |
65 | mask = node_to_cpumask(node); | |
66 | if (!cpus_intersects(mask, current->cpus_allowed)) | |
67 | return 0; | |
68 | return 1; | |
8b3d6663 AB |
69 | } |
70 | ||
71 | static inline void mm_needs_global_tlbie(struct mm_struct *mm) | |
72 | { | |
a68cf983 MN |
73 | int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; |
74 | ||
8b3d6663 | 75 | /* Global TLBIE broadcast required with SPEs. */ |
a68cf983 | 76 | __cpus_setall(&mm->cpu_vm_mask, nr); |
8b3d6663 AB |
77 | } |
78 | ||
/* Notifier chain fired on every SPU context switch (used by profilers). */
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

/*
 * Tell listeners that @ctx is now running on @spu; a NULL @ctx means
 * the SPU is being vacated.  The context's object_id (0 when none) is
 * passed as the notifier's value argument.
 */
static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
			    ctx ? ctx->object_id : 0, spu);
}
86 | ||
/* Register @n for SPU context-switch notifications. */
int spu_switch_event_register(struct notifier_block * n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}
91 | ||
/* Unregister @n from SPU context-switch notifications. */
int spu_switch_event_unregister(struct notifier_block * n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}
96 | ||
97 | ||
8b3d6663 AB |
/*
 * bind_context - attach context @ctx to hardware SPU @spu.
 *
 * Publishes the context/SPU cross links and all interrupt callback
 * pointers, then restores the saved context state onto the SPU.
 * Ordering matters: the mb() ensures the callbacks are visible before
 * spu_restore() lets the context generate events that would invoke
 * them.  Caller presumably holds ctx->state_sema — TODO confirm.
 */
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;		/* direct hardware access from now on */
	spu->pid = current->pid;
	spu->prio = current->prio;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);	/* SPE MMU requires broadcast tlbie */
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
	mb();				/* callbacks visible before restore */
	spu_unmap_mappings(ctx);	/* mappings must now fault to the SPU */
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);

	ctx->state = SPU_STATE_RUNNABLE;
}
124 | ||
/*
 * unbind_context - detach context @ctx from hardware SPU @spu.
 *
 * Mirror image of bind_context(): save the SPU state into the context
 * save area, clear the callback pointers and cross links, and mark the
 * context saved so it falls back to the backing (software) ops.
 */
static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);
	spu_switch_notify(spu, NULL);	/* SPU is being vacated */
	spu_unmap_mappings(ctx);	/* future faults go to backing store */
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	spu->prio = MAX_PRIO;		/* idle SPU: lowest possible priority */
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}
147 | ||
a68cf983 MN |
/*
 * Queue the current task exclusively on priority wait queue @wq and
 * mark @prio as having waiters in the scheduler bitmap.
 */
static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait,
			      int prio)
{
	prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE);
	set_bit(prio, spu_prio->bitmap);
}
5110459f | 154 | |
a68cf983 MN |
155 | static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait, |
156 | int prio) | |
2a911f0b | 157 | { |
a68cf983 | 158 | u64 flags; |
2a911f0b | 159 | |
a68cf983 MN |
160 | __set_current_state(TASK_RUNNING); |
161 | ||
162 | spin_lock_irqsave(&wq->lock, flags); | |
163 | ||
164 | remove_wait_queue_locked(wq, wait); | |
165 | if (list_empty(&wq->task_list)) | |
166 | clear_bit(prio, spu_prio->bitmap); | |
167 | ||
168 | spin_unlock_irqrestore(&wq->lock, flags); | |
8b3d6663 AB |
169 | } |
170 | ||
/*
 * spu_prio_wait - sleep until an SPU may be available for @ctx.
 *
 * Queues the caller on the wait queue for its current priority, drops
 * ctx->state_sema across schedule() so others can bind/unbind, and
 * retakes it before returning.  Bails out early if the context got an
 * SPU meanwhile or a signal is pending; callers must re-check both
 * conditions after this returns (see spu_activate()).
 */
static void spu_prio_wait(struct spu_context *ctx, u64 flags)
{
	int prio = current->prio;
	wait_queue_head_t *wq = &spu_prio->waitq[prio];
	DEFINE_WAIT(wait);

	/* Already bound: nothing to wait for. */
	if (ctx->spu)
		return;

	spu_add_wq(wq, &wait, prio);

	if (!signal_pending(current)) {
		/* Drop the lock while asleep so a wakeup can make progress. */
		up_write(&ctx->state_sema);
		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
			 current->pid, current->prio);
		schedule();
		down_write(&ctx->state_sema);
	}

	spu_del_wq(wq, &wait, prio);
}
192 | ||
a68cf983 | 193 | static void spu_prio_wakeup(void) |
8b3d6663 | 194 | { |
a68cf983 MN |
195 | int best = sched_find_first_bit(spu_prio->bitmap); |
196 | if (best < MAX_PRIO) { | |
197 | wait_queue_head_t *wq = &spu_prio->waitq[best]; | |
198 | wake_up_interruptible_nr(wq, 1); | |
199 | } | |
8b3d6663 AB |
200 | } |
201 | ||
202 | static int get_active_spu(struct spu *spu) | |
203 | { | |
a68cf983 | 204 | int node = spu->node; |
8b3d6663 AB |
205 | struct spu *tmp; |
206 | int rc = 0; | |
207 | ||
a68cf983 MN |
208 | mutex_lock(&spu_prio->active_mutex[node]); |
209 | list_for_each_entry(tmp, &spu_prio->active_list[node], list) { | |
8b3d6663 | 210 | if (tmp == spu) { |
a68cf983 | 211 | list_del_init(&spu->list); |
8b3d6663 AB |
212 | rc = 1; |
213 | break; | |
214 | } | |
215 | } | |
a68cf983 | 216 | mutex_unlock(&spu_prio->active_mutex[node]); |
8b3d6663 AB |
217 | return rc; |
218 | } | |
219 | ||
/* Append @spu to the active list of its NUMA node. */
static void put_active_spu(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_add_tail(&spu->list, &spu_prio->active_list[node]);
	mutex_unlock(&spu_prio->active_mutex[node]);
}
228 | ||
229 | static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags) | |
230 | { | |
231 | struct spu *spu = NULL; | |
232 | int node = cpu_to_node(raw_smp_processor_id()); | |
233 | int n; | |
234 | ||
235 | for (n = 0; n < MAX_NUMNODES; n++, node++) { | |
236 | node = (node < MAX_NUMNODES) ? node : 0; | |
237 | if (!node_allowed(node)) | |
238 | continue; | |
239 | spu = spu_alloc_node(node); | |
240 | if (spu) | |
241 | break; | |
242 | } | |
243 | return spu; | |
244 | } | |
8b3d6663 | 245 | |
a68cf983 MN |
/*
 * spu_get - obtain an SPU for @ctx.  Currently only hands out idle
 * SPUs; may return NULL.
 */
static inline struct spu *spu_get(struct spu_context *ctx, u64 flags)
{
	/* Future: spu_get_idle() if possible,
	 * otherwise try to preempt an active
	 * context.
	 */
	return spu_get_idle(ctx, flags);
}
254 | ||
a68cf983 MN |
255 | /* The three externally callable interfaces |
256 | * for the scheduler begin here. | |
8b3d6663 | 257 | * |
a68cf983 MN |
258 | * spu_activate - bind a context to SPU, waiting as needed. |
259 | * spu_deactivate - unbind a context from its SPU. | |
260 | * spu_yield - yield an SPU if others are waiting. | |
8b3d6663 AB |
261 | */ |
262 | ||
/*
 * spu_activate - bind @ctx to a physical SPU, sleeping as needed.
 *
 * Returns 0 on success (or if the context is already bound) and
 * -ERESTARTSYS if interrupted by a signal while waiting.  Because
 * spu_prio_wait() drops ctx->state_sema across schedule(), ctx->spu
 * is re-checked after every wait and even after a successful
 * allocation — another thread may have bound the context meanwhile,
 * in which case the freshly allocated SPU is returned to the pool.
 */
int spu_activate(struct spu_context *ctx, u64 flags)
{
	struct spu *spu;
	int ret = 0;

	for (;;) {
		if (ctx->spu)
			return 0;
		spu = spu_get(ctx, flags);
		if (spu != NULL) {
			if (ctx->spu != NULL) {
				/* Lost the race: someone else bound us. */
				spu_free(spu);
				spu_prio_wakeup();
				break;
			}
			bind_context(spu, ctx);
			put_active_spu(spu);
			break;
		}
		spu_prio_wait(ctx, flags);
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			/* Pass the wakeup on so another waiter can try. */
			spu_prio_wakeup();
			break;
		}
	}
	return ret;
}
291 | ||
292 | void spu_deactivate(struct spu_context *ctx) | |
293 | { | |
294 | struct spu *spu; | |
295 | int needs_idle; | |
296 | ||
297 | spu = ctx->spu; | |
298 | if (!spu) | |
299 | return; | |
300 | needs_idle = get_active_spu(spu); | |
301 | unbind_context(spu, ctx); | |
a68cf983 MN |
302 | if (needs_idle) { |
303 | spu_free(spu); | |
304 | spu_prio_wakeup(); | |
305 | } | |
8b3d6663 AB |
306 | } |
307 | ||
/*
 * spu_yield - give up our SPU if someone else is waiting for one.
 *
 * Best effort only: uses a trylock on ctx->state_sema so a yield never
 * blocks.  If anyone is queued in the priority bitmap the context is
 * deactivated and the CPU yielded; otherwise the SPU's priority is
 * dropped to MAX_PRIO so it looks preemptible.
 */
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	if (down_write_trylock(&ctx->state_sema)) {
		if ((spu = ctx->spu) != NULL) {
			/* Anyone waiting for an SPU at any priority? */
			int best = sched_find_first_bit(spu_prio->bitmap);
			if (best < MAX_PRIO) {
				pr_debug("%s: yielding SPU %d NODE %d\n",
					 __FUNCTION__, spu->number, spu->node);
				spu_deactivate(ctx);
				need_yield = 1;
			} else {
				spu->prio = MAX_PRIO;
			}
		}
		up_write(&ctx->state_sema);
	}
	/* yield() must be called without state_sema held */
	if (unlikely(need_yield))
		yield();
}
330 | ||
331 | int __init spu_sched_init(void) | |
332 | { | |
8b3d6663 AB |
333 | int i; |
334 | ||
a68cf983 MN |
335 | spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); |
336 | if (!spu_prio) { | |
337 | printk(KERN_WARNING "%s: Unable to allocate priority queue.\n", | |
8b3d6663 AB |
338 | __FUNCTION__); |
339 | return 1; | |
340 | } | |
8b3d6663 | 341 | for (i = 0; i < MAX_PRIO; i++) { |
a68cf983 MN |
342 | init_waitqueue_head(&spu_prio->waitq[i]); |
343 | __clear_bit(i, spu_prio->bitmap); | |
8b3d6663 | 344 | } |
a68cf983 MN |
345 | __set_bit(MAX_PRIO, spu_prio->bitmap); |
346 | for (i = 0; i < MAX_NUMNODES; i++) { | |
347 | mutex_init(&spu_prio->active_mutex[i]); | |
348 | INIT_LIST_HEAD(&spu_prio->active_list[i]); | |
8b3d6663 AB |
349 | } |
350 | return 0; | |
351 | } | |
352 | ||
353 | void __exit spu_sched_exit(void) | |
354 | { | |
a68cf983 MN |
355 | struct spu *spu, *tmp; |
356 | int node; | |
357 | ||
358 | for (node = 0; node < MAX_NUMNODES; node++) { | |
359 | mutex_lock(&spu_prio->active_mutex[node]); | |
360 | list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], | |
361 | list) { | |
362 | list_del_init(&spu->list); | |
363 | spu_free(spu); | |
364 | } | |
365 | mutex_unlock(&spu_prio->active_mutex[node]); | |
8b3d6663 | 366 | } |
a68cf983 | 367 | kfree(spu_prio); |
8b3d6663 | 368 | } |