]>
Commit | Line | Data |
---|---|---|
38498a67 TG |
1 | /* |
2 | * Common SMP CPU bringup/teardown functions | |
3 | */ | |
f97f8f06 | 4 | #include <linux/cpu.h> |
29d5e047 TG |
5 | #include <linux/err.h> |
6 | #include <linux/smp.h> | |
8038dad7 | 7 | #include <linux/delay.h> |
38498a67 | 8 | #include <linux/init.h> |
f97f8f06 TG |
9 | #include <linux/list.h> |
10 | #include <linux/slab.h> | |
29d5e047 | 11 | #include <linux/sched.h> |
29930025 | 12 | #include <linux/sched/task.h> |
f97f8f06 | 13 | #include <linux/export.h> |
29d5e047 | 14 | #include <linux/percpu.h> |
f97f8f06 TG |
15 | #include <linux/kthread.h> |
16 | #include <linux/smpboot.h> | |
38498a67 TG |
17 | |
18 | #include "smpboot.h" | |
19 | ||
3180d89b PM |
20 | #ifdef CONFIG_SMP |
21 | ||
29d5e047 | 22 | #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD |
29d5e047 TG |
23 | /* |
24 | * Per-CPU pointer to the idle task recorded for each CPU. For the hotplug case we keep the task structs around and reuse | |
25 | * them. | |
26 | */ | |
27 | static DEFINE_PER_CPU(struct task_struct *, idle_threads); | |
28 | ||
0db0628d | 29 | struct task_struct *idle_thread_get(unsigned int cpu) |
29d5e047 TG |
30 | { |
31 | struct task_struct *tsk = per_cpu(idle_threads, cpu); | |
32 | ||
33 | if (!tsk) | |
3bb5d2ee | 34 | return ERR_PTR(-ENOMEM); |
29d5e047 TG |
35 | init_idle(tsk, cpu); |
36 | return tsk; | |
37 | } | |
38 | ||
3bb5d2ee | 39 | void __init idle_thread_set_boot_cpu(void) |
29d5e047 | 40 | { |
3bb5d2ee | 41 | per_cpu(idle_threads, smp_processor_id()) = current; |
29d5e047 TG |
42 | } |
43 | ||
4a70d2d9 SB |
44 | /** |
45 | * idle_init - Initialize the idle thread for a cpu | |
46 | * @cpu: The cpu for which the idle thread should be initialized | |
47 | * | |
48 | * Creates the thread if it does not exist. | |
49 | */ | |
3bb5d2ee | 50 | static inline void idle_init(unsigned int cpu) |
29d5e047 | 51 | { |
3bb5d2ee SS |
52 | struct task_struct *tsk = per_cpu(idle_threads, cpu); |
53 | ||
54 | if (!tsk) { | |
55 | tsk = fork_idle(cpu); | |
56 | if (IS_ERR(tsk)) | |
57 | pr_err("SMP: fork_idle() failed for CPU %u\n", cpu); | |
58 | else | |
59 | per_cpu(idle_threads, cpu) = tsk; | |
60 | } | |
29d5e047 TG |
61 | } |
62 | ||
63 | /** | |
4a70d2d9 | 64 | * idle_threads_init - Initialize idle threads for all cpus |
29d5e047 | 65 | */ |
3bb5d2ee | 66 | void __init idle_threads_init(void) |
29d5e047 | 67 | { |
ee74d132 SB |
68 | unsigned int cpu, boot_cpu; |
69 | ||
70 | boot_cpu = smp_processor_id(); | |
29d5e047 | 71 | |
3bb5d2ee | 72 | for_each_possible_cpu(cpu) { |
ee74d132 | 73 | if (cpu != boot_cpu) |
3bb5d2ee | 74 | idle_init(cpu); |
29d5e047 | 75 | } |
29d5e047 | 76 | } |
29d5e047 | 77 | #endif |
f97f8f06 | 78 | |
3180d89b PM |
79 | #endif /* #ifdef CONFIG_SMP */ |
80 | ||
f97f8f06 TG |
81 | static LIST_HEAD(hotplug_threads); /* list of registered smp_hotplug_thread descriptors */ |
82 | static DEFINE_MUTEX(smpboot_threads_lock); /* taken around every walk/modification of hotplug_threads in this file */ | |
83 | ||
84 | struct smpboot_thread_data { /* per-thread argument handed to smpboot_thread_fn() */ | |
85 | unsigned int cpu; /* CPU the thread was created for */ | |
86 | unsigned int status; /* one of the HP_THREAD_* values */ | |
87 | struct smp_hotplug_thread *ht; /* descriptor providing the callbacks */ | |
88 | }; | |
89 | ||
90 | enum { /* values of smpboot_thread_data.status */ | |
91 | HP_THREAD_NONE = 0, /* initial value (thread data is zero-allocated); setup step not yet done */ | |
92 | HP_THREAD_ACTIVE, /* set after the setup or unpark step in smpboot_thread_fn() */ | |
93 | HP_THREAD_PARKED, /* set after the park callback was invoked */ | |
94 | }; | |
95 | ||
96 | /** | |
97 | * smpboot_thread_fn - percpu hotplug thread loop function | |
98 | * @data: thread data pointer (a struct smpboot_thread_data) | |
99 | * | |
100 | * Checks for thread stop and park conditions. Calls the necessary | |
101 | * setup, cleanup, park and unpark functions for the registered | |
102 | * thread, and otherwise runs thread_fn whenever thread_should_run says so. | |
103 | * | |
104 | * Returns 0 once kthread_should_stop() is seen; otherwise it keeps looping. | |
105 | */ | |
106 | static int smpboot_thread_fn(void *data) | |
107 | { | |
108 | struct smpboot_thread_data *td = data; | |
109 | struct smp_hotplug_thread *ht = td->ht; | |
110 | ||
111 | while (1) { | |
112 | set_current_state(TASK_INTERRUPTIBLE); /* every path below except the schedule() one resets this to TASK_RUNNING */ | |
113 | preempt_disable(); | |
114 | if (kthread_should_stop()) { | |
7d4d2696 | 115 | __set_current_state(TASK_RUNNING); |
f97f8f06 | 116 | preempt_enable(); |
3dd08c0c FW |
117 | /* cleanup must mirror setup: skipped while status is still HP_THREAD_NONE */ |
118 | if (ht->cleanup && td->status != HP_THREAD_NONE) | |
f97f8f06 TG |
119 | ht->cleanup(td->cpu, cpu_online(td->cpu)); |
120 | kfree(td); /* the thread frees its own data before exiting */ | |
121 | return 0; | |
122 | } | |
123 | ||
124 | if (kthread_should_park()) { | |
125 | __set_current_state(TASK_RUNNING); | |
be6a2e4c | 126 | preempt_enable(); |
f97f8f06 TG |
127 | if (ht->park && td->status == HP_THREAD_ACTIVE) { |
128 | BUG_ON(td->cpu != smp_processor_id()); | |
129 | ht->park(td->cpu); | |
130 | td->status = HP_THREAD_PARKED; | |
131 | } | |
132 | kthread_parkme(); | |
133 | /* We might have been woken for stop, so restart the loop and re-check */ | |
134 | continue; | |
135 | } | |
136 | ||
dc893e19 | 137 | BUG_ON(td->cpu != smp_processor_id()); |
f97f8f06 TG |
138 |
139 | /* Check for state change setup: run setup/unpark once, mark active, restart the loop */ | |
140 | switch (td->status) { | |
141 | case HP_THREAD_NONE: | |
7d4d2696 | 142 | __set_current_state(TASK_RUNNING); |
f97f8f06 TG |
143 | preempt_enable(); |
144 | if (ht->setup) | |
145 | ht->setup(td->cpu); | |
146 | td->status = HP_THREAD_ACTIVE; | |
7d4d2696 PZ |
147 | continue; |
148 | ||
f97f8f06 | 149 | case HP_THREAD_PARKED: |
7d4d2696 | 150 | __set_current_state(TASK_RUNNING); |
f97f8f06 TG |
151 | preempt_enable(); |
152 | if (ht->unpark) | |
153 | ht->unpark(td->cpu); | |
154 | td->status = HP_THREAD_ACTIVE; | |
7d4d2696 | 155 | continue; |
f97f8f06 TG |
156 | } |
157 | ||
158 | if (!ht->thread_should_run(td->cpu)) { | |
7d4d2696 | 159 | preempt_enable_no_resched(); /* task state is still TASK_INTERRUPTIBLE on this path */ |
f97f8f06 TG |
160 | schedule(); |
161 | } else { | |
7d4d2696 | 162 | __set_current_state(TASK_RUNNING); |
f97f8f06 TG |
163 | preempt_enable(); |
164 | ht->thread_fn(td->cpu); | |
165 | } | |
166 | } | |
167 | } | |
168 | ||
169 | static int | |
170 | __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) | |
171 | { | |
172 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); | |
173 | struct smpboot_thread_data *td; | |
174 | ||
175 | if (tsk) | |
176 | return 0; | |
177 | ||
178 | td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); | |
179 | if (!td) | |
180 | return -ENOMEM; | |
181 | td->cpu = cpu; | |
182 | td->ht = ht; | |
183 | ||
184 | tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, | |
185 | ht->thread_comm); | |
186 | if (IS_ERR(tsk)) { | |
187 | kfree(td); | |
188 | return PTR_ERR(tsk); | |
189 | } | |
a65d4096 PM |
190 | /* |
191 | * Park the thread so that it could start right on the CPU | |
192 | * when it is available. | |
193 | */ | |
194 | kthread_park(tsk); | |
f97f8f06 TG |
195 | get_task_struct(tsk); |
196 | *per_cpu_ptr(ht->store, cpu) = tsk; | |
f2530dc7 TG |
197 | if (ht->create) { |
198 | /* | |
199 | * Make sure that the task has actually scheduled out | |
200 | * into park position, before calling the create | |
201 | * callback. At least the migration thread callback | |
202 | * requires that the task is off the runqueue. | |
203 | */ | |
204 | if (!wait_task_inactive(tsk, TASK_PARKED)) | |
205 | WARN_ON(1); | |
206 | else | |
207 | ht->create(cpu); | |
208 | } | |
f97f8f06 TG |
209 | return 0; |
210 | } | |
211 | ||
212 | int smpboot_create_threads(unsigned int cpu) | |
213 | { | |
214 | struct smp_hotplug_thread *cur; | |
215 | int ret = 0; | |
216 | ||
217 | mutex_lock(&smpboot_threads_lock); | |
218 | list_for_each_entry(cur, &hotplug_threads, list) { | |
219 | ret = __smpboot_create_thread(cur, cpu); | |
220 | if (ret) | |
221 | break; | |
222 | } | |
223 | mutex_unlock(&smpboot_threads_lock); | |
224 | return ret; | |
225 | } | |
226 | ||
227 | static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu) | |
228 | { | |
229 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); | |
230 | ||
c00166d8 ON |
231 | if (!ht->selfparking) |
232 | kthread_unpark(tsk); | |
f97f8f06 TG |
233 | } |
234 | ||
931ef163 | 235 | int smpboot_unpark_threads(unsigned int cpu) |
f97f8f06 TG |
236 | { |
237 | struct smp_hotplug_thread *cur; | |
238 | ||
239 | mutex_lock(&smpboot_threads_lock); | |
240 | list_for_each_entry(cur, &hotplug_threads, list) | |
b5242e98 CM |
241 | if (cpumask_test_cpu(cpu, cur->cpumask)) |
242 | smpboot_unpark_thread(cur, cpu); | |
f97f8f06 | 243 | mutex_unlock(&smpboot_threads_lock); |
931ef163 | 244 | return 0; |
f97f8f06 TG |
245 | } |
246 | ||
247 | static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) | |
248 | { | |
249 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); | |
250 | ||
7d7e499f | 251 | if (tsk && !ht->selfparking) |
f97f8f06 TG |
252 | kthread_park(tsk); |
253 | } | |
254 | ||
931ef163 | 255 | int smpboot_park_threads(unsigned int cpu) |
f97f8f06 TG |
256 | { |
257 | struct smp_hotplug_thread *cur; | |
258 | ||
259 | mutex_lock(&smpboot_threads_lock); | |
260 | list_for_each_entry_reverse(cur, &hotplug_threads, list) | |
261 | smpboot_park_thread(cur, cpu); | |
262 | mutex_unlock(&smpboot_threads_lock); | |
931ef163 | 263 | return 0; |
f97f8f06 TG |
264 | } |
265 | ||
266 | static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) | |
267 | { | |
268 | unsigned int cpu; | |
269 | ||
270 | /* We need to destroy also the parked threads of offline cpus */ | |
271 | for_each_possible_cpu(cpu) { | |
272 | struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); | |
273 | ||
274 | if (tsk) { | |
275 | kthread_stop(tsk); | |
276 | put_task_struct(tsk); | |
277 | *per_cpu_ptr(ht->store, cpu) = NULL; | |
278 | } | |
279 | } | |
280 | } | |
281 | ||
282 | /** | |
230ec939 FW |
283 | * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related |
284 | * to hotplug | |
f97f8f06 | 285 | * @plug_thread: Hotplug thread descriptor |
230ec939 | 286 | * @cpumask: The cpumask where threads run |
f97f8f06 TG |
287 | * |
288 | * Creates and starts the threads on all online cpus. | |
289 | */ | |
230ec939 FW |
290 | int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, |
291 | const struct cpumask *cpumask) | |
f97f8f06 TG |
292 | { |
293 | unsigned int cpu; | |
294 | int ret = 0; | |
295 | ||
b5242e98 CM |
296 | if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL)) |
297 | return -ENOMEM; | |
230ec939 | 298 | cpumask_copy(plug_thread->cpumask, cpumask); |
b5242e98 | 299 | |
4bee9686 | 300 | get_online_cpus(); |
f97f8f06 TG |
301 | mutex_lock(&smpboot_threads_lock); |
302 | for_each_online_cpu(cpu) { | |
303 | ret = __smpboot_create_thread(plug_thread, cpu); | |
304 | if (ret) { | |
305 | smpboot_destroy_threads(plug_thread); | |
5869b506 | 306 | free_cpumask_var(plug_thread->cpumask); |
f97f8f06 TG |
307 | goto out; |
308 | } | |
230ec939 FW |
309 | if (cpumask_test_cpu(cpu, cpumask)) |
310 | smpboot_unpark_thread(plug_thread, cpu); | |
f97f8f06 TG |
311 | } |
312 | list_add(&plug_thread->list, &hotplug_threads); | |
313 | out: | |
314 | mutex_unlock(&smpboot_threads_lock); | |
4bee9686 | 315 | put_online_cpus(); |
f97f8f06 TG |
316 | return ret; |
317 | } | |
230ec939 | 318 | EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask); |
f97f8f06 TG |
319 | |
320 | /** | |
321 | * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug | |
322 | * @plug_thread: Hotplug thread descriptor | |
323 | * | |
324 | * Stops all threads on all possible cpus. | |
325 | */ | |
326 | void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) | |
327 | { | |
328 | get_online_cpus(); | |
329 | mutex_lock(&smpboot_threads_lock); | |
330 | list_del(&plug_thread->list); | |
331 | smpboot_destroy_threads(plug_thread); | |
332 | mutex_unlock(&smpboot_threads_lock); | |
333 | put_online_cpus(); | |
b5242e98 | 334 | free_cpumask_var(plug_thread->cpumask); |
f97f8f06 TG |
335 | } |
336 | EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); | |
8038dad7 | 337 | |
b5242e98 CM |
338 | /** |
339 | * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked | |
340 | * @plug_thread: Hotplug thread descriptor | |
341 | * @new: Revised mask to use | |
342 | * | |
343 | * The cpumask field in the smp_hotplug_thread must not be updated directly | |
344 | * by the client, but only by calling this function. | |
fe4ba3c3 | 345 | * This function can only be called on a registered smp_hotplug_thread. |
b5242e98 | 346 | */ |
0d85923c TG |
347 | void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, |
348 | const struct cpumask *new) | |
b5242e98 CM |
349 | { |
350 | struct cpumask *old = plug_thread->cpumask; | |
0d85923c | 351 | static struct cpumask tmp; |
b5242e98 CM |
352 | unsigned int cpu; |
353 | ||
e31d6883 | 354 | lockdep_assert_cpus_held(); |
b5242e98 CM |
355 | mutex_lock(&smpboot_threads_lock); |
356 | ||
357 | /* Park threads that were exclusively enabled on the old mask. */ | |
0d85923c TG |
358 | cpumask_andnot(&tmp, old, new); |
359 | for_each_cpu_and(cpu, &tmp, cpu_online_mask) | |
b5242e98 CM |
360 | smpboot_park_thread(plug_thread, cpu); |
361 | ||
362 | /* Unpark threads that are exclusively enabled on the new mask. */ | |
0d85923c TG |
363 | cpumask_andnot(&tmp, new, old); |
364 | for_each_cpu_and(cpu, &tmp, cpu_online_mask) | |
b5242e98 CM |
365 | smpboot_unpark_thread(plug_thread, cpu); |
366 | ||
367 | cpumask_copy(old, new); | |
368 | ||
369 | mutex_unlock(&smpboot_threads_lock); | |
b5242e98 | 370 | } |
b5242e98 | 371 | |
8038dad7 PM |
372 | static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); /* per-CPU hotplug state, initially CPU_POST_DEAD */ |
373 | ||
374 | /* | |
375 | * Called to poll specified CPU's state, for example, when waiting for | |
376 | * a CPU to come online. | |
377 | */ | |
378 | int cpu_report_state(int cpu) | |
379 | { | |
380 | return atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | |
381 | } | |
382 | ||
383 | /* | |
384 | * If CPU has died properly, set its state to CPU_UP_PREPARE and | |
385 | * return success. Otherwise, return -EBUSY if the CPU died after | |
386 | * cpu_wait_death() timed out. And yet otherwise again, return -EAGAIN | |
387 | * if cpu_wait_death() timed out and the CPU still hasn't gotten around | |
388 | * to dying. In the latter two cases, the CPU might not be set up | |
389 | * properly, but it is up to the arch-specific code to decide. | |
390 | * Finally, -EIO indicates an unanticipated problem. | |
391 | * | |
392 | * Note that it is permissible to omit this call entirely, as is | |
393 | * done in architectures that do no CPU-hotplug error checking. | |
394 | */ | |
395 | int cpu_check_up_prepare(int cpu) | |
396 | { | |
397 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) { | |
398 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); | |
399 | return 0; | |
400 | } | |
401 | ||
402 | switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) { | |
403 | ||
404 | case CPU_POST_DEAD: | |
405 | ||
406 | /* The CPU died properly, so just start it up again. */ | |
407 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); | |
408 | return 0; | |
409 | ||
410 | case CPU_DEAD_FROZEN: | |
411 | ||
412 | /* | |
413 | * Timeout during CPU death, so let caller know. | |
414 | * The outgoing CPU completed its processing, but after | |
415 | * cpu_wait_death() timed out and reported the error. The | |
416 | * caller is free to proceed, in which case the state | |
417 | * will be reset properly by cpu_set_state_online(). | |
418 | * Proceeding despite this -EBUSY return makes sense | |
419 | * for systems where the outgoing CPUs take themselves | |
420 | * offline, with no post-death manipulation required from | |
421 | * a surviving CPU. | |
422 | */ | |
423 | return -EBUSY; | |
424 | ||
425 | case CPU_BROKEN: | |
426 | ||
427 | /* | |
428 | * The most likely reason we got here is that there was | |
429 | * a timeout during CPU death, and the outgoing CPU never | |
430 | * did complete its processing. This could happen on | |
431 | * a virtualized system if the outgoing VCPU gets preempted | |
432 | * for more than five seconds, and the user attempts to | |
433 | * immediately online that same CPU. Trying again later | |
434 | * might return -EBUSY above, hence -EAGAIN. | |
435 | */ | |
436 | return -EAGAIN; | |
437 | ||
438 | default: | |
439 | ||
440 | /* Should not happen. Famous last words. */ | |
441 | return -EIO; | |
442 | } | |
443 | } | |
444 | ||
445 | /* | |
446 | * Mark the specified CPU online. | |
447 | * | |
448 | * Note that it is permissible to omit this call entirely, as is | |
449 | * done in architectures that do no CPU-hotplug error checking. | |
450 | */ | |
451 | void cpu_set_state_online(int cpu) | |
452 | { | |
453 | (void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE); | |
454 | } | |
455 | ||
456 | #ifdef CONFIG_HOTPLUG_CPU | |
457 | ||
458 | /* | |
459 | * Wait for the specified CPU to exit the idle loop and die. | |
460 | */ | |
461 | bool cpu_wait_death(unsigned int cpu, int seconds) | |
462 | { | |
463 | int jf_left = seconds * HZ; | |
464 | int oldstate; | |
465 | bool ret = true; | |
466 | int sleep_jf = 1; | |
467 | ||
468 | might_sleep(); | |
469 | ||
470 | /* The outgoing CPU will normally get done quite quickly. */ | |
471 | if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD) | |
472 | goto update_state; | |
473 | udelay(5); | |
474 | ||
475 | /* But if the outgoing CPU dawdles, wait increasingly long times. */ | |
476 | while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) { | |
477 | schedule_timeout_uninterruptible(sleep_jf); | |
478 | jf_left -= sleep_jf; | |
479 | if (jf_left <= 0) | |
480 | break; | |
481 | sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10); | |
482 | } | |
483 | update_state: | |
484 | oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | |
485 | if (oldstate == CPU_DEAD) { | |
486 | /* Outgoing CPU died normally, update state. */ | |
487 | smp_mb(); /* atomic_read() before update. */ | |
488 | atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD); | |
489 | } else { | |
490 | /* Outgoing CPU still hasn't died, set state accordingly. */ | |
491 | if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), | |
492 | oldstate, CPU_BROKEN) != oldstate) | |
493 | goto update_state; | |
494 | ret = false; | |
495 | } | |
496 | return ret; | |
497 | } | |
498 | ||
499 | /* | |
500 | * Called by the outgoing CPU to report its successful death. Return | |
501 | * false if this report follows the surviving CPU's timing out. | |
502 | * | |
503 | * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU | |
504 | * timed out. This approach allows architectures to omit calls to | |
505 | * cpu_check_up_prepare() and cpu_set_state_online() without defeating | |
506 | * the next cpu_wait_death()'s polling loop. | |
507 | */ | |
508 | bool cpu_report_death(void) | |
509 | { | |
510 | int oldstate; | |
511 | int newstate; | |
512 | int cpu = smp_processor_id(); | |
513 | ||
514 | do { | |
515 | oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); | |
516 | if (oldstate != CPU_BROKEN) | |
517 | newstate = CPU_DEAD; | |
518 | else | |
519 | newstate = CPU_DEAD_FROZEN; | |
520 | } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), | |
521 | oldstate, newstate) != oldstate); | |
522 | return newstate == CPU_DEAD; | |
523 | } | |
524 | ||
525 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |