]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - kernel/cpu.c
Merge tag 'mfd-next-5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd
[mirror_ubuntu-hirsute-kernel.git] / kernel / cpu.c
1 /* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6 #include <linux/proc_fs.h>
7 #include <linux/smp.h>
8 #include <linux/init.h>
9 #include <linux/notifier.h>
10 #include <linux/sched/signal.h>
11 #include <linux/sched/hotplug.h>
12 #include <linux/sched/isolation.h>
13 #include <linux/sched/task.h>
14 #include <linux/sched/smt.h>
15 #include <linux/unistd.h>
16 #include <linux/cpu.h>
17 #include <linux/oom.h>
18 #include <linux/rcupdate.h>
19 #include <linux/export.h>
20 #include <linux/bug.h>
21 #include <linux/kthread.h>
22 #include <linux/stop_machine.h>
23 #include <linux/mutex.h>
24 #include <linux/gfp.h>
25 #include <linux/suspend.h>
26 #include <linux/lockdep.h>
27 #include <linux/tick.h>
28 #include <linux/irq.h>
29 #include <linux/nmi.h>
30 #include <linux/smpboot.h>
31 #include <linux/relay.h>
32 #include <linux/slab.h>
33 #include <linux/percpu-rwsem.h>
34
35 #include <trace/events/power.h>
36 #define CREATE_TRACE_POINTS
37 #include <trace/events/cpuhp.h>
38
39 #include "smpboot.h"
40
41 /**
42 * cpuhp_cpu_state - Per cpu hotplug state storage
43 * @state: The current cpu state
44 * @target: The target state
45 * @thread: Pointer to the hotplug thread
46 * @should_run: Thread should execute
47 * @rollback: Perform a rollback
48 * @single: Single callback invocation
49 * @bringup: Single callback bringup or teardown selector
50 * @cb_state: The state for a single callback (install/uninstall)
51 * @result: Result of the operation
52 * @done_up: Signal completion to the issuer of the task for cpu-up
53 * @done_down: Signal completion to the issuer of the task for cpu-down
54 */
55 struct cpuhp_cpu_state {
56 enum cpuhp_state state;
57 enum cpuhp_state target;
58 enum cpuhp_state fail;
59 #ifdef CONFIG_SMP
60 struct task_struct *thread;
61 bool should_run;
62 bool rollback;
63 bool single;
64 bool bringup;
65 struct hlist_node *node;
66 struct hlist_node *last;
67 enum cpuhp_state cb_state;
68 int result;
69 struct completion done_up;
70 struct completion done_down;
71 #endif
72 };
73
74 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
75 .fail = CPUHP_INVALID,
76 };
77
78 #ifdef CONFIG_SMP
79 cpumask_t cpus_booted_once_mask;
80 #endif
81
82 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
83 static struct lockdep_map cpuhp_state_up_map =
84 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
85 static struct lockdep_map cpuhp_state_down_map =
86 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
87
88
89 static inline void cpuhp_lock_acquire(bool bringup)
90 {
91 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
92 }
93
94 static inline void cpuhp_lock_release(bool bringup)
95 {
96 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
97 }
98 #else
99
100 static inline void cpuhp_lock_acquire(bool bringup) { }
101 static inline void cpuhp_lock_release(bool bringup) { }
102
103 #endif
104
105 /**
106 * cpuhp_step - Hotplug state machine step
107 * @name: Name of the step
108 * @startup: Startup function of the step
109 * @teardown: Teardown function of the step
110 * @cant_stop: Bringup/teardown can't be stopped at this step
111 */
112 struct cpuhp_step {
113 const char *name;
114 union {
115 int (*single)(unsigned int cpu);
116 int (*multi)(unsigned int cpu,
117 struct hlist_node *node);
118 } startup;
119 union {
120 int (*single)(unsigned int cpu);
121 int (*multi)(unsigned int cpu,
122 struct hlist_node *node);
123 } teardown;
124 struct hlist_head list;
125 bool cant_stop;
126 bool multi_instance;
127 };
128
129 static DEFINE_MUTEX(cpuhp_state_mutex);
130 static struct cpuhp_step cpuhp_hp_states[];
131
132 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
133 {
134 return cpuhp_hp_states + state;
135 }
136
137 /**
138 * cpuhp_invoke_callback _ Invoke the callbacks for a given state
139 * @cpu: The cpu for which the callback should be invoked
140 * @state: The state to do callbacks for
141 * @bringup: True if the bringup callback should be invoked
142 * @node: For multi-instance, do a single entry callback for install/remove
143 * @lastp: For multi-instance rollback, remember how far we got
144 *
145 * Called from cpu hotplug and from the state register machinery.
146 */
147 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
148 bool bringup, struct hlist_node *node,
149 struct hlist_node **lastp)
150 {
151 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
152 struct cpuhp_step *step = cpuhp_get_step(state);
153 int (*cbm)(unsigned int cpu, struct hlist_node *node);
154 int (*cb)(unsigned int cpu);
155 int ret, cnt;
156
157 if (st->fail == state) {
158 st->fail = CPUHP_INVALID;
159
160 if (!(bringup ? step->startup.single : step->teardown.single))
161 return 0;
162
163 return -EAGAIN;
164 }
165
166 if (!step->multi_instance) {
167 WARN_ON_ONCE(lastp && *lastp);
168 cb = bringup ? step->startup.single : step->teardown.single;
169 if (!cb)
170 return 0;
171 trace_cpuhp_enter(cpu, st->target, state, cb);
172 ret = cb(cpu);
173 trace_cpuhp_exit(cpu, st->state, state, ret);
174 return ret;
175 }
176 cbm = bringup ? step->startup.multi : step->teardown.multi;
177 if (!cbm)
178 return 0;
179
180 /* Single invocation for instance add/remove */
181 if (node) {
182 WARN_ON_ONCE(lastp && *lastp);
183 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
184 ret = cbm(cpu, node);
185 trace_cpuhp_exit(cpu, st->state, state, ret);
186 return ret;
187 }
188
189 /* State transition. Invoke on all instances */
190 cnt = 0;
191 hlist_for_each(node, &step->list) {
192 if (lastp && node == *lastp)
193 break;
194
195 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
196 ret = cbm(cpu, node);
197 trace_cpuhp_exit(cpu, st->state, state, ret);
198 if (ret) {
199 if (!lastp)
200 goto err;
201
202 *lastp = node;
203 return ret;
204 }
205 cnt++;
206 }
207 if (lastp)
208 *lastp = NULL;
209 return 0;
210 err:
211 /* Rollback the instances if one failed */
212 cbm = !bringup ? step->startup.multi : step->teardown.multi;
213 if (!cbm)
214 return ret;
215
216 hlist_for_each(node, &step->list) {
217 if (!cnt--)
218 break;
219
220 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
221 ret = cbm(cpu, node);
222 trace_cpuhp_exit(cpu, st->state, state, ret);
223 /*
224 * Rollback must not fail,
225 */
226 WARN_ON_ONCE(ret);
227 }
228 return ret;
229 }
230
231 #ifdef CONFIG_SMP
232 static bool cpuhp_is_ap_state(enum cpuhp_state state)
233 {
234 /*
235 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
236 * purposes as that state is handled explicitly in cpu_down.
237 */
238 return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
239 }
240
241 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
242 {
243 struct completion *done = bringup ? &st->done_up : &st->done_down;
244 wait_for_completion(done);
245 }
246
247 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
248 {
249 struct completion *done = bringup ? &st->done_up : &st->done_down;
250 complete(done);
251 }
252
253 /*
254 * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
255 */
256 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
257 {
258 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
259 }
260
261 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
262 static DEFINE_MUTEX(cpu_add_remove_lock);
263 bool cpuhp_tasks_frozen;
264 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
265
266 /*
267 * The following two APIs (cpu_maps_update_begin/done) must be used when
268 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
269 */
270 void cpu_maps_update_begin(void)
271 {
272 mutex_lock(&cpu_add_remove_lock);
273 }
274
275 void cpu_maps_update_done(void)
276 {
277 mutex_unlock(&cpu_add_remove_lock);
278 }
279
/*
 * While non-zero, cpu_up and cpu_down return -EBUSY and do nothing.
 * It is a nesting counter and must only be modified with
 * cpu_add_remove_lock held.
 */
static int cpu_hotplug_disabled;
285
286 #ifdef CONFIG_HOTPLUG_CPU
287
288 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
289
290 void cpus_read_lock(void)
291 {
292 percpu_down_read(&cpu_hotplug_lock);
293 }
294 EXPORT_SYMBOL_GPL(cpus_read_lock);
295
296 int cpus_read_trylock(void)
297 {
298 return percpu_down_read_trylock(&cpu_hotplug_lock);
299 }
300 EXPORT_SYMBOL_GPL(cpus_read_trylock);
301
302 void cpus_read_unlock(void)
303 {
304 percpu_up_read(&cpu_hotplug_lock);
305 }
306 EXPORT_SYMBOL_GPL(cpus_read_unlock);
307
308 void cpus_write_lock(void)
309 {
310 percpu_down_write(&cpu_hotplug_lock);
311 }
312
313 void cpus_write_unlock(void)
314 {
315 percpu_up_write(&cpu_hotplug_lock);
316 }
317
318 void lockdep_assert_cpus_held(void)
319 {
320 /*
321 * We can't have hotplug operations before userspace starts running,
322 * and some init codepaths will knowingly not take the hotplug lock.
323 * This is all valid, so mute lockdep until it makes sense to report
324 * unheld locks.
325 */
326 if (system_state < SYSTEM_RUNNING)
327 return;
328
329 percpu_rwsem_assert_held(&cpu_hotplug_lock);
330 }
331
332 static void lockdep_acquire_cpus_lock(void)
333 {
334 rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
335 }
336
337 static void lockdep_release_cpus_lock(void)
338 {
339 rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
340 }
341
342 /*
343 * Wait for currently running CPU hotplug operations to complete (if any) and
344 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
345 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
346 * hotplug path before performing hotplug operations. So acquiring that lock
347 * guarantees mutual exclusion from any currently running hotplug operations.
348 */
349 void cpu_hotplug_disable(void)
350 {
351 cpu_maps_update_begin();
352 cpu_hotplug_disabled++;
353 cpu_maps_update_done();
354 }
355 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
356
357 static void __cpu_hotplug_enable(void)
358 {
359 if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
360 return;
361 cpu_hotplug_disabled--;
362 }
363
/* Counterpart of cpu_hotplug_disable(); takes cpu_add_remove_lock itself. */
void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
371
372 #else
373
/* Without CONFIG_HOTPLUG_CPU there is no cpu_hotplug_lock to annotate. */
static void lockdep_acquire_cpus_lock(void) { }

static void lockdep_release_cpus_lock(void) { }
381
382 #endif /* CONFIG_HOTPLUG_CPU */
383
384 /*
385 * Architectures that need SMT-specific errata handling during SMT hotplug
386 * should override this.
387 */
388 void __weak arch_smt_update(void) { }
389
390 #ifdef CONFIG_HOTPLUG_SMT
391 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
392
393 void __init cpu_smt_disable(bool force)
394 {
395 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
396 cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
397 return;
398
399 if (force) {
400 pr_info("SMT: Force disabled\n");
401 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
402 } else {
403 pr_info("SMT: disabled\n");
404 cpu_smt_control = CPU_SMT_DISABLED;
405 }
406 }
407
408 /*
409 * The decision whether SMT is supported can only be done after the full
410 * CPU identification. Called from architecture code.
411 */
412 void __init cpu_smt_check_topology(void)
413 {
414 if (!topology_smt_supported())
415 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
416 }
417
418 static int __init smt_cmdline_disable(char *str)
419 {
420 cpu_smt_disable(str && !strcmp(str, "force"));
421 return 0;
422 }
423 early_param("nosmt", smt_cmdline_disable);
424
425 static inline bool cpu_smt_allowed(unsigned int cpu)
426 {
427 if (cpu_smt_control == CPU_SMT_ENABLED)
428 return true;
429
430 if (topology_is_primary_thread(cpu))
431 return true;
432
433 /*
434 * On x86 it's required to boot all logical CPUs at least once so
435 * that the init code can get a chance to set CR4.MCE on each
436 * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
437 * core will shutdown the machine.
438 */
439 return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
440 }
441 #else
442 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
443 #endif
444
445 static inline enum cpuhp_state
446 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
447 {
448 enum cpuhp_state prev_state = st->state;
449
450 st->rollback = false;
451 st->last = NULL;
452
453 st->target = target;
454 st->single = false;
455 st->bringup = st->state < target;
456
457 return prev_state;
458 }
459
460 static inline void
461 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
462 {
463 st->rollback = true;
464
465 /*
466 * If we have st->last we need to undo partial multi_instance of this
467 * state first. Otherwise start undo at the previous state.
468 */
469 if (!st->last) {
470 if (st->bringup)
471 st->state--;
472 else
473 st->state++;
474 }
475
476 st->target = prev_state;
477 st->bringup = !st->bringup;
478 }
479
480 /* Regular hotplug invocation of the AP hotplug thread */
481 static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
482 {
483 if (!st->single && st->state == st->target)
484 return;
485
486 st->result = 0;
487 /*
488 * Make sure the above stores are visible before should_run becomes
489 * true. Paired with the mb() above in cpuhp_thread_fun()
490 */
491 smp_mb();
492 st->should_run = true;
493 wake_up_process(st->thread);
494 wait_for_ap_thread(st, st->bringup);
495 }
496
497 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
498 {
499 enum cpuhp_state prev_state;
500 int ret;
501
502 prev_state = cpuhp_set_state(st, target);
503 __cpuhp_kick_ap(st);
504 if ((ret = st->result)) {
505 cpuhp_reset_state(st, prev_state);
506 __cpuhp_kick_ap(st);
507 }
508
509 return ret;
510 }
511
512 static int bringup_wait_for_ap(unsigned int cpu)
513 {
514 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
515
516 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
517 wait_for_ap_thread(st, true);
518 if (WARN_ON_ONCE((!cpu_online(cpu))))
519 return -ECANCELED;
520
521 /* Unpark the stopper thread and the hotplug thread of the target cpu */
522 stop_machine_unpark(cpu);
523 kthread_unpark(st->thread);
524
525 /*
526 * SMT soft disabling on X86 requires to bring the CPU out of the
527 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
528 * CPU marked itself as booted_once in notify_cpu_starting() so the
529 * cpu_smt_allowed() check will now return false if this is not the
530 * primary sibling.
531 */
532 if (!cpu_smt_allowed(cpu))
533 return -ECANCELED;
534
535 if (st->target <= CPUHP_AP_ONLINE_IDLE)
536 return 0;
537
538 return cpuhp_kick_ap(st, st->target);
539 }
540
/* Start @cpu via the architecture code and wait for it to come up. */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int err;

	/*
	 * Some architectures have to walk the irq descriptors to set up
	 * the vector space for the cpu which comes online. Prevent irq
	 * alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	err = __cpu_up(cpu, idle);
	irq_unlock_sparse();

	return err ? err : bringup_wait_for_ap(cpu);
}
560
561 /*
562 * Hotplug state machine related functions
563 */
564
565 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
566 {
567 for (st->state--; st->state > st->target; st->state--)
568 cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
569 }
570
571 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
572 {
573 if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
574 return true;
575 /*
576 * When CPU hotplug is disabled, then taking the CPU down is not
577 * possible because takedown_cpu() and the architecture and
578 * subsystem specific mechanisms are not available. So the CPU
579 * which would be completely unplugged again needs to stay around
580 * in the current state.
581 */
582 return st->state <= CPUHP_BRINGUP_CPU;
583 }
584
585 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
586 enum cpuhp_state target)
587 {
588 enum cpuhp_state prev_state = st->state;
589 int ret = 0;
590
591 while (st->state < target) {
592 st->state++;
593 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
594 if (ret) {
595 if (can_rollback_cpu(st)) {
596 st->target = prev_state;
597 undo_cpu_up(cpu, st);
598 }
599 break;
600 }
601 }
602 return ret;
603 }
604
605 /*
606 * The cpu hotplug threads manage the bringup and teardown of the cpus
607 */
608 static void cpuhp_create(unsigned int cpu)
609 {
610 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
611
612 init_completion(&st->done_up);
613 init_completion(&st->done_down);
614 }
615
616 static int cpuhp_should_run(unsigned int cpu)
617 {
618 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
619
620 return st->should_run;
621 }
622
623 /*
624 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
625 * callbacks when a state gets [un]installed at runtime.
626 *
627 * Each invocation of this function by the smpboot thread does a single AP
628 * state callback.
629 *
630 * It has 3 modes of operation:
631 * - single: runs st->cb_state
632 * - up: runs ++st->state, while st->state < st->target
633 * - down: runs st->state--, while st->state > st->target
634 *
635 * When complete or on error, should_run is cleared and the completion is fired.
636 */
637 static void cpuhp_thread_fun(unsigned int cpu)
638 {
639 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
640 bool bringup = st->bringup;
641 enum cpuhp_state state;
642
643 if (WARN_ON_ONCE(!st->should_run))
644 return;
645
646 /*
647 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
648 * that if we see ->should_run we also see the rest of the state.
649 */
650 smp_mb();
651
652 /*
653 * The BP holds the hotplug lock, but we're now running on the AP,
654 * ensure that anybody asserting the lock is held, will actually find
655 * it so.
656 */
657 lockdep_acquire_cpus_lock();
658 cpuhp_lock_acquire(bringup);
659
660 if (st->single) {
661 state = st->cb_state;
662 st->should_run = false;
663 } else {
664 if (bringup) {
665 st->state++;
666 state = st->state;
667 st->should_run = (st->state < st->target);
668 WARN_ON_ONCE(st->state > st->target);
669 } else {
670 state = st->state;
671 st->state--;
672 st->should_run = (st->state > st->target);
673 WARN_ON_ONCE(st->state < st->target);
674 }
675 }
676
677 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
678
679 if (cpuhp_is_atomic_state(state)) {
680 local_irq_disable();
681 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
682 local_irq_enable();
683
684 /*
685 * STARTING/DYING must not fail!
686 */
687 WARN_ON_ONCE(st->result);
688 } else {
689 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
690 }
691
692 if (st->result) {
693 /*
694 * If we fail on a rollback, we're up a creek without no
695 * paddle, no way forward, no way back. We loose, thanks for
696 * playing.
697 */
698 WARN_ON_ONCE(st->rollback);
699 st->should_run = false;
700 }
701
702 cpuhp_lock_release(bringup);
703 lockdep_release_cpus_lock();
704
705 if (!st->should_run)
706 complete_ap_thread(st, bringup);
707 }
708
709 /* Invoke a single callback on a remote cpu */
710 static int
711 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
712 struct hlist_node *node)
713 {
714 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
715 int ret;
716
717 if (!cpu_online(cpu))
718 return 0;
719
720 cpuhp_lock_acquire(false);
721 cpuhp_lock_release(false);
722
723 cpuhp_lock_acquire(true);
724 cpuhp_lock_release(true);
725
726 /*
727 * If we are up and running, use the hotplug thread. For early calls
728 * we invoke the thread function directly.
729 */
730 if (!st->thread)
731 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
732
733 st->rollback = false;
734 st->last = NULL;
735
736 st->node = node;
737 st->bringup = bringup;
738 st->cb_state = state;
739 st->single = true;
740
741 __cpuhp_kick_ap(st);
742
743 /*
744 * If we failed and did a partial, do a rollback.
745 */
746 if ((ret = st->result) && st->last) {
747 st->rollback = true;
748 st->bringup = !bringup;
749
750 __cpuhp_kick_ap(st);
751 }
752
753 /*
754 * Clean up the leftovers so the next hotplug operation wont use stale
755 * data.
756 */
757 st->node = st->last = NULL;
758 return ret;
759 }
760
761 static int cpuhp_kick_ap_work(unsigned int cpu)
762 {
763 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
764 enum cpuhp_state prev_state = st->state;
765 int ret;
766
767 cpuhp_lock_acquire(false);
768 cpuhp_lock_release(false);
769
770 cpuhp_lock_acquire(true);
771 cpuhp_lock_release(true);
772
773 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
774 ret = cpuhp_kick_ap(st, st->target);
775 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
776
777 return ret;
778 }
779
780 static struct smp_hotplug_thread cpuhp_threads = {
781 .store = &cpuhp_state.thread,
782 .create = &cpuhp_create,
783 .thread_should_run = cpuhp_should_run,
784 .thread_fn = cpuhp_thread_fun,
785 .thread_comm = "cpuhp/%u",
786 .selfparking = true,
787 };
788
789 void __init cpuhp_threads_init(void)
790 {
791 BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
792 kthread_unpark(this_cpu_read(cpuhp_state.thread));
793 }
794
795 #ifdef CONFIG_HOTPLUG_CPU
796 /**
797 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
798 * @cpu: a CPU id
799 *
800 * This function walks all processes, finds a valid mm struct for each one and
801 * then clears a corresponding bit in mm's cpumask. While this all sounds
802 * trivial, there are various non-obvious corner cases, which this function
803 * tries to solve in a safe manner.
804 *
805 * Also note that the function uses a somewhat relaxed locking scheme, so it may
806 * be called only for an already offlined CPU.
807 */
808 void clear_tasks_mm_cpumask(int cpu)
809 {
810 struct task_struct *p;
811
812 /*
813 * This function is called after the cpu is taken down and marked
814 * offline, so its not like new tasks will ever get this cpu set in
815 * their mm mask. -- Peter Zijlstra
816 * Thus, we may use rcu_read_lock() here, instead of grabbing
817 * full-fledged tasklist_lock.
818 */
819 WARN_ON(cpu_online(cpu));
820 rcu_read_lock();
821 for_each_process(p) {
822 struct task_struct *t;
823
824 /*
825 * Main thread might exit, but other threads may still have
826 * a valid mm. Find one.
827 */
828 t = find_lock_task_mm(p);
829 if (!t)
830 continue;
831 cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
832 task_unlock(t);
833 }
834 rcu_read_unlock();
835 }
836
837 /* Take this CPU down. */
838 static int take_cpu_down(void *_param)
839 {
840 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
841 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
842 int err, cpu = smp_processor_id();
843 int ret;
844
845 /* Ensure this CPU doesn't handle any more interrupts. */
846 err = __cpu_disable();
847 if (err < 0)
848 return err;
849
850 /*
851 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
852 * do this step again.
853 */
854 WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
855 st->state--;
856 /* Invoke the former CPU_DYING callbacks */
857 for (; st->state > target; st->state--) {
858 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
859 /*
860 * DYING must not fail!
861 */
862 WARN_ON_ONCE(ret);
863 }
864
865 /* Give up timekeeping duties */
866 tick_handover_do_timer();
867 /* Remove CPU from timer broadcasting */
868 tick_offline_cpu(cpu);
869 /* Park the stopper thread */
870 stop_machine_park(cpu);
871 return 0;
872 }
873
874 static int takedown_cpu(unsigned int cpu)
875 {
876 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
877 int err;
878
879 /* Park the smpboot threads */
880 kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
881
882 /*
883 * Prevent irq alloc/free while the dying cpu reorganizes the
884 * interrupt affinities.
885 */
886 irq_lock_sparse();
887
888 /*
889 * So now all preempt/rcu users must observe !cpu_active().
890 */
891 err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
892 if (err) {
893 /* CPU refused to die */
894 irq_unlock_sparse();
895 /* Unpark the hotplug thread so we can rollback there */
896 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
897 return err;
898 }
899 BUG_ON(cpu_online(cpu));
900
901 /*
902 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
903 * all runnable tasks from the CPU, there's only the idle task left now
904 * that the migration thread is done doing the stop_machine thing.
905 *
906 * Wait for the stop thread to go away.
907 */
908 wait_for_ap_thread(st, false);
909 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
910
911 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
912 irq_unlock_sparse();
913
914 hotplug_cpu__broadcast_tick_pull(cpu);
915 /* This actually kills the CPU. */
916 __cpu_die(cpu);
917
918 tick_cleanup_dead_cpu(cpu);
919 rcutree_migrate_callbacks(cpu);
920 return 0;
921 }
922
923 static void cpuhp_complete_idle_dead(void *arg)
924 {
925 struct cpuhp_cpu_state *st = arg;
926
927 complete_ap_thread(st, false);
928 }
929
930 void cpuhp_report_idle_dead(void)
931 {
932 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
933
934 BUG_ON(st->state != CPUHP_AP_OFFLINE);
935 rcu_report_dead(smp_processor_id());
936 st->state = CPUHP_AP_IDLE_DEAD;
937 /*
938 * We cannot call complete after rcu_report_dead() so we delegate it
939 * to an online cpu.
940 */
941 smp_call_function_single(cpumask_first(cpu_online_mask),
942 cpuhp_complete_idle_dead, st, 0);
943 }
944
945 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
946 {
947 for (st->state++; st->state < st->target; st->state++)
948 cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
949 }
950
951 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
952 enum cpuhp_state target)
953 {
954 enum cpuhp_state prev_state = st->state;
955 int ret = 0;
956
957 for (; st->state > target; st->state--) {
958 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
959 if (ret) {
960 st->target = prev_state;
961 if (st->state < prev_state)
962 undo_cpu_down(cpu, st);
963 break;
964 }
965 }
966 return ret;
967 }
968
969 /* Requires cpu_add_remove_lock to be held */
970 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
971 enum cpuhp_state target)
972 {
973 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
974 int prev_state, ret = 0;
975
976 if (num_online_cpus() == 1)
977 return -EBUSY;
978
979 if (!cpu_present(cpu))
980 return -EINVAL;
981
982 cpus_write_lock();
983
984 cpuhp_tasks_frozen = tasks_frozen;
985
986 prev_state = cpuhp_set_state(st, target);
987 /*
988 * If the current CPU state is in the range of the AP hotplug thread,
989 * then we need to kick the thread.
990 */
991 if (st->state > CPUHP_TEARDOWN_CPU) {
992 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
993 ret = cpuhp_kick_ap_work(cpu);
994 /*
995 * The AP side has done the error rollback already. Just
996 * return the error code..
997 */
998 if (ret)
999 goto out;
1000
1001 /*
1002 * We might have stopped still in the range of the AP hotplug
1003 * thread. Nothing to do anymore.
1004 */
1005 if (st->state > CPUHP_TEARDOWN_CPU)
1006 goto out;
1007
1008 st->target = target;
1009 }
1010 /*
1011 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1012 * to do the further cleanups.
1013 */
1014 ret = cpuhp_down_callbacks(cpu, st, target);
1015 if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1016 cpuhp_reset_state(st, prev_state);
1017 __cpuhp_kick_ap(st);
1018 }
1019
1020 out:
1021 cpus_write_unlock();
1022 /*
1023 * Do post unplug cleanup. This is still protected against
1024 * concurrent CPU hotplug via cpu_add_remove_lock.
1025 */
1026 lockup_detector_cleanup();
1027 arch_smt_update();
1028 return ret;
1029 }
1030
1031 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1032 {
1033 if (cpu_hotplug_disabled)
1034 return -EBUSY;
1035 return _cpu_down(cpu, 0, target);
1036 }
1037
1038 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
1039 {
1040 int err;
1041
1042 cpu_maps_update_begin();
1043 err = cpu_down_maps_locked(cpu, target);
1044 cpu_maps_update_done();
1045 return err;
1046 }
1047
1048 int cpu_down(unsigned int cpu)
1049 {
1050 return do_cpu_down(cpu, CPUHP_OFFLINE);
1051 }
1052 EXPORT_SYMBOL(cpu_down);
1053
1054 #else
1055 #define takedown_cpu NULL
1056 #endif /*CONFIG_HOTPLUG_CPU*/
1057
1058 /**
1059 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1060 * @cpu: cpu that just started
1061 *
1062 * It must be called by the arch code on the new cpu, before the new cpu
1063 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1064 */
1065 void notify_cpu_starting(unsigned int cpu)
1066 {
1067 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1068 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1069 int ret;
1070
1071 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1072 cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1073 while (st->state < target) {
1074 st->state++;
1075 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1076 /*
1077 * STARTING must not fail!
1078 */
1079 WARN_ON_ONCE(ret);
1080 }
1081 }
1082
1083 /*
1084 * Called from the idle task. Wake up the controlling task which brings the
1085 * stopper and the hotplug thread of the upcoming CPU up and then delegates
1086 * the rest of the online bringup to the hotplug thread.
1087 */
1088 void cpuhp_online_idle(enum cpuhp_state state)
1089 {
1090 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1091
1092 /* Happens for the boot cpu */
1093 if (state != CPUHP_AP_ONLINE_IDLE)
1094 return;
1095
1096 st->state = CPUHP_AP_ONLINE_IDLE;
1097 complete_ap_thread(st, true);
1098 }
1099
1100 /* Requires cpu_add_remove_lock to be held */
1101 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1102 {
1103 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1104 struct task_struct *idle;
1105 int ret = 0;
1106
1107 cpus_write_lock();
1108
1109 if (!cpu_present(cpu)) {
1110 ret = -EINVAL;
1111 goto out;
1112 }
1113
1114 /*
1115 * The caller of do_cpu_up might have raced with another
1116 * caller. Ignore it for now.
1117 */
1118 if (st->state >= target)
1119 goto out;
1120
1121 if (st->state == CPUHP_OFFLINE) {
1122 /* Let it fail before we try to bring the cpu up */
1123 idle = idle_thread_get(cpu);
1124 if (IS_ERR(idle)) {
1125 ret = PTR_ERR(idle);
1126 goto out;
1127 }
1128 }
1129
1130 cpuhp_tasks_frozen = tasks_frozen;
1131
1132 cpuhp_set_state(st, target);
1133 /*
1134 * If the current CPU state is in the range of the AP hotplug thread,
1135 * then we need to kick the thread once more.
1136 */
1137 if (st->state > CPUHP_BRINGUP_CPU) {
1138 ret = cpuhp_kick_ap_work(cpu);
1139 /*
1140 * The AP side has done the error rollback already. Just
1141 * return the error code..
1142 */
1143 if (ret)
1144 goto out;
1145 }
1146
1147 /*
1148 * Try to reach the target state. We max out on the BP at
1149 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1150 * responsible for bringing it up to the target state.
1151 */
1152 target = min((int)target, CPUHP_BRINGUP_CPU);
1153 ret = cpuhp_up_callbacks(cpu, st, target);
1154 out:
1155 cpus_write_unlock();
1156 arch_smt_update();
1157 return ret;
1158 }
1159
1160 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1161 {
1162 int err = 0;
1163
1164 if (!cpu_possible(cpu)) {
1165 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1166 cpu);
1167 #if defined(CONFIG_IA64)
1168 pr_err("please check additional_cpus= boot parameter\n");
1169 #endif
1170 return -EINVAL;
1171 }
1172
1173 err = try_online_node(cpu_to_node(cpu));
1174 if (err)
1175 return err;
1176
1177 cpu_maps_update_begin();
1178
1179 if (cpu_hotplug_disabled) {
1180 err = -EBUSY;
1181 goto out;
1182 }
1183 if (!cpu_smt_allowed(cpu)) {
1184 err = -EPERM;
1185 goto out;
1186 }
1187
1188 err = _cpu_up(cpu, 0, target);
1189 out:
1190 cpu_maps_update_done();
1191 return err;
1192 }
1193
1194 int cpu_up(unsigned int cpu)
1195 {
1196 return do_cpu_up(cpu, CPUHP_ONLINE);
1197 }
1198 EXPORT_SYMBOL_GPL(cpu_up);
1199
1200 #ifdef CONFIG_PM_SLEEP_SMP
1201 static cpumask_var_t frozen_cpus;
1202
1203 int freeze_secondary_cpus(int primary)
1204 {
1205 int cpu, error = 0;
1206
1207 cpu_maps_update_begin();
1208 if (primary == -1) {
1209 primary = cpumask_first(cpu_online_mask);
1210 if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
1211 primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1212 } else {
1213 if (!cpu_online(primary))
1214 primary = cpumask_first(cpu_online_mask);
1215 }
1216
1217 /*
1218 * We take down all of the non-boot CPUs in one shot to avoid races
1219 * with the userspace trying to use the CPU hotplug at the same time
1220 */
1221 cpumask_clear(frozen_cpus);
1222
1223 pr_info("Disabling non-boot CPUs ...\n");
1224 for_each_online_cpu(cpu) {
1225 if (cpu == primary)
1226 continue;
1227
1228 if (pm_wakeup_pending()) {
1229 pr_info("Wakeup pending. Abort CPU freeze\n");
1230 error = -EBUSY;
1231 break;
1232 }
1233
1234 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1235 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1236 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1237 if (!error)
1238 cpumask_set_cpu(cpu, frozen_cpus);
1239 else {
1240 pr_err("Error taking CPU%d down: %d\n", cpu, error);
1241 break;
1242 }
1243 }
1244
1245 if (!error)
1246 BUG_ON(num_online_cpus() > 1);
1247 else
1248 pr_err("Non-boot CPUs are not disabled\n");
1249
1250 /*
1251 * Make sure the CPUs won't be enabled by someone else. We need to do
1252 * this even in case of failure as all disable_nonboot_cpus() users are
1253 * supposed to do enable_nonboot_cpus() on the failure path.
1254 */
1255 cpu_hotplug_disabled++;
1256
1257 cpu_maps_update_done();
1258 return error;
1259 }
1260
/*
 * Default (weak) no-op hook, called by enable_nonboot_cpus() before it starts
 * bringing the frozen CPUs back up.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
1264
/*
 * Default (weak) no-op hook, called by enable_nonboot_cpus() after it has
 * tried to bring all frozen CPUs back up.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}
1268
1269 void enable_nonboot_cpus(void)
1270 {
1271 int cpu, error;
1272
1273 /* Allow everyone to use the CPU hotplug again */
1274 cpu_maps_update_begin();
1275 __cpu_hotplug_enable();
1276 if (cpumask_empty(frozen_cpus))
1277 goto out;
1278
1279 pr_info("Enabling non-boot CPUs ...\n");
1280
1281 arch_enable_nonboot_cpus_begin();
1282
1283 for_each_cpu(cpu, frozen_cpus) {
1284 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1285 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1286 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1287 if (!error) {
1288 pr_info("CPU%d is up\n", cpu);
1289 continue;
1290 }
1291 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1292 }
1293
1294 arch_enable_nonboot_cpus_end();
1295
1296 cpumask_clear(frozen_cpus);
1297 out:
1298 cpu_maps_update_done();
1299 }
1300
1301 static int __init alloc_frozen_cpus(void)
1302 {
1303 if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1304 return -ENOMEM;
1305 return 0;
1306 }
1307 core_initcall(alloc_frozen_cpus);
1308
1309 /*
1310 * When callbacks for CPU hotplug notifications are being executed, we must
1311 * ensure that the state of the system with respect to the tasks being frozen
1312 * or not, as reported by the notification, remains unchanged *throughout the
1313 * duration* of the execution of the callbacks.
1314 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1315 *
1316 * This synchronization is implemented by mutually excluding regular CPU
1317 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1318 * Hibernate notifications.
1319 */
1320 static int
1321 cpu_hotplug_pm_callback(struct notifier_block *nb,
1322 unsigned long action, void *ptr)
1323 {
1324 switch (action) {
1325
1326 case PM_SUSPEND_PREPARE:
1327 case PM_HIBERNATION_PREPARE:
1328 cpu_hotplug_disable();
1329 break;
1330
1331 case PM_POST_SUSPEND:
1332 case PM_POST_HIBERNATION:
1333 cpu_hotplug_enable();
1334 break;
1335
1336 default:
1337 return NOTIFY_DONE;
1338 }
1339
1340 return NOTIFY_OK;
1341 }
1342
1343
/*
 * Register cpu_hotplug_pm_callback() as a PM notifier with priority 0.
 * Runs as a core_initcall and always returns 0.
 */
static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
1355
1356 #endif /* CONFIG_PM_SLEEP_SMP */
1357
/* Id of the boot CPU; written by boot_cpu_init() on SMP builds. */
int __boot_cpu_id;
1359
1360 #endif /* CONFIG_SMP */
1361
/*
 * Boot processor state steps.
 *
 * The table is indexed by enum cpuhp_state. Each populated entry names the
 * step and gives its startup and teardown callbacks. A NULL callback means
 * there is nothing to run for that direction.
 */
static struct cpuhp_step cpuhp_hp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_PERF_PREPARE] = {
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_PREPARE] = {
		.name			= "timers:prepare",
		.startup.single		= timers_prepare_cpu,
		.teardown.single	= timers_dead_cpu,
	},
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	/*
	 * Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization.
	 */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/*
	 * Handled on the control processor until the plugged processor manages
	 * this itself.
	 */
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
		.cant_stop		= true,
	},
	/* Handle smpboot threads park/unpark */
	[CPUHP_AP_SMPBOOT_THREADS] = {
		.name			= "smpboot/threads:online",
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= smpboot_park_threads,
	},
	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
		.name			= "irq/affinity:online",
		.startup.single		= irq_affinity_online_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WATCHDOG_ONLINE] = {
		.name			= "lockup_detector:online",
		.startup.single		= lockup_detector_online_cpu,
		.teardown.single	= lockup_detector_offline_cpu,
	},
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
	},
	[CPUHP_AP_RCUTREE_ONLINE] = {
		.name			= "RCU/tree:online",
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
	},
#endif
	/*
	 * The dynamically registered state space is here
	 */

#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
	},
#endif

	/* CPU is fully up and running. */
	[CPUHP_ONLINE] = {
		.name			= "online",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
};
1523
1524 /* Sanity check for callbacks */
1525 static int cpuhp_cb_check(enum cpuhp_state state)
1526 {
1527 if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1528 return -EINVAL;
1529 return 0;
1530 }
1531
1532 /*
1533 * Returns a free for dynamic slot assignment of the Online state. The states
1534 * are protected by the cpuhp_slot_states mutex and an empty slot is identified
1535 * by having no name assigned.
1536 */
1537 static int cpuhp_reserve_state(enum cpuhp_state state)
1538 {
1539 enum cpuhp_state i, end;
1540 struct cpuhp_step *step;
1541
1542 switch (state) {
1543 case CPUHP_AP_ONLINE_DYN:
1544 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1545 end = CPUHP_AP_ONLINE_DYN_END;
1546 break;
1547 case CPUHP_BP_PREPARE_DYN:
1548 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1549 end = CPUHP_BP_PREPARE_DYN_END;
1550 break;
1551 default:
1552 return -EINVAL;
1553 }
1554
1555 for (i = state; i <= end; i++, step++) {
1556 if (!step->name)
1557 return i;
1558 }
1559 WARN(1, "No more dynamic states available for CPU hotplug\n");
1560 return -ENOSPC;
1561 }
1562
1563 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1564 int (*startup)(unsigned int cpu),
1565 int (*teardown)(unsigned int cpu),
1566 bool multi_instance)
1567 {
1568 /* (Un)Install the callbacks for further cpu hotplug operations */
1569 struct cpuhp_step *sp;
1570 int ret = 0;
1571
1572 /*
1573 * If name is NULL, then the state gets removed.
1574 *
1575 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1576 * the first allocation from these dynamic ranges, so the removal
1577 * would trigger a new allocation and clear the wrong (already
1578 * empty) state, leaving the callbacks of the to be cleared state
1579 * dangling, which causes wreckage on the next hotplug operation.
1580 */
1581 if (name && (state == CPUHP_AP_ONLINE_DYN ||
1582 state == CPUHP_BP_PREPARE_DYN)) {
1583 ret = cpuhp_reserve_state(state);
1584 if (ret < 0)
1585 return ret;
1586 state = ret;
1587 }
1588 sp = cpuhp_get_step(state);
1589 if (name && sp->name)
1590 return -EBUSY;
1591
1592 sp->startup.single = startup;
1593 sp->teardown.single = teardown;
1594 sp->name = name;
1595 sp->multi_instance = multi_instance;
1596 INIT_HLIST_HEAD(&sp->list);
1597 return ret;
1598 }
1599
1600 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1601 {
1602 return cpuhp_get_step(state)->teardown.single;
1603 }
1604
1605 /*
1606 * Call the startup/teardown function for a step either on the AP or
1607 * on the current CPU.
1608 */
1609 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1610 struct hlist_node *node)
1611 {
1612 struct cpuhp_step *sp = cpuhp_get_step(state);
1613 int ret;
1614
1615 /*
1616 * If there's nothing to do, we done.
1617 * Relies on the union for multi_instance.
1618 */
1619 if ((bringup && !sp->startup.single) ||
1620 (!bringup && !sp->teardown.single))
1621 return 0;
1622 /*
1623 * The non AP bound callbacks can fail on bringup. On teardown
1624 * e.g. module removal we crash for now.
1625 */
1626 #ifdef CONFIG_SMP
1627 if (cpuhp_is_ap_state(state))
1628 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1629 else
1630 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1631 #else
1632 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1633 #endif
1634 BUG_ON(ret && !bringup);
1635 return ret;
1636 }
1637
1638 /*
1639 * Called from __cpuhp_setup_state on a recoverable failure.
1640 *
1641 * Note: The teardown callbacks for rollback are not allowed to fail!
1642 */
1643 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1644 struct hlist_node *node)
1645 {
1646 int cpu;
1647
1648 /* Roll back the already executed steps on the other cpus */
1649 for_each_present_cpu(cpu) {
1650 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1651 int cpustate = st->state;
1652
1653 if (cpu >= failedcpu)
1654 break;
1655
1656 /* Did we invoke the startup call on that cpu ? */
1657 if (cpustate >= state)
1658 cpuhp_issue_call(cpu, state, false, node);
1659 }
1660 }
1661
1662 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1663 struct hlist_node *node,
1664 bool invoke)
1665 {
1666 struct cpuhp_step *sp;
1667 int cpu;
1668 int ret;
1669
1670 lockdep_assert_cpus_held();
1671
1672 sp = cpuhp_get_step(state);
1673 if (sp->multi_instance == false)
1674 return -EINVAL;
1675
1676 mutex_lock(&cpuhp_state_mutex);
1677
1678 if (!invoke || !sp->startup.multi)
1679 goto add_node;
1680
1681 /*
1682 * Try to call the startup callback for each present cpu
1683 * depending on the hotplug state of the cpu.
1684 */
1685 for_each_present_cpu(cpu) {
1686 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1687 int cpustate = st->state;
1688
1689 if (cpustate < state)
1690 continue;
1691
1692 ret = cpuhp_issue_call(cpu, state, true, node);
1693 if (ret) {
1694 if (sp->teardown.multi)
1695 cpuhp_rollback_install(cpu, state, node);
1696 goto unlock;
1697 }
1698 }
1699 add_node:
1700 ret = 0;
1701 hlist_add_head(node, &sp->list);
1702 unlock:
1703 mutex_unlock(&cpuhp_state_mutex);
1704 return ret;
1705 }
1706
1707 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1708 bool invoke)
1709 {
1710 int ret;
1711
1712 cpus_read_lock();
1713 ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1714 cpus_read_unlock();
1715 return ret;
1716 }
1717 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
1718
1719 /**
1720 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
1721 * @state: The state to setup
1722 * @invoke: If true, the startup function is invoked for cpus where
1723 * cpu state >= @state
1724 * @startup: startup callback function
1725 * @teardown: teardown callback function
1726 * @multi_instance: State is set up for multiple instances which get
1727 * added afterwards.
1728 *
1729 * The caller needs to hold cpus read locked while calling this function.
1730 * Returns:
1731 * On success:
1732 * Positive state number if @state is CPUHP_AP_ONLINE_DYN
1733 * 0 for all other states
1734 * On failure: proper (negative) error code
1735 */
1736 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1737 const char *name, bool invoke,
1738 int (*startup)(unsigned int cpu),
1739 int (*teardown)(unsigned int cpu),
1740 bool multi_instance)
1741 {
1742 int cpu, ret = 0;
1743 bool dynstate;
1744
1745 lockdep_assert_cpus_held();
1746
1747 if (cpuhp_cb_check(state) || !name)
1748 return -EINVAL;
1749
1750 mutex_lock(&cpuhp_state_mutex);
1751
1752 ret = cpuhp_store_callbacks(state, name, startup, teardown,
1753 multi_instance);
1754
1755 dynstate = state == CPUHP_AP_ONLINE_DYN;
1756 if (ret > 0 && dynstate) {
1757 state = ret;
1758 ret = 0;
1759 }
1760
1761 if (ret || !invoke || !startup)
1762 goto out;
1763
1764 /*
1765 * Try to call the startup callback for each present cpu
1766 * depending on the hotplug state of the cpu.
1767 */
1768 for_each_present_cpu(cpu) {
1769 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1770 int cpustate = st->state;
1771
1772 if (cpustate < state)
1773 continue;
1774
1775 ret = cpuhp_issue_call(cpu, state, true, NULL);
1776 if (ret) {
1777 if (teardown)
1778 cpuhp_rollback_install(cpu, state, NULL);
1779 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1780 goto out;
1781 }
1782 }
1783 out:
1784 mutex_unlock(&cpuhp_state_mutex);
1785 /*
1786 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1787 * dynamically allocated state in case of success.
1788 */
1789 if (!ret && dynstate)
1790 return state;
1791 return ret;
1792 }
1793 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1794
1795 int __cpuhp_setup_state(enum cpuhp_state state,
1796 const char *name, bool invoke,
1797 int (*startup)(unsigned int cpu),
1798 int (*teardown)(unsigned int cpu),
1799 bool multi_instance)
1800 {
1801 int ret;
1802
1803 cpus_read_lock();
1804 ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1805 teardown, multi_instance);
1806 cpus_read_unlock();
1807 return ret;
1808 }
1809 EXPORT_SYMBOL(__cpuhp_setup_state);
1810
1811 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1812 struct hlist_node *node, bool invoke)
1813 {
1814 struct cpuhp_step *sp = cpuhp_get_step(state);
1815 int cpu;
1816
1817 BUG_ON(cpuhp_cb_check(state));
1818
1819 if (!sp->multi_instance)
1820 return -EINVAL;
1821
1822 cpus_read_lock();
1823 mutex_lock(&cpuhp_state_mutex);
1824
1825 if (!invoke || !cpuhp_get_teardown_cb(state))
1826 goto remove;
1827 /*
1828 * Call the teardown callback for each present cpu depending
1829 * on the hotplug state of the cpu. This function is not
1830 * allowed to fail currently!
1831 */
1832 for_each_present_cpu(cpu) {
1833 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1834 int cpustate = st->state;
1835
1836 if (cpustate >= state)
1837 cpuhp_issue_call(cpu, state, false, node);
1838 }
1839
1840 remove:
1841 hlist_del(node);
1842 mutex_unlock(&cpuhp_state_mutex);
1843 cpus_read_unlock();
1844
1845 return 0;
1846 }
1847 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1848
1849 /**
1850 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
1851 * @state: The state to remove
1852 * @invoke: If true, the teardown function is invoked for cpus where
1853 * cpu state >= @state
1854 *
1855 * The caller needs to hold cpus read locked while calling this function.
1856 * The teardown callback is currently not allowed to fail. Think
1857 * about module removal!
1858 */
1859 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1860 {
1861 struct cpuhp_step *sp = cpuhp_get_step(state);
1862 int cpu;
1863
1864 BUG_ON(cpuhp_cb_check(state));
1865
1866 lockdep_assert_cpus_held();
1867
1868 mutex_lock(&cpuhp_state_mutex);
1869 if (sp->multi_instance) {
1870 WARN(!hlist_empty(&sp->list),
1871 "Error: Removing state %d which has instances left.\n",
1872 state);
1873 goto remove;
1874 }
1875
1876 if (!invoke || !cpuhp_get_teardown_cb(state))
1877 goto remove;
1878
1879 /*
1880 * Call the teardown callback for each present cpu depending
1881 * on the hotplug state of the cpu. This function is not
1882 * allowed to fail currently!
1883 */
1884 for_each_present_cpu(cpu) {
1885 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1886 int cpustate = st->state;
1887
1888 if (cpustate >= state)
1889 cpuhp_issue_call(cpu, state, false, NULL);
1890 }
1891 remove:
1892 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1893 mutex_unlock(&cpuhp_state_mutex);
1894 }
1895 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
1896
1897 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1898 {
1899 cpus_read_lock();
1900 __cpuhp_remove_state_cpuslocked(state, invoke);
1901 cpus_read_unlock();
1902 }
1903 EXPORT_SYMBOL(__cpuhp_remove_state);
1904
1905 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1906 static ssize_t show_cpuhp_state(struct device *dev,
1907 struct device_attribute *attr, char *buf)
1908 {
1909 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1910
1911 return sprintf(buf, "%d\n", st->state);
1912 }
1913 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1914
1915 static ssize_t write_cpuhp_target(struct device *dev,
1916 struct device_attribute *attr,
1917 const char *buf, size_t count)
1918 {
1919 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1920 struct cpuhp_step *sp;
1921 int target, ret;
1922
1923 ret = kstrtoint(buf, 10, &target);
1924 if (ret)
1925 return ret;
1926
1927 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1928 if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1929 return -EINVAL;
1930 #else
1931 if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1932 return -EINVAL;
1933 #endif
1934
1935 ret = lock_device_hotplug_sysfs();
1936 if (ret)
1937 return ret;
1938
1939 mutex_lock(&cpuhp_state_mutex);
1940 sp = cpuhp_get_step(target);
1941 ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1942 mutex_unlock(&cpuhp_state_mutex);
1943 if (ret)
1944 goto out;
1945
1946 if (st->state < target)
1947 ret = do_cpu_up(dev->id, target);
1948 else
1949 ret = do_cpu_down(dev->id, target);
1950 out:
1951 unlock_device_hotplug();
1952 return ret ? ret : count;
1953 }
1954
1955 static ssize_t show_cpuhp_target(struct device *dev,
1956 struct device_attribute *attr, char *buf)
1957 {
1958 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1959
1960 return sprintf(buf, "%d\n", st->target);
1961 }
1962 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1963
1964
1965 static ssize_t write_cpuhp_fail(struct device *dev,
1966 struct device_attribute *attr,
1967 const char *buf, size_t count)
1968 {
1969 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1970 struct cpuhp_step *sp;
1971 int fail, ret;
1972
1973 ret = kstrtoint(buf, 10, &fail);
1974 if (ret)
1975 return ret;
1976
1977 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
1978 return -EINVAL;
1979
1980 /*
1981 * Cannot fail STARTING/DYING callbacks.
1982 */
1983 if (cpuhp_is_atomic_state(fail))
1984 return -EINVAL;
1985
1986 /*
1987 * Cannot fail anything that doesn't have callbacks.
1988 */
1989 mutex_lock(&cpuhp_state_mutex);
1990 sp = cpuhp_get_step(fail);
1991 if (!sp->startup.single && !sp->teardown.single)
1992 ret = -EINVAL;
1993 mutex_unlock(&cpuhp_state_mutex);
1994 if (ret)
1995 return ret;
1996
1997 st->fail = fail;
1998
1999 return count;
2000 }
2001
2002 static ssize_t show_cpuhp_fail(struct device *dev,
2003 struct device_attribute *attr, char *buf)
2004 {
2005 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2006
2007 return sprintf(buf, "%d\n", st->fail);
2008 }
2009
2010 static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
2011
2012 static struct attribute *cpuhp_cpu_attrs[] = {
2013 &dev_attr_state.attr,
2014 &dev_attr_target.attr,
2015 &dev_attr_fail.attr,
2016 NULL
2017 };
2018
2019 static const struct attribute_group cpuhp_cpu_attr_group = {
2020 .attrs = cpuhp_cpu_attrs,
2021 .name = "hotplug",
2022 NULL
2023 };
2024
2025 static ssize_t show_cpuhp_states(struct device *dev,
2026 struct device_attribute *attr, char *buf)
2027 {
2028 ssize_t cur, res = 0;
2029 int i;
2030
2031 mutex_lock(&cpuhp_state_mutex);
2032 for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2033 struct cpuhp_step *sp = cpuhp_get_step(i);
2034
2035 if (sp->name) {
2036 cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2037 buf += cur;
2038 res += cur;
2039 }
2040 }
2041 mutex_unlock(&cpuhp_state_mutex);
2042 return res;
2043 }
2044 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2045
2046 static struct attribute *cpuhp_cpu_root_attrs[] = {
2047 &dev_attr_states.attr,
2048 NULL
2049 };
2050
2051 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2052 .attrs = cpuhp_cpu_root_attrs,
2053 .name = "hotplug",
2054 NULL
2055 };
2056
2057 #ifdef CONFIG_HOTPLUG_SMT
2058
2059 static void cpuhp_offline_cpu_device(unsigned int cpu)
2060 {
2061 struct device *dev = get_cpu_device(cpu);
2062
2063 dev->offline = true;
2064 /* Tell user space about the state change */
2065 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2066 }
2067
2068 static void cpuhp_online_cpu_device(unsigned int cpu)
2069 {
2070 struct device *dev = get_cpu_device(cpu);
2071
2072 dev->offline = false;
2073 /* Tell user space about the state change */
2074 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2075 }
2076
2077 int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2078 {
2079 int cpu, ret = 0;
2080
2081 cpu_maps_update_begin();
2082 for_each_online_cpu(cpu) {
2083 if (topology_is_primary_thread(cpu))
2084 continue;
2085 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2086 if (ret)
2087 break;
2088 /*
2089 * As this needs to hold the cpu maps lock it's impossible
2090 * to call device_offline() because that ends up calling
2091 * cpu_down() which takes cpu maps lock. cpu maps lock
2092 * needs to be held as this might race against in kernel
2093 * abusers of the hotplug machinery (thermal management).
2094 *
2095 * So nothing would update device:offline state. That would
2096 * leave the sysfs entry stale and prevent onlining after
2097 * smt control has been changed to 'off' again. This is
2098 * called under the sysfs hotplug lock, so it is properly
2099 * serialized against the regular offline usage.
2100 */
2101 cpuhp_offline_cpu_device(cpu);
2102 }
2103 if (!ret)
2104 cpu_smt_control = ctrlval;
2105 cpu_maps_update_done();
2106 return ret;
2107 }
2108
2109 int cpuhp_smt_enable(void)
2110 {
2111 int cpu, ret = 0;
2112
2113 cpu_maps_update_begin();
2114 cpu_smt_control = CPU_SMT_ENABLED;
2115 for_each_present_cpu(cpu) {
2116 /* Skip online CPUs and CPUs on offline nodes */
2117 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2118 continue;
2119 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2120 if (ret)
2121 break;
2122 /* See comment in cpuhp_smt_disable() */
2123 cpuhp_online_cpu_device(cpu);
2124 }
2125 cpu_maps_update_done();
2126 return ret;
2127 }
2128
2129
2130 static ssize_t
2131 __store_smt_control(struct device *dev, struct device_attribute *attr,
2132 const char *buf, size_t count)
2133 {
2134 int ctrlval, ret;
2135
2136 if (sysfs_streq(buf, "on"))
2137 ctrlval = CPU_SMT_ENABLED;
2138 else if (sysfs_streq(buf, "off"))
2139 ctrlval = CPU_SMT_DISABLED;
2140 else if (sysfs_streq(buf, "forceoff"))
2141 ctrlval = CPU_SMT_FORCE_DISABLED;
2142 else
2143 return -EINVAL;
2144
2145 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2146 return -EPERM;
2147
2148 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2149 return -ENODEV;
2150
2151 ret = lock_device_hotplug_sysfs();
2152 if (ret)
2153 return ret;
2154
2155 if (ctrlval != cpu_smt_control) {
2156 switch (ctrlval) {
2157 case CPU_SMT_ENABLED:
2158 ret = cpuhp_smt_enable();
2159 break;
2160 case CPU_SMT_DISABLED:
2161 case CPU_SMT_FORCE_DISABLED:
2162 ret = cpuhp_smt_disable(ctrlval);
2163 break;
2164 }
2165 }
2166
2167 unlock_device_hotplug();
2168 return ret ? ret : count;
2169 }
2170
2171 #else /* !CONFIG_HOTPLUG_SMT */
/* Stub used without CONFIG_HOTPLUG_SMT: every write fails with -ENODEV. */
static ssize_t
__store_smt_control(struct device *dev, struct device_attribute *attr,
		    const char *buf, size_t count)
{
	return -ENODEV;
}
2178 #endif /* CONFIG_HOTPLUG_SMT */
2179
2180 static const char *smt_states[] = {
2181 [CPU_SMT_ENABLED] = "on",
2182 [CPU_SMT_DISABLED] = "off",
2183 [CPU_SMT_FORCE_DISABLED] = "forceoff",
2184 [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2185 [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2186 };
2187
2188 static ssize_t
2189 show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2190 {
2191 const char *state = smt_states[cpu_smt_control];
2192
2193 return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2194 }
2195
/*
 * sysfs store for the smt "control" attribute. Forwards to
 * __store_smt_control(), which has a CONFIG_HOTPLUG_SMT and a stub variant.
 */
static ssize_t
store_smt_control(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t count)
{
	return __store_smt_control(dev, attr, buf, count);
}
static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2203
2204 static ssize_t
2205 show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2206 {
2207 return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2208 }
2209 static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2210
2211 static struct attribute *cpuhp_smt_attrs[] = {
2212 &dev_attr_control.attr,
2213 &dev_attr_active.attr,
2214 NULL
2215 };
2216
2217 static const struct attribute_group cpuhp_smt_attr_group = {
2218 .attrs = cpuhp_smt_attrs,
2219 .name = "smt",
2220 NULL
2221 };
2222
2223 static int __init cpu_smt_sysfs_init(void)
2224 {
2225 return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2226 &cpuhp_smt_attr_group);
2227 }
2228
2229 static int __init cpuhp_sysfs_init(void)
2230 {
2231 int cpu, ret;
2232
2233 ret = cpu_smt_sysfs_init();
2234 if (ret)
2235 return ret;
2236
2237 ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2238 &cpuhp_cpu_root_attr_group);
2239 if (ret)
2240 return ret;
2241
2242 for_each_possible_cpu(cpu) {
2243 struct device *dev = get_cpu_device(cpu);
2244
2245 if (!dev)
2246 continue;
2247 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2248 if (ret)
2249 return ret;
2250 }
2251 return 0;
2252 }
2253 device_initcall(cpuhp_sysfs_init);
2254 #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2255
2256 /*
2257 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2258 * represents all NR_CPUS bits binary values of 1<<nr.
2259 *
2260 * It is used by cpumask_of() to get a constant address to a CPU
2261 * mask value that has a single bit set only.
2262 */
2263
/* cpu_bit_bitmap[0] is empty - so we can back into it */
/* MASK_DECLARE_1(x) sets word 0 of row x+1 to the single bit 1UL << x. */
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
/* Each wider variant expands to two of the next narrower one, back to back. */
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

/*
 * Rows 1..BITS_PER_LONG each have exactly one bit set in their first word;
 * everything not named by an initializer below is zero.
 */
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	/* Bits 32..63 exist only when a long is wider than 32 bits. */
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
2280
/* Constant NR_CPUS-bit bitmap initialised with CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

/*
 * The possible mask starts out as CPU_BITS_ALL with
 * CONFIG_INIT_ALL_POSSIBLE and zero-initialised otherwise.
 * init_cpu_possible() overwrites it with a caller supplied mask.
 */
#ifdef CONFIG_INIT_ALL_POSSIBLE
struct cpumask __cpu_possible_mask __read_mostly
	= {CPU_BITS_ALL};
#else
struct cpumask __cpu_possible_mask __read_mostly;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);

/* Modified by set_cpu_online(); overwritten wholesale by init_cpu_online(). */
struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);

/* Overwritten wholesale by init_cpu_present(). */
struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);

struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

/* Count kept in step with __cpu_online_mask by set_cpu_online(). */
atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
2303
2304 void init_cpu_present(const struct cpumask *src)
2305 {
2306 cpumask_copy(&__cpu_present_mask, src);
2307 }
2308
2309 void init_cpu_possible(const struct cpumask *src)
2310 {
2311 cpumask_copy(&__cpu_possible_mask, src);
2312 }
2313
2314 void init_cpu_online(const struct cpumask *src)
2315 {
2316 cpumask_copy(&__cpu_online_mask, src);
2317 }
2318
2319 void set_cpu_online(unsigned int cpu, bool online)
2320 {
2321 /*
2322 * atomic_inc/dec() is required to handle the horrid abuse of this
2323 * function by the reboot and kexec code which invoke it from
2324 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2325 * regular CPU hotplug is properly serialized.
2326 *
2327 * Note, that the fact that __num_online_cpus is of type atomic_t
2328 * does not protect readers which are not serialized against
2329 * concurrent hotplug operations.
2330 */
2331 if (online) {
2332 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
2333 atomic_inc(&__num_online_cpus);
2334 } else {
2335 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
2336 atomic_dec(&__num_online_cpus);
2337 }
2338 }
2339
2340 /*
2341 * Activate the first processor.
2342 */
2343 void __init boot_cpu_init(void)
2344 {
2345 int cpu = smp_processor_id();
2346
2347 /* Mark the boot cpu "present", "online" etc for SMP and UP case */
2348 set_cpu_online(cpu, true);
2349 set_cpu_active(cpu, true);
2350 set_cpu_present(cpu, true);
2351 set_cpu_possible(cpu, true);
2352
2353 #ifdef CONFIG_SMP
2354 __boot_cpu_id = cpu;
2355 #endif
2356 }
2357
2358 /*
2359 * Must be called _AFTER_ setting up the per_cpu areas
2360 */
2361 void __init boot_cpu_hotplug_init(void)
2362 {
2363 #ifdef CONFIG_SMP
2364 cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2365 #endif
2366 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2367 }
2368
2369 enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
2370
2371 static int __init mitigations_parse_cmdline(char *arg)
2372 {
2373 if (!strcmp(arg, "off"))
2374 cpu_mitigations = CPU_MITIGATIONS_OFF;
2375 else if (!strcmp(arg, "auto"))
2376 cpu_mitigations = CPU_MITIGATIONS_AUTO;
2377 else if (!strcmp(arg, "auto,nosmt"))
2378 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2379 else
2380 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
2381 arg);
2382
2383 return 0;
2384 }
2385 early_param("mitigations", mitigations_parse_cmdline);