]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - kernel/smp.c
Merge tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[mirror_ubuntu-jammy-kernel.git] / kernel / smp.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Generic helpers for smp ipi calls
4 *
5 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
6 */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/irq_work.h>
11 #include <linux/rcupdate.h>
12 #include <linux/rculist.h>
13 #include <linux/kernel.h>
14 #include <linux/export.h>
15 #include <linux/percpu.h>
16 #include <linux/init.h>
17 #include <linux/interrupt.h>
18 #include <linux/gfp.h>
19 #include <linux/smp.h>
20 #include <linux/cpu.h>
21 #include <linux/sched.h>
22 #include <linux/sched/idle.h>
23 #include <linux/hypervisor.h>
24 #include <linux/sched/clock.h>
25 #include <linux/nmi.h>
26 #include <linux/sched/debug.h>
27
28 #include "smpboot.h"
29 #include "sched/smp.h"
30
31 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
32
33 struct call_function_data {
34 call_single_data_t __percpu *csd;
35 cpumask_var_t cpumask;
36 cpumask_var_t cpumask_ipi;
37 };
38
39 static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
40
41 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
42
43 static void flush_smp_call_function_queue(bool warn_cpu_offline);
44
45 int smpcfd_prepare_cpu(unsigned int cpu)
46 {
47 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
48
49 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
50 cpu_to_node(cpu)))
51 return -ENOMEM;
52 if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
53 cpu_to_node(cpu))) {
54 free_cpumask_var(cfd->cpumask);
55 return -ENOMEM;
56 }
57 cfd->csd = alloc_percpu(call_single_data_t);
58 if (!cfd->csd) {
59 free_cpumask_var(cfd->cpumask);
60 free_cpumask_var(cfd->cpumask_ipi);
61 return -ENOMEM;
62 }
63
64 return 0;
65 }
66
67 int smpcfd_dead_cpu(unsigned int cpu)
68 {
69 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
70
71 free_cpumask_var(cfd->cpumask);
72 free_cpumask_var(cfd->cpumask_ipi);
73 free_percpu(cfd->csd);
74 return 0;
75 }
76
77 int smpcfd_dying_cpu(unsigned int cpu)
78 {
79 /*
80 * The IPIs for the smp-call-function callbacks queued by other
81 * CPUs might arrive late, either due to hardware latencies or
82 * because this CPU disabled interrupts (inside stop-machine)
83 * before the IPIs were sent. So flush out any pending callbacks
84 * explicitly (without waiting for the IPIs to arrive), to
85 * ensure that the outgoing CPU doesn't go offline with work
86 * still pending.
87 */
88 flush_smp_call_function_queue(false);
89 irq_work_run();
90 return 0;
91 }
92
93 void __init call_function_init(void)
94 {
95 int i;
96
97 for_each_possible_cpu(i)
98 init_llist_head(&per_cpu(call_single_queue, i));
99
100 smpcfd_prepare_cpu(smp_processor_id());
101 }
102
103 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
104
105 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
106 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
107 static DEFINE_PER_CPU(void *, cur_csd_info);
108
109 #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
110 static atomic_t csd_bug_count = ATOMIC_INIT(0);
111
112 /* Record current CSD work for current CPU, NULL to erase. */
113 static void csd_lock_record(call_single_data_t *csd)
114 {
115 if (!csd) {
116 smp_mb(); /* NULL cur_csd after unlock. */
117 __this_cpu_write(cur_csd, NULL);
118 return;
119 }
120 __this_cpu_write(cur_csd_func, csd->func);
121 __this_cpu_write(cur_csd_info, csd->info);
122 smp_wmb(); /* func and info before csd. */
123 __this_cpu_write(cur_csd, csd);
124 smp_mb(); /* Update cur_csd before function call. */
125 /* Or before unlock, as the case may be. */
126 }
127
128 static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
129 {
130 unsigned int csd_type;
131
132 csd_type = CSD_TYPE(csd);
133 if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
134 return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
135 return -1;
136 }
137
138 /*
139 * Complain if too much time spent waiting. Note that only
140 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
141 * so waiting on other types gets much less information.
142 */
143 static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
144 {
145 int cpu = -1;
146 int cpux;
147 bool firsttime;
148 u64 ts2, ts_delta;
149 call_single_data_t *cpu_cur_csd;
150 unsigned int flags = READ_ONCE(csd->node.u_flags);
151
152 if (!(flags & CSD_FLAG_LOCK)) {
153 if (!unlikely(*bug_id))
154 return true;
155 cpu = csd_lock_wait_getcpu(csd);
156 pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
157 *bug_id, raw_smp_processor_id(), cpu);
158 return true;
159 }
160
161 ts2 = sched_clock();
162 ts_delta = ts2 - *ts1;
163 if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
164 return false;
165
166 firsttime = !*bug_id;
167 if (firsttime)
168 *bug_id = atomic_inc_return(&csd_bug_count);
169 cpu = csd_lock_wait_getcpu(csd);
170 if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
171 cpux = 0;
172 else
173 cpux = cpu;
174 cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
175 pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
176 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
177 cpu, csd->func, csd->info);
178 if (cpu_cur_csd && csd != cpu_cur_csd) {
179 pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
180 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
181 READ_ONCE(per_cpu(cur_csd_info, cpux)));
182 } else {
183 pr_alert("\tcsd: CSD lock (#%d) %s.\n",
184 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
185 }
186 if (cpu >= 0) {
187 if (!trigger_single_cpu_backtrace(cpu))
188 dump_cpu_task(cpu);
189 if (!cpu_cur_csd) {
190 pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
191 arch_send_call_function_single_ipi(cpu);
192 }
193 }
194 dump_stack();
195 *ts1 = ts2;
196
197 return false;
198 }
199
200 /*
201 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
202 *
203 * For non-synchronous ipi calls the csd can still be in use by the
204 * previous function call. For multi-cpu calls its even more interesting
205 * as we'll have to ensure no other cpu is observing our csd.
206 */
207 static __always_inline void csd_lock_wait(call_single_data_t *csd)
208 {
209 int bug_id = 0;
210 u64 ts0, ts1;
211
212 ts1 = ts0 = sched_clock();
213 for (;;) {
214 if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
215 break;
216 cpu_relax();
217 }
218 smp_acquire__after_ctrl_dep();
219 }
220
221 #else
222 static void csd_lock_record(call_single_data_t *csd)
223 {
224 }
225
226 static __always_inline void csd_lock_wait(call_single_data_t *csd)
227 {
228 smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
229 }
230 #endif
231
232 static __always_inline void csd_lock(call_single_data_t *csd)
233 {
234 csd_lock_wait(csd);
235 csd->node.u_flags |= CSD_FLAG_LOCK;
236
237 /*
238 * prevent CPU from reordering the above assignment
239 * to ->flags with any subsequent assignments to other
240 * fields of the specified call_single_data_t structure:
241 */
242 smp_wmb();
243 }
244
245 static __always_inline void csd_unlock(call_single_data_t *csd)
246 {
247 WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
248
249 /*
250 * ensure we're all done before releasing data:
251 */
252 smp_store_release(&csd->node.u_flags, 0);
253 }
254
/* Per-CPU csd used by smp_call_function_single() for calls made with !wait. */
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
256
257 void __smp_call_single_queue(int cpu, struct llist_node *node)
258 {
259 /*
260 * The list addition should be visible before sending the IPI
261 * handler locks the list to pull the entry off it because of
262 * normal cache coherency rules implied by spinlocks.
263 *
264 * If IPIs can go out of order to the cache coherency protocol
265 * in an architecture, sufficient synchronisation should be added
266 * to arch code to make it appear to obey cache coherency WRT
267 * locking and barrier primitives. Generic code isn't really
268 * equipped to do the right thing...
269 */
270 if (llist_add(node, &per_cpu(call_single_queue, cpu)))
271 send_call_function_single_ipi(cpu);
272 }
273
274 /*
275 * Insert a previously allocated call_single_data_t element
276 * for execution on the given CPU. data must already have
277 * ->func, ->info, and ->flags set.
278 */
279 static int generic_exec_single(int cpu, call_single_data_t *csd)
280 {
281 if (cpu == smp_processor_id()) {
282 smp_call_func_t func = csd->func;
283 void *info = csd->info;
284 unsigned long flags;
285
286 /*
287 * We can unlock early even for the synchronous on-stack case,
288 * since we're doing this from the same CPU..
289 */
290 csd_lock_record(csd);
291 csd_unlock(csd);
292 local_irq_save(flags);
293 func(info);
294 csd_lock_record(NULL);
295 local_irq_restore(flags);
296 return 0;
297 }
298
299 if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
300 csd_unlock(csd);
301 return -ENXIO;
302 }
303
304 __smp_call_single_queue(cpu, &csd->node.llist);
305
306 return 0;
307 }
308
309 /**
310 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
311 *
312 * Invoked by arch to handle an IPI for call function single.
313 * Must be called with interrupts disabled.
314 */
315 void generic_smp_call_function_single_interrupt(void)
316 {
317 flush_smp_call_function_queue(true);
318 }
319
320 /**
321 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
322 *
323 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
324 * offline CPU. Skip this check if set to 'false'.
325 *
326 * Flush any pending smp-call-function callbacks queued on this CPU. This is
327 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
328 * to ensure that all pending IPI callbacks are run before it goes completely
329 * offline.
330 *
331 * Loop through the call_single_queue and run all the queued callbacks.
332 * Must be called with interrupts disabled.
333 */
334 static void flush_smp_call_function_queue(bool warn_cpu_offline)
335 {
336 call_single_data_t *csd, *csd_next;
337 struct llist_node *entry, *prev;
338 struct llist_head *head;
339 static bool warned;
340
341 lockdep_assert_irqs_disabled();
342
343 head = this_cpu_ptr(&call_single_queue);
344 entry = llist_del_all(head);
345 entry = llist_reverse_order(entry);
346
347 /* There shouldn't be any pending callbacks on an offline CPU. */
348 if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
349 !warned && !llist_empty(head))) {
350 warned = true;
351 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
352
353 /*
354 * We don't have to use the _safe() variant here
355 * because we are not invoking the IPI handlers yet.
356 */
357 llist_for_each_entry(csd, entry, node.llist) {
358 switch (CSD_TYPE(csd)) {
359 case CSD_TYPE_ASYNC:
360 case CSD_TYPE_SYNC:
361 case CSD_TYPE_IRQ_WORK:
362 pr_warn("IPI callback %pS sent to offline CPU\n",
363 csd->func);
364 break;
365
366 case CSD_TYPE_TTWU:
367 pr_warn("IPI task-wakeup sent to offline CPU\n");
368 break;
369
370 default:
371 pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
372 CSD_TYPE(csd));
373 break;
374 }
375 }
376 }
377
378 /*
379 * First; run all SYNC callbacks, people are waiting for us.
380 */
381 prev = NULL;
382 llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
383 /* Do we wait until *after* callback? */
384 if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
385 smp_call_func_t func = csd->func;
386 void *info = csd->info;
387
388 if (prev) {
389 prev->next = &csd_next->node.llist;
390 } else {
391 entry = &csd_next->node.llist;
392 }
393
394 csd_lock_record(csd);
395 func(info);
396 csd_unlock(csd);
397 csd_lock_record(NULL);
398 } else {
399 prev = &csd->node.llist;
400 }
401 }
402
403 if (!entry)
404 return;
405
406 /*
407 * Second; run all !SYNC callbacks.
408 */
409 prev = NULL;
410 llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
411 int type = CSD_TYPE(csd);
412
413 if (type != CSD_TYPE_TTWU) {
414 if (prev) {
415 prev->next = &csd_next->node.llist;
416 } else {
417 entry = &csd_next->node.llist;
418 }
419
420 if (type == CSD_TYPE_ASYNC) {
421 smp_call_func_t func = csd->func;
422 void *info = csd->info;
423
424 csd_lock_record(csd);
425 csd_unlock(csd);
426 func(info);
427 csd_lock_record(NULL);
428 } else if (type == CSD_TYPE_IRQ_WORK) {
429 irq_work_single(csd);
430 }
431
432 } else {
433 prev = &csd->node.llist;
434 }
435 }
436
437 /*
438 * Third; only CSD_TYPE_TTWU is left, issue those.
439 */
440 if (entry)
441 sched_ttwu_pending(entry);
442 }
443
444 void flush_smp_call_function_from_idle(void)
445 {
446 unsigned long flags;
447
448 if (llist_empty(this_cpu_ptr(&call_single_queue)))
449 return;
450
451 local_irq_save(flags);
452 flush_smp_call_function_queue(true);
453 if (local_softirq_pending())
454 do_softirq();
455
456 local_irq_restore(flags);
457 }
458
459 /*
460 * smp_call_function_single - Run a function on a specific CPU
461 * @func: The function to run. This must be fast and non-blocking.
462 * @info: An arbitrary pointer to pass to the function.
463 * @wait: If true, wait until function has completed on other CPUs.
464 *
465 * Returns 0 on success, else a negative status code.
466 */
467 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
468 int wait)
469 {
470 call_single_data_t *csd;
471 call_single_data_t csd_stack = {
472 .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
473 };
474 int this_cpu;
475 int err;
476
477 /*
478 * prevent preemption and reschedule on another processor,
479 * as well as CPU removal
480 */
481 this_cpu = get_cpu();
482
483 /*
484 * Can deadlock when called with interrupts disabled.
485 * We allow cpu's that are not yet online though, as no one else can
486 * send smp call function interrupt to this cpu and as such deadlocks
487 * can't happen.
488 */
489 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
490 && !oops_in_progress);
491
492 /*
493 * When @wait we can deadlock when we interrupt between llist_add() and
494 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
495 * csd_lock() on because the interrupt context uses the same csd
496 * storage.
497 */
498 WARN_ON_ONCE(!in_task());
499
500 csd = &csd_stack;
501 if (!wait) {
502 csd = this_cpu_ptr(&csd_data);
503 csd_lock(csd);
504 }
505
506 csd->func = func;
507 csd->info = info;
508 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
509 csd->node.src = smp_processor_id();
510 csd->node.dst = cpu;
511 #endif
512
513 err = generic_exec_single(cpu, csd);
514
515 if (wait)
516 csd_lock_wait(csd);
517
518 put_cpu();
519
520 return err;
521 }
522 EXPORT_SYMBOL(smp_call_function_single);
523
524 /**
525 * smp_call_function_single_async(): Run an asynchronous function on a
526 * specific CPU.
527 * @cpu: The CPU to run on.
528 * @csd: Pre-allocated and setup data structure
529 *
530 * Like smp_call_function_single(), but the call is asynchonous and
531 * can thus be done from contexts with disabled interrupts.
532 *
533 * The caller passes his own pre-allocated data structure
534 * (ie: embedded in an object) and is responsible for synchronizing it
535 * such that the IPIs performed on the @csd are strictly serialized.
536 *
537 * If the function is called with one csd which has not yet been
538 * processed by previous call to smp_call_function_single_async(), the
539 * function will return immediately with -EBUSY showing that the csd
540 * object is still in progress.
541 *
542 * NOTE: Be careful, there is unfortunately no current debugging facility to
543 * validate the correctness of this serialization.
544 */
545 int smp_call_function_single_async(int cpu, call_single_data_t *csd)
546 {
547 int err = 0;
548
549 preempt_disable();
550
551 if (csd->node.u_flags & CSD_FLAG_LOCK) {
552 err = -EBUSY;
553 goto out;
554 }
555
556 csd->node.u_flags = CSD_FLAG_LOCK;
557 smp_wmb();
558
559 err = generic_exec_single(cpu, csd);
560
561 out:
562 preempt_enable();
563
564 return err;
565 }
566 EXPORT_SYMBOL_GPL(smp_call_function_single_async);
567
568 /*
569 * smp_call_function_any - Run a function on any of the given cpus
570 * @mask: The mask of cpus it can run on.
571 * @func: The function to run. This must be fast and non-blocking.
572 * @info: An arbitrary pointer to pass to the function.
573 * @wait: If true, wait until function has completed.
574 *
575 * Returns 0 on success, else a negative status code (if no cpus were online).
576 *
577 * Selection preference:
578 * 1) current cpu if in @mask
579 * 2) any cpu of current node if in @mask
580 * 3) any other online cpu in @mask
581 */
582 int smp_call_function_any(const struct cpumask *mask,
583 smp_call_func_t func, void *info, int wait)
584 {
585 unsigned int cpu;
586 const struct cpumask *nodemask;
587 int ret;
588
589 /* Try for same CPU (cheapest) */
590 cpu = get_cpu();
591 if (cpumask_test_cpu(cpu, mask))
592 goto call;
593
594 /* Try for same node. */
595 nodemask = cpumask_of_node(cpu_to_node(cpu));
596 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
597 cpu = cpumask_next_and(cpu, nodemask, mask)) {
598 if (cpu_online(cpu))
599 goto call;
600 }
601
602 /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
603 cpu = cpumask_any_and(mask, cpu_online_mask);
604 call:
605 ret = smp_call_function_single(cpu, func, info, wait);
606 put_cpu();
607 return ret;
608 }
609 EXPORT_SYMBOL_GPL(smp_call_function_any);
610
611 static void smp_call_function_many_cond(const struct cpumask *mask,
612 smp_call_func_t func, void *info,
613 bool wait, smp_cond_func_t cond_func)
614 {
615 struct call_function_data *cfd;
616 int cpu, next_cpu, this_cpu = smp_processor_id();
617
618 /*
619 * Can deadlock when called with interrupts disabled.
620 * We allow cpu's that are not yet online though, as no one else can
621 * send smp call function interrupt to this cpu and as such deadlocks
622 * can't happen.
623 */
624 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
625 && !oops_in_progress && !early_boot_irqs_disabled);
626
627 /*
628 * When @wait we can deadlock when we interrupt between llist_add() and
629 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
630 * csd_lock() on because the interrupt context uses the same csd
631 * storage.
632 */
633 WARN_ON_ONCE(!in_task());
634
635 /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
636 cpu = cpumask_first_and(mask, cpu_online_mask);
637 if (cpu == this_cpu)
638 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
639
640 /* No online cpus? We're done. */
641 if (cpu >= nr_cpu_ids)
642 return;
643
644 /* Do we have another CPU which isn't us? */
645 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
646 if (next_cpu == this_cpu)
647 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
648
649 /* Fastpath: do that cpu by itself. */
650 if (next_cpu >= nr_cpu_ids) {
651 if (!cond_func || cond_func(cpu, info))
652 smp_call_function_single(cpu, func, info, wait);
653 return;
654 }
655
656 cfd = this_cpu_ptr(&cfd_data);
657
658 cpumask_and(cfd->cpumask, mask, cpu_online_mask);
659 __cpumask_clear_cpu(this_cpu, cfd->cpumask);
660
661 /* Some callers race with other cpus changing the passed mask */
662 if (unlikely(!cpumask_weight(cfd->cpumask)))
663 return;
664
665 cpumask_clear(cfd->cpumask_ipi);
666 for_each_cpu(cpu, cfd->cpumask) {
667 call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
668
669 if (cond_func && !cond_func(cpu, info))
670 continue;
671
672 csd_lock(csd);
673 if (wait)
674 csd->node.u_flags |= CSD_TYPE_SYNC;
675 csd->func = func;
676 csd->info = info;
677 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
678 csd->node.src = smp_processor_id();
679 csd->node.dst = cpu;
680 #endif
681 if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu)))
682 __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
683 }
684
685 /* Send a message to all CPUs in the map */
686 arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
687
688 if (wait) {
689 for_each_cpu(cpu, cfd->cpumask) {
690 call_single_data_t *csd;
691
692 csd = per_cpu_ptr(cfd->csd, cpu);
693 csd_lock_wait(csd);
694 }
695 }
696 }
697
698 /**
699 * smp_call_function_many(): Run a function on a set of other CPUs.
700 * @mask: The set of cpus to run on (only runs on online subset).
701 * @func: The function to run. This must be fast and non-blocking.
702 * @info: An arbitrary pointer to pass to the function.
703 * @wait: If true, wait (atomically) until function has completed
704 * on other CPUs.
705 *
706 * If @wait is true, then returns once @func has returned.
707 *
708 * You must not call this function with disabled interrupts or from a
709 * hardware interrupt handler or from a bottom half handler. Preemption
710 * must be disabled when calling this function.
711 */
712 void smp_call_function_many(const struct cpumask *mask,
713 smp_call_func_t func, void *info, bool wait)
714 {
715 smp_call_function_many_cond(mask, func, info, wait, NULL);
716 }
717 EXPORT_SYMBOL(smp_call_function_many);
718
719 /**
720 * smp_call_function(): Run a function on all other CPUs.
721 * @func: The function to run. This must be fast and non-blocking.
722 * @info: An arbitrary pointer to pass to the function.
723 * @wait: If true, wait (atomically) until function has completed
724 * on other CPUs.
725 *
726 * Returns 0.
727 *
728 * If @wait is true, then returns once @func has returned; otherwise
729 * it returns just before the target cpu calls @func.
730 *
731 * You must not call this function with disabled interrupts or from a
732 * hardware interrupt handler or from a bottom half handler.
733 */
734 void smp_call_function(smp_call_func_t func, void *info, int wait)
735 {
736 preempt_disable();
737 smp_call_function_many(cpu_online_mask, func, info, wait);
738 preempt_enable();
739 }
740 EXPORT_SYMBOL(smp_call_function);
741
742 /* Setup configured maximum number of CPUs to activate */
743 unsigned int setup_max_cpus = NR_CPUS;
744 EXPORT_SYMBOL(setup_max_cpus);
745
746
747 /*
748 * Setup routine for controlling SMP activation
749 *
750 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
751 * activation entirely (the MPS table probe still happens, though).
752 *
753 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
754 * greater than 0, limits the maximum number of CPUs activated in
755 * SMP mode to <NUM>.
756 */
757
758 void __weak arch_disable_smp_support(void) { }
759
760 static int __init nosmp(char *str)
761 {
762 setup_max_cpus = 0;
763 arch_disable_smp_support();
764
765 return 0;
766 }
767
768 early_param("nosmp", nosmp);
769
770 /* this is hard limit */
771 static int __init nrcpus(char *str)
772 {
773 int nr_cpus;
774
775 if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
776 nr_cpu_ids = nr_cpus;
777
778 return 0;
779 }
780
781 early_param("nr_cpus", nrcpus);
782
783 static int __init maxcpus(char *str)
784 {
785 get_option(&str, &setup_max_cpus);
786 if (setup_max_cpus == 0)
787 arch_disable_smp_support();
788
789 return 0;
790 }
791
792 early_param("maxcpus", maxcpus);
793
794 /* Setup number of possible processor ids */
795 unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
796 EXPORT_SYMBOL(nr_cpu_ids);
797
798 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
799 void __init setup_nr_cpu_ids(void)
800 {
801 nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
802 }
803
804 /* Called by boot processor to activate the rest. */
805 void __init smp_init(void)
806 {
807 int num_nodes, num_cpus;
808
809 idle_threads_init();
810 cpuhp_threads_init();
811
812 pr_info("Bringing up secondary CPUs ...\n");
813
814 bringup_nonboot_cpus(setup_max_cpus);
815
816 num_nodes = num_online_nodes();
817 num_cpus = num_online_cpus();
818 pr_info("Brought up %d node%s, %d CPU%s\n",
819 num_nodes, (num_nodes > 1 ? "s" : ""),
820 num_cpus, (num_cpus > 1 ? "s" : ""));
821
822 /* Any cleanup work */
823 smp_cpus_done(setup_max_cpus);
824 }
825
826 /*
827 * Call a function on all processors. May be used during early boot while
828 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
829 * of local_irq_disable/enable().
830 */
831 void on_each_cpu(smp_call_func_t func, void *info, int wait)
832 {
833 unsigned long flags;
834
835 preempt_disable();
836 smp_call_function(func, info, wait);
837 local_irq_save(flags);
838 func(info);
839 local_irq_restore(flags);
840 preempt_enable();
841 }
842 EXPORT_SYMBOL(on_each_cpu);
843
844 /**
845 * on_each_cpu_mask(): Run a function on processors specified by
846 * cpumask, which may include the local processor.
847 * @mask: The set of cpus to run on (only runs on online subset).
848 * @func: The function to run. This must be fast and non-blocking.
849 * @info: An arbitrary pointer to pass to the function.
850 * @wait: If true, wait (atomically) until function has completed
851 * on other CPUs.
852 *
853 * If @wait is true, then returns once @func has returned.
854 *
855 * You must not call this function with disabled interrupts or from a
856 * hardware interrupt handler or from a bottom half handler. The
857 * exception is that it may be used during early boot while
858 * early_boot_irqs_disabled is set.
859 */
860 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
861 void *info, bool wait)
862 {
863 int cpu = get_cpu();
864
865 smp_call_function_many(mask, func, info, wait);
866 if (cpumask_test_cpu(cpu, mask)) {
867 unsigned long flags;
868 local_irq_save(flags);
869 func(info);
870 local_irq_restore(flags);
871 }
872 put_cpu();
873 }
874 EXPORT_SYMBOL(on_each_cpu_mask);
875
876 /*
877 * on_each_cpu_cond(): Call a function on each processor for which
878 * the supplied function cond_func returns true, optionally waiting
879 * for all the required CPUs to finish. This may include the local
880 * processor.
881 * @cond_func: A callback function that is passed a cpu id and
882 * the info parameter. The function is called
883 * with preemption disabled. The function should
884 * return a blooean value indicating whether to IPI
885 * the specified CPU.
886 * @func: The function to run on all applicable CPUs.
887 * This must be fast and non-blocking.
888 * @info: An arbitrary pointer to pass to both functions.
889 * @wait: If true, wait (atomically) until function has
890 * completed on other CPUs.
891 *
892 * Preemption is disabled to protect against CPUs going offline but not online.
893 * CPUs going online during the call will not be seen or sent an IPI.
894 *
895 * You must not call this function with disabled interrupts or
896 * from a hardware interrupt handler or from a bottom half handler.
897 */
898 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
899 void *info, bool wait, const struct cpumask *mask)
900 {
901 int cpu = get_cpu();
902
903 smp_call_function_many_cond(mask, func, info, wait, cond_func);
904 if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
905 unsigned long flags;
906
907 local_irq_save(flags);
908 func(info);
909 local_irq_restore(flags);
910 }
911 put_cpu();
912 }
913 EXPORT_SYMBOL(on_each_cpu_cond_mask);
914
915 void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
916 void *info, bool wait)
917 {
918 on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
919 }
920 EXPORT_SYMBOL(on_each_cpu_cond);
921
/* Empty callback; kick_all_cpus_sync() only needs the IPI round trip. */
static void do_nothing(void *unused) { }
925
926 /**
927 * kick_all_cpus_sync - Force all cpus out of idle
928 *
929 * Used to synchronize the update of pm_idle function pointer. It's
930 * called after the pointer is updated and returns after the dummy
931 * callback function has been executed on all cpus. The execution of
932 * the function can only happen on the remote cpus after they have
933 * left the idle function which had been called via pm_idle function
934 * pointer. So it's guaranteed that nothing uses the previous pointer
935 * anymore.
936 */
937 void kick_all_cpus_sync(void)
938 {
939 /* Make sure the change is visible before we kick the cpus */
940 smp_mb();
941 smp_call_function(do_nothing, NULL, 1);
942 }
943 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
944
945 /**
946 * wake_up_all_idle_cpus - break all cpus out of idle
947 * wake_up_all_idle_cpus try to break all cpus which is in idle state even
948 * including idle polling cpus, for non-idle cpus, we will do nothing
949 * for them.
950 */
951 void wake_up_all_idle_cpus(void)
952 {
953 int cpu;
954
955 preempt_disable();
956 for_each_online_cpu(cpu) {
957 if (cpu == smp_processor_id())
958 continue;
959
960 wake_up_if_idle(cpu);
961 }
962 preempt_enable();
963 }
964 EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
965
966 /**
967 * smp_call_on_cpu - Call a function on a specific cpu
968 *
969 * Used to call a function on a specific cpu and wait for it to return.
970 * Optionally make sure the call is done on a specified physical cpu via vcpu
971 * pinning in order to support virtualized environments.
972 */
973 struct smp_call_on_cpu_struct {
974 struct work_struct work;
975 struct completion done;
976 int (*func)(void *);
977 void *data;
978 int ret;
979 int cpu;
980 };
981
982 static void smp_call_on_cpu_callback(struct work_struct *work)
983 {
984 struct smp_call_on_cpu_struct *sscs;
985
986 sscs = container_of(work, struct smp_call_on_cpu_struct, work);
987 if (sscs->cpu >= 0)
988 hypervisor_pin_vcpu(sscs->cpu);
989 sscs->ret = sscs->func(sscs->data);
990 if (sscs->cpu >= 0)
991 hypervisor_pin_vcpu(-1);
992
993 complete(&sscs->done);
994 }
995
996 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
997 {
998 struct smp_call_on_cpu_struct sscs = {
999 .done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1000 .func = func,
1001 .data = par,
1002 .cpu = phys ? cpu : -1,
1003 };
1004
1005 INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1006
1007 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1008 return -ENXIO;
1009
1010 queue_work_on(cpu, system_wq, &sscs.work);
1011 wait_for_completion(&sscs.done);
1012
1013 return sscs.ret;
1014 }
1015 EXPORT_SYMBOL_GPL(smp_call_on_cpu);