]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - kernel/smp.c
Merge tag 'drm-misc-next-2021-03-03' of git://anongit.freedesktop.org/drm/drm-misc...
[mirror_ubuntu-jammy-kernel.git] / kernel / smp.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
3d442233
JA
2/*
3 * Generic helpers for smp ipi calls
4 *
5 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
3d442233 6 */
ca7dfdbb
ME
7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
47885016 10#include <linux/irq_work.h>
3d442233 11#include <linux/rcupdate.h>
59190f42 12#include <linux/rculist.h>
641cd4cf 13#include <linux/kernel.h>
9984de1a 14#include <linux/export.h>
0b13fda1
IM
15#include <linux/percpu.h>
16#include <linux/init.h>
f9d34595 17#include <linux/interrupt.h>
5a0e3ad6 18#include <linux/gfp.h>
3d442233 19#include <linux/smp.h>
8969a5ed 20#include <linux/cpu.h>
c6f4459f 21#include <linux/sched.h>
4c822698 22#include <linux/sched/idle.h>
47ae4b05 23#include <linux/hypervisor.h>
35feb604
PM
24#include <linux/sched/clock.h>
25#include <linux/nmi.h>
26#include <linux/sched/debug.h>
3d442233 27
3bb5d2ee 28#include "smpboot.h"
1f8db415 29#include "sched/smp.h"
3bb5d2ee 30
/* Extract the request-type bits from a csd's flag word. */
#define CSD_TYPE(_csd)	((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
3d442233
JA
32
33struct call_function_data {
966a9671 34 call_single_data_t __percpu *csd;
0b13fda1 35 cpumask_var_t cpumask;
3fc5b3b6 36 cpumask_var_t cpumask_ipi;
3d442233
JA
37};
38
a22793c7 39static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
e03bcb68 40
6897fc22 41static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
8969a5ed 42
8d056c48
SB
43static void flush_smp_call_function_queue(bool warn_cpu_offline);
44
31487f83 45int smpcfd_prepare_cpu(unsigned int cpu)
8969a5ed 46{
8969a5ed
PZ
47 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
48
31487f83
RW
49 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
50 cpu_to_node(cpu)))
51 return -ENOMEM;
3fc5b3b6
AL
52 if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
53 cpu_to_node(cpu))) {
54 free_cpumask_var(cfd->cpumask);
55 return -ENOMEM;
56 }
966a9671 57 cfd->csd = alloc_percpu(call_single_data_t);
31487f83 58 if (!cfd->csd) {
8969a5ed 59 free_cpumask_var(cfd->cpumask);
3fc5b3b6 60 free_cpumask_var(cfd->cpumask_ipi);
31487f83
RW
61 return -ENOMEM;
62 }
63
64 return 0;
8969a5ed
PZ
65}
66
31487f83
RW
67int smpcfd_dead_cpu(unsigned int cpu)
68{
69 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
70
71 free_cpumask_var(cfd->cpumask);
3fc5b3b6 72 free_cpumask_var(cfd->cpumask_ipi);
31487f83
RW
73 free_percpu(cfd->csd);
74 return 0;
75}
76
77int smpcfd_dying_cpu(unsigned int cpu)
78{
79 /*
80 * The IPIs for the smp-call-function callbacks queued by other
81 * CPUs might arrive late, either due to hardware latencies or
82 * because this CPU disabled interrupts (inside stop-machine)
83 * before the IPIs were sent. So flush out any pending callbacks
84 * explicitly (without waiting for the IPIs to arrive), to
85 * ensure that the outgoing CPU doesn't go offline with work
86 * still pending.
87 */
88 flush_smp_call_function_queue(false);
afaa653c 89 irq_work_run();
31487f83
RW
90 return 0;
91}
8969a5ed 92
d8ad7d11 93void __init call_function_init(void)
3d442233
JA
94{
95 int i;
96
6897fc22
CH
97 for_each_possible_cpu(i)
98 init_llist_head(&per_cpu(call_single_queue, i));
8969a5ed 99
31487f83 100 smpcfd_prepare_cpu(smp_processor_id());
3d442233
JA
101}
102
35feb604
PM
103#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
104
105static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
106static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
107static DEFINE_PER_CPU(void *, cur_csd_info);
108
109#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
2b722160 110static atomic_t csd_bug_count = ATOMIC_INIT(0);
35feb604
PM
111
112/* Record current CSD work for current CPU, NULL to erase. */
113static void csd_lock_record(call_single_data_t *csd)
114{
115 if (!csd) {
116 smp_mb(); /* NULL cur_csd after unlock. */
117 __this_cpu_write(cur_csd, NULL);
118 return;
119 }
120 __this_cpu_write(cur_csd_func, csd->func);
121 __this_cpu_write(cur_csd_info, csd->info);
122 smp_wmb(); /* func and info before csd. */
123 __this_cpu_write(cur_csd, csd);
124 smp_mb(); /* Update cur_csd before function call. */
125 /* Or before unlock, as the case may be. */
126}
127
128static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
129{
130 unsigned int csd_type;
131
132 csd_type = CSD_TYPE(csd);
133 if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
a787bdaf 134 return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
35feb604
PM
135 return -1;
136}
137
138/*
139 * Complain if too much time spent waiting. Note that only
140 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
141 * so waiting on other types gets much less information.
142 */
143static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
144{
145 int cpu = -1;
146 int cpux;
147 bool firsttime;
148 u64 ts2, ts_delta;
149 call_single_data_t *cpu_cur_csd;
545b8c8d 150 unsigned int flags = READ_ONCE(csd->node.u_flags);
35feb604
PM
151
152 if (!(flags & CSD_FLAG_LOCK)) {
153 if (!unlikely(*bug_id))
154 return true;
155 cpu = csd_lock_wait_getcpu(csd);
156 pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
157 *bug_id, raw_smp_processor_id(), cpu);
158 return true;
159 }
160
161 ts2 = sched_clock();
162 ts_delta = ts2 - *ts1;
163 if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
164 return false;
165
166 firsttime = !*bug_id;
167 if (firsttime)
168 *bug_id = atomic_inc_return(&csd_bug_count);
169 cpu = csd_lock_wait_getcpu(csd);
170 if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
171 cpux = 0;
172 else
173 cpux = cpu;
174 cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
175 pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
176 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
177 cpu, csd->func, csd->info);
178 if (cpu_cur_csd && csd != cpu_cur_csd) {
179 pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
180 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
181 READ_ONCE(per_cpu(cur_csd_info, cpux)));
182 } else {
183 pr_alert("\tcsd: CSD lock (#%d) %s.\n",
184 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
185 }
186 if (cpu >= 0) {
187 if (!trigger_single_cpu_backtrace(cpu))
188 dump_cpu_task(cpu);
189 if (!cpu_cur_csd) {
190 pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
191 arch_send_call_function_single_ipi(cpu);
192 }
193 }
194 dump_stack();
195 *ts1 = ts2;
196
197 return false;
198}
199
8969a5ed
PZ
200/*
201 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
202 *
0b13fda1
IM
203 * For non-synchronous ipi calls the csd can still be in use by the
204 * previous function call. For multi-cpu calls its even more interesting
205 * as we'll have to ensure no other cpu is observing our csd.
8969a5ed 206 */
35feb604
PM
207static __always_inline void csd_lock_wait(call_single_data_t *csd)
208{
209 int bug_id = 0;
210 u64 ts0, ts1;
211
212 ts1 = ts0 = sched_clock();
213 for (;;) {
214 if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
215 break;
216 cpu_relax();
217 }
218 smp_acquire__after_ctrl_dep();
219}
220
221#else
222static void csd_lock_record(call_single_data_t *csd)
223{
224}
225
966a9671 226static __always_inline void csd_lock_wait(call_single_data_t *csd)
8969a5ed 227{
545b8c8d 228 smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
6e275637 229}
35feb604 230#endif
6e275637 231
966a9671 232static __always_inline void csd_lock(call_single_data_t *csd)
6e275637 233{
e1d12f32 234 csd_lock_wait(csd);
545b8c8d 235 csd->node.u_flags |= CSD_FLAG_LOCK;
8969a5ed
PZ
236
237 /*
0b13fda1
IM
238 * prevent CPU from reordering the above assignment
239 * to ->flags with any subsequent assignments to other
966a9671 240 * fields of the specified call_single_data_t structure:
8969a5ed 241 */
8053871d 242 smp_wmb();
8969a5ed
PZ
243}
244
966a9671 245static __always_inline void csd_unlock(call_single_data_t *csd)
8969a5ed 246{
545b8c8d 247 WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
0b13fda1 248
8969a5ed 249 /*
0b13fda1 250 * ensure we're all done before releasing data:
8969a5ed 251 */
545b8c8d 252 smp_store_release(&csd->node.u_flags, 0);
3d442233
JA
253}
254
/* Per-CPU request slot used by smp_call_function_single() when !wait. */
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
8b28499a 256
4b44a21d
PZ
257void __smp_call_single_queue(int cpu, struct llist_node *node)
258{
259 /*
260 * The list addition should be visible before sending the IPI
261 * handler locks the list to pull the entry off it because of
262 * normal cache coherency rules implied by spinlocks.
263 *
264 * If IPIs can go out of order to the cache coherency protocol
265 * in an architecture, sufficient synchronisation should be added
266 * to arch code to make it appear to obey cache coherency WRT
267 * locking and barrier primitives. Generic code isn't really
268 * equipped to do the right thing...
269 */
270 if (llist_add(node, &per_cpu(call_single_queue, cpu)))
271 send_call_function_single_ipi(cpu);
272}
273
3d442233 274/*
966a9671 275 * Insert a previously allocated call_single_data_t element
0b13fda1
IM
276 * for execution on the given CPU. data must already have
277 * ->func, ->info, and ->flags set.
3d442233 278 */
4b44a21d 279static int generic_exec_single(int cpu, call_single_data_t *csd)
3d442233 280{
8b28499a 281 if (cpu == smp_processor_id()) {
4b44a21d
PZ
282 smp_call_func_t func = csd->func;
283 void *info = csd->info;
8053871d
LT
284 unsigned long flags;
285
286 /*
287 * We can unlock early even for the synchronous on-stack case,
288 * since we're doing this from the same CPU..
289 */
35feb604 290 csd_lock_record(csd);
8053871d 291 csd_unlock(csd);
8b28499a
FW
292 local_irq_save(flags);
293 func(info);
35feb604 294 csd_lock_record(NULL);
8b28499a
FW
295 local_irq_restore(flags);
296 return 0;
297 }
298
5224b961
LT
299 if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
300 csd_unlock(csd);
8b28499a 301 return -ENXIO;
5224b961 302 }
8b28499a 303
545b8c8d 304 __smp_call_single_queue(cpu, &csd->node.llist);
3d442233 305
8b28499a 306 return 0;
3d442233
JA
307}
308
8d056c48
SB
309/**
310 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
311 *
312 * Invoked by arch to handle an IPI for call function single.
313 * Must be called with interrupts disabled.
3d442233
JA
314 */
315void generic_smp_call_function_single_interrupt(void)
316{
8d056c48
SB
317 flush_smp_call_function_queue(true);
318}
319
320/**
321 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
322 *
323 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
324 * offline CPU. Skip this check if set to 'false'.
325 *
326 * Flush any pending smp-call-function callbacks queued on this CPU. This is
327 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
328 * to ensure that all pending IPI callbacks are run before it goes completely
329 * offline.
330 *
331 * Loop through the call_single_queue and run all the queued callbacks.
332 * Must be called with interrupts disabled.
333 */
334static void flush_smp_call_function_queue(bool warn_cpu_offline)
335{
966a9671 336 call_single_data_t *csd, *csd_next;
52103be0
PZ
337 struct llist_node *entry, *prev;
338 struct llist_head *head;
a219ccf4
SB
339 static bool warned;
340
83efcbd0 341 lockdep_assert_irqs_disabled();
8d056c48 342
bb964a92 343 head = this_cpu_ptr(&call_single_queue);
8d056c48 344 entry = llist_del_all(head);
a219ccf4 345 entry = llist_reverse_order(entry);
3d442233 346
8d056c48
SB
347 /* There shouldn't be any pending callbacks on an offline CPU. */
348 if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
349 !warned && !llist_empty(head))) {
a219ccf4
SB
350 warned = true;
351 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
352
353 /*
354 * We don't have to use the _safe() variant here
355 * because we are not invoking the IPI handlers yet.
356 */
545b8c8d 357 llist_for_each_entry(csd, entry, node.llist) {
4b44a21d
PZ
358 switch (CSD_TYPE(csd)) {
359 case CSD_TYPE_ASYNC:
360 case CSD_TYPE_SYNC:
361 case CSD_TYPE_IRQ_WORK:
362 pr_warn("IPI callback %pS sent to offline CPU\n",
363 csd->func);
364 break;
365
a1488664
PZ
366 case CSD_TYPE_TTWU:
367 pr_warn("IPI task-wakeup sent to offline CPU\n");
368 break;
369
4b44a21d
PZ
370 default:
371 pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
372 CSD_TYPE(csd));
373 break;
374 }
375 }
a219ccf4 376 }
3d442233 377
52103be0
PZ
378 /*
379 * First; run all SYNC callbacks, people are waiting for us.
380 */
381 prev = NULL;
545b8c8d 382 llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
8053871d 383 /* Do we wait until *after* callback? */
4b44a21d
PZ
384 if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
385 smp_call_func_t func = csd->func;
386 void *info = csd->info;
387
52103be0 388 if (prev) {
545b8c8d 389 prev->next = &csd_next->node.llist;
52103be0 390 } else {
545b8c8d 391 entry = &csd_next->node.llist;
52103be0 392 }
4b44a21d 393
35feb604 394 csd_lock_record(csd);
8053871d
LT
395 func(info);
396 csd_unlock(csd);
35feb604 397 csd_lock_record(NULL);
8053871d 398 } else {
545b8c8d 399 prev = &csd->node.llist;
8053871d 400 }
3d442233 401 }
47885016 402
a1488664
PZ
403 if (!entry)
404 return;
405
47885016 406 /*
52103be0 407 * Second; run all !SYNC callbacks.
47885016 408 */
a1488664 409 prev = NULL;
545b8c8d 410 llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
4b44a21d 411 int type = CSD_TYPE(csd);
52103be0 412
a1488664
PZ
413 if (type != CSD_TYPE_TTWU) {
414 if (prev) {
545b8c8d 415 prev->next = &csd_next->node.llist;
a1488664 416 } else {
545b8c8d 417 entry = &csd_next->node.llist;
a1488664 418 }
4b44a21d 419
a1488664
PZ
420 if (type == CSD_TYPE_ASYNC) {
421 smp_call_func_t func = csd->func;
422 void *info = csd->info;
423
35feb604 424 csd_lock_record(csd);
a1488664
PZ
425 csd_unlock(csd);
426 func(info);
35feb604 427 csd_lock_record(NULL);
a1488664
PZ
428 } else if (type == CSD_TYPE_IRQ_WORK) {
429 irq_work_single(csd);
430 }
431
432 } else {
545b8c8d 433 prev = &csd->node.llist;
4b44a21d 434 }
52103be0 435 }
a1488664
PZ
436
437 /*
438 * Third; only CSD_TYPE_TTWU is left, issue those.
439 */
440 if (entry)
441 sched_ttwu_pending(entry);
3d442233
JA
442}
443
b2a02fc4
PZ
444void flush_smp_call_function_from_idle(void)
445{
446 unsigned long flags;
447
448 if (llist_empty(this_cpu_ptr(&call_single_queue)))
449 return;
450
451 local_irq_save(flags);
452 flush_smp_call_function_queue(true);
f9d34595
SAS
453 if (local_softirq_pending())
454 do_softirq();
455
b2a02fc4 456 local_irq_restore(flags);
3d442233
JA
457}
458
459/*
460 * smp_call_function_single - Run a function on a specific CPU
461 * @func: The function to run. This must be fast and non-blocking.
462 * @info: An arbitrary pointer to pass to the function.
3d442233
JA
463 * @wait: If true, wait until function has completed on other CPUs.
464 *
72f279b2 465 * Returns 0 on success, else a negative status code.
3d442233 466 */
3a5f65df 467int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
8691e5a8 468 int wait)
3d442233 469{
966a9671
YH
470 call_single_data_t *csd;
471 call_single_data_t csd_stack = {
545b8c8d 472 .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
966a9671 473 };
0b13fda1 474 int this_cpu;
8b28499a 475 int err;
3d442233 476
0b13fda1
IM
477 /*
478 * prevent preemption and reschedule on another processor,
479 * as well as CPU removal
480 */
481 this_cpu = get_cpu();
482
269c861b
SS
483 /*
484 * Can deadlock when called with interrupts disabled.
485 * We allow cpu's that are not yet online though, as no one else can
486 * send smp call function interrupt to this cpu and as such deadlocks
487 * can't happen.
488 */
489 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
490 && !oops_in_progress);
3d442233 491
19dbdcb8
PZ
492 /*
493 * When @wait we can deadlock when we interrupt between llist_add() and
494 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
495 * csd_lock() on because the interrupt context uses the same csd
496 * storage.
497 */
498 WARN_ON_ONCE(!in_task());
499
8053871d
LT
500 csd = &csd_stack;
501 if (!wait) {
502 csd = this_cpu_ptr(&csd_data);
503 csd_lock(csd);
504 }
505
4b44a21d
PZ
506 csd->func = func;
507 csd->info = info;
35feb604 508#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
545b8c8d
PZ
509 csd->node.src = smp_processor_id();
510 csd->node.dst = cpu;
e48c15b7 511#endif
4b44a21d
PZ
512
513 err = generic_exec_single(cpu, csd);
8053871d
LT
514
515 if (wait)
516 csd_lock_wait(csd);
3d442233
JA
517
518 put_cpu();
0b13fda1 519
f73be6de 520 return err;
3d442233
JA
521}
522EXPORT_SYMBOL(smp_call_function_single);
523
d7877c03 524/**
c46fff2a
FW
525 * smp_call_function_single_async(): Run an asynchronous function on a
526 * specific CPU.
d7877c03
FW
527 * @cpu: The CPU to run on.
528 * @csd: Pre-allocated and setup data structure
d7877c03 529 *
c46fff2a
FW
530 * Like smp_call_function_single(), but the call is asynchonous and
531 * can thus be done from contexts with disabled interrupts.
532 *
533 * The caller passes his own pre-allocated data structure
534 * (ie: embedded in an object) and is responsible for synchronizing it
535 * such that the IPIs performed on the @csd are strictly serialized.
536 *
5a18ceca
PX
537 * If the function is called with one csd which has not yet been
538 * processed by previous call to smp_call_function_single_async(), the
539 * function will return immediately with -EBUSY showing that the csd
540 * object is still in progress.
541 *
c46fff2a
FW
542 * NOTE: Be careful, there is unfortunately no current debugging facility to
543 * validate the correctness of this serialization.
d7877c03 544 */
966a9671 545int smp_call_function_single_async(int cpu, call_single_data_t *csd)
d7877c03
FW
546{
547 int err = 0;
d7877c03 548
fce8ad15 549 preempt_disable();
8053871d 550
545b8c8d 551 if (csd->node.u_flags & CSD_FLAG_LOCK) {
5a18ceca
PX
552 err = -EBUSY;
553 goto out;
554 }
8053871d 555
545b8c8d 556 csd->node.u_flags = CSD_FLAG_LOCK;
8053871d
LT
557 smp_wmb();
558
4b44a21d 559 err = generic_exec_single(cpu, csd);
5a18ceca
PX
560
561out:
fce8ad15 562 preempt_enable();
d7877c03
FW
563
564 return err;
565}
c46fff2a 566EXPORT_SYMBOL_GPL(smp_call_function_single_async);
d7877c03 567
2ea6dec4
RR
568/*
569 * smp_call_function_any - Run a function on any of the given cpus
570 * @mask: The mask of cpus it can run on.
571 * @func: The function to run. This must be fast and non-blocking.
572 * @info: An arbitrary pointer to pass to the function.
573 * @wait: If true, wait until function has completed.
574 *
575 * Returns 0 on success, else a negative status code (if no cpus were online).
2ea6dec4
RR
576 *
577 * Selection preference:
578 * 1) current cpu if in @mask
579 * 2) any cpu of current node if in @mask
580 * 3) any other online cpu in @mask
581 */
582int smp_call_function_any(const struct cpumask *mask,
3a5f65df 583 smp_call_func_t func, void *info, int wait)
2ea6dec4
RR
584{
585 unsigned int cpu;
586 const struct cpumask *nodemask;
587 int ret;
588
589 /* Try for same CPU (cheapest) */
590 cpu = get_cpu();
591 if (cpumask_test_cpu(cpu, mask))
592 goto call;
593
594 /* Try for same node. */
af2422c4 595 nodemask = cpumask_of_node(cpu_to_node(cpu));
2ea6dec4
RR
596 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
597 cpu = cpumask_next_and(cpu, nodemask, mask)) {
598 if (cpu_online(cpu))
599 goto call;
600 }
601
602 /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
603 cpu = cpumask_any_and(mask, cpu_online_mask);
604call:
605 ret = smp_call_function_single(cpu, func, info, wait);
606 put_cpu();
607 return ret;
608}
609EXPORT_SYMBOL_GPL(smp_call_function_any);
610
67719ef2
SAS
611static void smp_call_function_many_cond(const struct cpumask *mask,
612 smp_call_func_t func, void *info,
613 bool wait, smp_cond_func_t cond_func)
3d442233 614{
e1d12f32 615 struct call_function_data *cfd;
9a46ad6d 616 int cpu, next_cpu, this_cpu = smp_processor_id();
3d442233 617
269c861b
SS
618 /*
619 * Can deadlock when called with interrupts disabled.
620 * We allow cpu's that are not yet online though, as no one else can
621 * send smp call function interrupt to this cpu and as such deadlocks
622 * can't happen.
623 */
624 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
bd924e8c 625 && !oops_in_progress && !early_boot_irqs_disabled);
3d442233 626
19dbdcb8
PZ
627 /*
628 * When @wait we can deadlock when we interrupt between llist_add() and
629 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
630 * csd_lock() on because the interrupt context uses the same csd
631 * storage.
632 */
633 WARN_ON_ONCE(!in_task());
634
723aae25 635 /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
54b11e6d 636 cpu = cpumask_first_and(mask, cpu_online_mask);
0b13fda1 637 if (cpu == this_cpu)
54b11e6d 638 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
0b13fda1 639
54b11e6d
RR
640 /* No online cpus? We're done. */
641 if (cpu >= nr_cpu_ids)
642 return;
643
644 /* Do we have another CPU which isn't us? */
645 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
0b13fda1 646 if (next_cpu == this_cpu)
54b11e6d
RR
647 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
648
649 /* Fastpath: do that cpu by itself. */
650 if (next_cpu >= nr_cpu_ids) {
25a3a154 651 if (!cond_func || cond_func(cpu, info))
67719ef2 652 smp_call_function_single(cpu, func, info, wait);
54b11e6d 653 return;
3d442233
JA
654 }
655
bb964a92 656 cfd = this_cpu_ptr(&cfd_data);
45a57919 657
e1d12f32 658 cpumask_and(cfd->cpumask, mask, cpu_online_mask);
6c8557bd 659 __cpumask_clear_cpu(this_cpu, cfd->cpumask);
723aae25
MM
660
661 /* Some callers race with other cpus changing the passed mask */
e1d12f32 662 if (unlikely(!cpumask_weight(cfd->cpumask)))
723aae25 663 return;
3d442233 664
3fc5b3b6 665 cpumask_clear(cfd->cpumask_ipi);
e1d12f32 666 for_each_cpu(cpu, cfd->cpumask) {
966a9671 667 call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
9a46ad6d 668
67719ef2
SAS
669 if (cond_func && !cond_func(cpu, info))
670 continue;
671
9a46ad6d 672 csd_lock(csd);
8053871d 673 if (wait)
545b8c8d 674 csd->node.u_flags |= CSD_TYPE_SYNC;
9a46ad6d
SL
675 csd->func = func;
676 csd->info = info;
35feb604 677#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
545b8c8d
PZ
678 csd->node.src = smp_processor_id();
679 csd->node.dst = cpu;
e48c15b7 680#endif
545b8c8d 681 if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu)))
6c8557bd 682 __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
9a46ad6d 683 }
561920a0 684
3d442233 685 /* Send a message to all CPUs in the map */
3fc5b3b6 686 arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
3d442233 687
9a46ad6d 688 if (wait) {
e1d12f32 689 for_each_cpu(cpu, cfd->cpumask) {
966a9671 690 call_single_data_t *csd;
e1d12f32
AM
691
692 csd = per_cpu_ptr(cfd->csd, cpu);
9a46ad6d
SL
693 csd_lock_wait(csd);
694 }
695 }
3d442233 696}
67719ef2
SAS
697
698/**
699 * smp_call_function_many(): Run a function on a set of other CPUs.
700 * @mask: The set of cpus to run on (only runs on online subset).
701 * @func: The function to run. This must be fast and non-blocking.
702 * @info: An arbitrary pointer to pass to the function.
703 * @wait: If true, wait (atomically) until function has completed
704 * on other CPUs.
705 *
706 * If @wait is true, then returns once @func has returned.
707 *
708 * You must not call this function with disabled interrupts or from a
709 * hardware interrupt handler or from a bottom half handler. Preemption
710 * must be disabled when calling this function.
711 */
712void smp_call_function_many(const struct cpumask *mask,
713 smp_call_func_t func, void *info, bool wait)
714{
715 smp_call_function_many_cond(mask, func, info, wait, NULL);
716}
54b11e6d 717EXPORT_SYMBOL(smp_call_function_many);
3d442233
JA
718
719/**
720 * smp_call_function(): Run a function on all other CPUs.
721 * @func: The function to run. This must be fast and non-blocking.
722 * @info: An arbitrary pointer to pass to the function.
0b13fda1
IM
723 * @wait: If true, wait (atomically) until function has completed
724 * on other CPUs.
3d442233 725 *
54b11e6d 726 * Returns 0.
3d442233
JA
727 *
728 * If @wait is true, then returns once @func has returned; otherwise
72f279b2 729 * it returns just before the target cpu calls @func.
3d442233
JA
730 *
731 * You must not call this function with disabled interrupts or from a
732 * hardware interrupt handler or from a bottom half handler.
733 */
caa75932 734void smp_call_function(smp_call_func_t func, void *info, int wait)
3d442233 735{
3d442233 736 preempt_disable();
54b11e6d 737 smp_call_function_many(cpu_online_mask, func, info, wait);
3d442233 738 preempt_enable();
3d442233
JA
739}
740EXPORT_SYMBOL(smp_call_function);
351f8f8e 741
34db18a0
AW
742/* Setup configured maximum number of CPUs to activate */
743unsigned int setup_max_cpus = NR_CPUS;
744EXPORT_SYMBOL(setup_max_cpus);
745
746
747/*
748 * Setup routine for controlling SMP activation
749 *
750 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
751 * activation entirely (the MPS table probe still happens, though).
752 *
753 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
754 * greater than 0, limits the maximum number of CPUs activated in
755 * SMP mode to <NUM>.
756 */
757
758void __weak arch_disable_smp_support(void) { }
759
760static int __init nosmp(char *str)
761{
762 setup_max_cpus = 0;
763 arch_disable_smp_support();
764
765 return 0;
766}
767
768early_param("nosmp", nosmp);
769
770/* this is hard limit */
771static int __init nrcpus(char *str)
772{
773 int nr_cpus;
774
58934356 775 if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
34db18a0
AW
776 nr_cpu_ids = nr_cpus;
777
778 return 0;
779}
780
781early_param("nr_cpus", nrcpus);
782
783static int __init maxcpus(char *str)
784{
785 get_option(&str, &setup_max_cpus);
786 if (setup_max_cpus == 0)
787 arch_disable_smp_support();
788
789 return 0;
790}
791
792early_param("maxcpus", maxcpus);
793
794/* Setup number of possible processor ids */
9b130ad5 795unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
34db18a0
AW
796EXPORT_SYMBOL(nr_cpu_ids);
797
798/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
799void __init setup_nr_cpu_ids(void)
800{
801 nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
802}
803
804/* Called by boot processor to activate the rest. */
805void __init smp_init(void)
806{
92b23278 807 int num_nodes, num_cpus;
34db18a0 808
3bb5d2ee 809 idle_threads_init();
4cb28ced 810 cpuhp_threads_init();
3bb5d2ee 811
51111dce
ME
812 pr_info("Bringing up secondary CPUs ...\n");
813
b99a2659 814 bringup_nonboot_cpus(setup_max_cpus);
34db18a0 815
92b23278
ME
816 num_nodes = num_online_nodes();
817 num_cpus = num_online_cpus();
818 pr_info("Brought up %d node%s, %d CPU%s\n",
819 num_nodes, (num_nodes > 1 ? "s" : ""),
820 num_cpus, (num_cpus > 1 ? "s" : ""));
821
34db18a0 822 /* Any cleanup work */
34db18a0
AW
823 smp_cpus_done(setup_max_cpus);
824}
825
351f8f8e 826/*
bd924e8c
TH
827 * Call a function on all processors. May be used during early boot while
828 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
829 * of local_irq_disable/enable().
351f8f8e 830 */
58eb7b77 831void on_each_cpu(smp_call_func_t func, void *info, int wait)
351f8f8e 832{
bd924e8c 833 unsigned long flags;
351f8f8e
AW
834
835 preempt_disable();
caa75932 836 smp_call_function(func, info, wait);
bd924e8c 837 local_irq_save(flags);
351f8f8e 838 func(info);
bd924e8c 839 local_irq_restore(flags);
351f8f8e 840 preempt_enable();
351f8f8e
AW
841}
842EXPORT_SYMBOL(on_each_cpu);
3fc498f1
GBY
843
844/**
845 * on_each_cpu_mask(): Run a function on processors specified by
846 * cpumask, which may include the local processor.
847 * @mask: The set of cpus to run on (only runs on online subset).
848 * @func: The function to run. This must be fast and non-blocking.
849 * @info: An arbitrary pointer to pass to the function.
850 * @wait: If true, wait (atomically) until function has completed
851 * on other CPUs.
852 *
853 * If @wait is true, then returns once @func has returned.
854 *
202da400
DD
855 * You must not call this function with disabled interrupts or from a
856 * hardware interrupt handler or from a bottom half handler. The
857 * exception is that it may be used during early boot while
858 * early_boot_irqs_disabled is set.
3fc498f1
GBY
859 */
860void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
861 void *info, bool wait)
862{
863 int cpu = get_cpu();
864
865 smp_call_function_many(mask, func, info, wait);
866 if (cpumask_test_cpu(cpu, mask)) {
202da400
DD
867 unsigned long flags;
868 local_irq_save(flags);
3fc498f1 869 func(info);
202da400 870 local_irq_restore(flags);
3fc498f1
GBY
871 }
872 put_cpu();
873}
874EXPORT_SYMBOL(on_each_cpu_mask);
b3a7e98e
GBY
875
876/*
877 * on_each_cpu_cond(): Call a function on each processor for which
878 * the supplied function cond_func returns true, optionally waiting
879 * for all the required CPUs to finish. This may include the local
880 * processor.
881 * @cond_func: A callback function that is passed a cpu id and
7b7b8a2c 882 * the info parameter. The function is called
b3a7e98e
GBY
883 * with preemption disabled. The function should
884 * return a blooean value indicating whether to IPI
885 * the specified CPU.
886 * @func: The function to run on all applicable CPUs.
887 * This must be fast and non-blocking.
888 * @info: An arbitrary pointer to pass to both functions.
889 * @wait: If true, wait (atomically) until function has
890 * completed on other CPUs.
b3a7e98e
GBY
891 *
892 * Preemption is disabled to protect against CPUs going offline but not online.
893 * CPUs going online during the call will not be seen or sent an IPI.
894 *
895 * You must not call this function with disabled interrupts or
896 * from a hardware interrupt handler or from a bottom half handler.
897 */
5671d814 898void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
cb923159 899 void *info, bool wait, const struct cpumask *mask)
b3a7e98e 900{
67719ef2
SAS
901 int cpu = get_cpu();
902
903 smp_call_function_many_cond(mask, func, info, wait, cond_func);
904 if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
905 unsigned long flags;
906
907 local_irq_save(flags);
908 func(info);
909 local_irq_restore(flags);
b3a7e98e 910 }
67719ef2 911 put_cpu();
b3a7e98e 912}
7d49b28a
RR
913EXPORT_SYMBOL(on_each_cpu_cond_mask);
914
5671d814 915void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
cb923159 916 void *info, bool wait)
7d49b28a 917{
cb923159 918 on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
7d49b28a 919}
b3a7e98e 920EXPORT_SYMBOL(on_each_cpu_cond);
f37f435f
TG
921
/* Empty callback: used when the cross call itself is the whole point. */
static void do_nothing(void *unused) { }
925
926/**
927 * kick_all_cpus_sync - Force all cpus out of idle
928 *
929 * Used to synchronize the update of pm_idle function pointer. It's
930 * called after the pointer is updated and returns after the dummy
931 * callback function has been executed on all cpus. The execution of
932 * the function can only happen on the remote cpus after they have
933 * left the idle function which had been called via pm_idle function
934 * pointer. So it's guaranteed that nothing uses the previous pointer
935 * anymore.
936 */
937void kick_all_cpus_sync(void)
938{
939 /* Make sure the change is visible before we kick the cpus */
940 smp_mb();
941 smp_call_function(do_nothing, NULL, 1);
942}
943EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
c6f4459f
CL
944
945/**
946 * wake_up_all_idle_cpus - break all cpus out of idle
947 * wake_up_all_idle_cpus try to break all cpus which is in idle state even
948 * including idle polling cpus, for non-idle cpus, we will do nothing
949 * for them.
950 */
951void wake_up_all_idle_cpus(void)
952{
953 int cpu;
954
955 preempt_disable();
956 for_each_online_cpu(cpu) {
957 if (cpu == smp_processor_id())
958 continue;
959
960 wake_up_if_idle(cpu);
961 }
962 preempt_enable();
963}
964EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
df8ce9d7
JG
965
966/**
967 * smp_call_on_cpu - Call a function on a specific cpu
968 *
969 * Used to call a function on a specific cpu and wait for it to return.
970 * Optionally make sure the call is done on a specified physical cpu via vcpu
971 * pinning in order to support virtualized environments.
972 */
973struct smp_call_on_cpu_struct {
974 struct work_struct work;
975 struct completion done;
976 int (*func)(void *);
977 void *data;
978 int ret;
979 int cpu;
980};
981
982static void smp_call_on_cpu_callback(struct work_struct *work)
983{
984 struct smp_call_on_cpu_struct *sscs;
985
986 sscs = container_of(work, struct smp_call_on_cpu_struct, work);
987 if (sscs->cpu >= 0)
988 hypervisor_pin_vcpu(sscs->cpu);
989 sscs->ret = sscs->func(sscs->data);
990 if (sscs->cpu >= 0)
991 hypervisor_pin_vcpu(-1);
992
993 complete(&sscs->done);
994}
995
996int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
997{
998 struct smp_call_on_cpu_struct sscs = {
df8ce9d7
JG
999 .done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1000 .func = func,
1001 .data = par,
1002 .cpu = phys ? cpu : -1,
1003 };
1004
8db54949
PZ
1005 INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1006
df8ce9d7
JG
1007 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1008 return -ENXIO;
1009
1010 queue_work_on(cpu, system_wq, &sscs.work);
1011 wait_for_completion(&sscs.done);
1012
1013 return sscs.ret;
1014}
1015EXPORT_SYMBOL_GPL(smp_call_on_cpu);