1 // SPDX-License-Identifier: GPL-2.0-only
3 * Generic helpers for smp ipi calls
5 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/irq_work.h>
11 #include <linux/rcupdate.h>
12 #include <linux/rculist.h>
13 #include <linux/kernel.h>
14 #include <linux/export.h>
15 #include <linux/percpu.h>
16 #include <linux/init.h>
17 #include <linux/interrupt.h>
18 #include <linux/gfp.h>
19 #include <linux/smp.h>
20 #include <linux/cpu.h>
21 #include <linux/sched.h>
22 #include <linux/sched/idle.h>
23 #include <linux/hypervisor.h>
24 #include <linux/sched/clock.h>
25 #include <linux/nmi.h>
26 #include <linux/sched/debug.h>
27 #include <linux/jump_label.h>
30 #include "sched/smp.h"
32 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
34 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
40 #define CFD_SEQ_NOCPU 0xffff
42 #define CFD_SEQ_QUEUE 0
44 #define CFD_SEQ_NOIPI 2
45 #define CFD_SEQ_PING 3
46 #define CFD_SEQ_PINGED 4
47 #define CFD_SEQ_HANDLE 5
48 #define CFD_SEQ_DEQUEUE 6
49 #define CFD_SEQ_IDLE 7
50 #define CFD_SEQ_GOTIPI 8
51 #define CFD_SEQ_HDLEND 9
56 static char *seq_type
[] = {
57 [CFD_SEQ_QUEUE
] = "queue",
58 [CFD_SEQ_IPI
] = "ipi",
59 [CFD_SEQ_NOIPI
] = "noipi",
60 [CFD_SEQ_PING
] = "ping",
61 [CFD_SEQ_PINGED
] = "pinged",
62 [CFD_SEQ_HANDLE
] = "handle",
63 [CFD_SEQ_DEQUEUE
] = "dequeue (src CPU 0 == empty)",
64 [CFD_SEQ_IDLE
] = "idle",
65 [CFD_SEQ_GOTIPI
] = "gotipi",
66 [CFD_SEQ_HDLEND
] = "hdlend (src CPU 0 == early)",
69 struct cfd_seq_local
{
81 call_single_data_t csd
;
82 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
89 struct call_function_data
{
90 struct cfd_percpu __percpu
*pcpu
;
91 cpumask_var_t cpumask
;
92 cpumask_var_t cpumask_ipi
;
95 static DEFINE_PER_CPU_ALIGNED(struct call_function_data
, cfd_data
);
97 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head
, call_single_queue
);
99 static void flush_smp_call_function_queue(bool warn_cpu_offline
);
101 int smpcfd_prepare_cpu(unsigned int cpu
)
103 struct call_function_data
*cfd
= &per_cpu(cfd_data
, cpu
);
105 if (!zalloc_cpumask_var_node(&cfd
->cpumask
, GFP_KERNEL
,
108 if (!zalloc_cpumask_var_node(&cfd
->cpumask_ipi
, GFP_KERNEL
,
110 free_cpumask_var(cfd
->cpumask
);
113 cfd
->pcpu
= alloc_percpu(struct cfd_percpu
);
115 free_cpumask_var(cfd
->cpumask
);
116 free_cpumask_var(cfd
->cpumask_ipi
);
123 int smpcfd_dead_cpu(unsigned int cpu
)
125 struct call_function_data
*cfd
= &per_cpu(cfd_data
, cpu
);
127 free_cpumask_var(cfd
->cpumask
);
128 free_cpumask_var(cfd
->cpumask_ipi
);
129 free_percpu(cfd
->pcpu
);
133 int smpcfd_dying_cpu(unsigned int cpu
)
136 * The IPIs for the smp-call-function callbacks queued by other
137 * CPUs might arrive late, either due to hardware latencies or
138 * because this CPU disabled interrupts (inside stop-machine)
139 * before the IPIs were sent. So flush out any pending callbacks
140 * explicitly (without waiting for the IPIs to arrive), to
141 * ensure that the outgoing CPU doesn't go offline with work
144 flush_smp_call_function_queue(false);
149 void __init
call_function_init(void)
153 for_each_possible_cpu(i
)
154 init_llist_head(&per_cpu(call_single_queue
, i
));
156 smpcfd_prepare_cpu(smp_processor_id());
159 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
161 static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled
);
162 static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended
);
164 static int __init
csdlock_debug(char *str
)
166 unsigned int val
= 0;
168 if (str
&& !strcmp(str
, "ext")) {
170 static_branch_enable(&csdlock_debug_extended
);
172 get_option(&str
, &val
);
175 static_branch_enable(&csdlock_debug_enabled
);
179 early_param("csdlock_debug", csdlock_debug
);
181 static DEFINE_PER_CPU(call_single_data_t
*, cur_csd
);
182 static DEFINE_PER_CPU(smp_call_func_t
, cur_csd_func
);
183 static DEFINE_PER_CPU(void *, cur_csd_info
);
184 static DEFINE_PER_CPU(struct cfd_seq_local
, cfd_seq_local
);
186 #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
187 static atomic_t csd_bug_count
= ATOMIC_INIT(0);
190 #define CFD_SEQ(s, d, t, c) \
191 (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c }
193 static u64
cfd_seq_inc(unsigned int src
, unsigned int dst
, unsigned int type
)
195 union cfd_seq_cnt
new, old
;
197 new = CFD_SEQ(src
, dst
, type
, 0);
200 old
.val
= READ_ONCE(cfd_seq
);
201 new.u
.cnt
= old
.u
.cnt
+ 1;
202 } while (cmpxchg(&cfd_seq
, old
.val
, new.val
) != old
.val
);
207 #define cfd_seq_store(var, src, dst, type) \
209 if (static_branch_unlikely(&csdlock_debug_extended)) \
210 var = cfd_seq_inc(src, dst, type); \
213 /* Record current CSD work for current CPU, NULL to erase. */
214 static void __csd_lock_record(struct __call_single_data
*csd
)
217 smp_mb(); /* NULL cur_csd after unlock. */
218 __this_cpu_write(cur_csd
, NULL
);
221 __this_cpu_write(cur_csd_func
, csd
->func
);
222 __this_cpu_write(cur_csd_info
, csd
->info
);
223 smp_wmb(); /* func and info before csd. */
224 __this_cpu_write(cur_csd
, csd
);
225 smp_mb(); /* Update cur_csd before function call. */
226 /* Or before unlock, as the case may be. */
229 static __always_inline
void csd_lock_record(struct __call_single_data
*csd
)
231 if (static_branch_unlikely(&csdlock_debug_enabled
))
232 __csd_lock_record(csd
);
235 static int csd_lock_wait_getcpu(struct __call_single_data
*csd
)
237 unsigned int csd_type
;
239 csd_type
= CSD_TYPE(csd
);
240 if (csd_type
== CSD_TYPE_ASYNC
|| csd_type
== CSD_TYPE_SYNC
)
241 return csd
->node
.dst
; /* Other CSD_TYPE_ values might not have ->dst. */
245 static void cfd_seq_data_add(u64 val
, unsigned int src
, unsigned int dst
,
246 unsigned int type
, union cfd_seq_cnt
*data
,
247 unsigned int *n_data
, unsigned int now
)
249 union cfd_seq_cnt
new[2];
250 unsigned int i
, j
, k
;
253 new[1] = CFD_SEQ(src
, dst
, type
, new[0].u
.cnt
+ 1);
255 for (i
= 0; i
< 2; i
++) {
256 if (new[i
].u
.cnt
<= now
)
257 new[i
].u
.cnt
|= 0x80000000U
;
258 for (j
= 0; j
< *n_data
; j
++) {
259 if (new[i
].u
.cnt
== data
[j
].u
.cnt
) {
260 /* Direct read value trumps generated one. */
262 data
[j
].val
= new[i
].val
;
265 if (new[i
].u
.cnt
< data
[j
].u
.cnt
) {
266 for (k
= *n_data
; k
> j
; k
--)
267 data
[k
].val
= data
[k
- 1].val
;
268 data
[j
].val
= new[i
].val
;
274 data
[j
].val
= new[i
].val
;
280 static const char *csd_lock_get_type(unsigned int type
)
282 return (type
>= ARRAY_SIZE(seq_type
)) ? "?" : seq_type
[type
];
285 static void csd_lock_print_extended(struct __call_single_data
*csd
, int cpu
)
287 struct cfd_seq_local
*seq
= &per_cpu(cfd_seq_local
, cpu
);
288 unsigned int srccpu
= csd
->node
.src
;
289 struct call_function_data
*cfd
= per_cpu_ptr(&cfd_data
, srccpu
);
290 struct cfd_percpu
*pcpu
= per_cpu_ptr(cfd
->pcpu
, cpu
);
292 union cfd_seq_cnt data
[2 * ARRAY_SIZE(seq_type
)];
293 unsigned int n_data
= 0, i
;
295 data
[0].val
= READ_ONCE(cfd_seq
);
298 cfd_seq_data_add(pcpu
->seq_queue
, srccpu
, cpu
, CFD_SEQ_QUEUE
, data
, &n_data
, now
);
299 cfd_seq_data_add(pcpu
->seq_ipi
, srccpu
, cpu
, CFD_SEQ_IPI
, data
, &n_data
, now
);
300 cfd_seq_data_add(pcpu
->seq_noipi
, srccpu
, cpu
, CFD_SEQ_NOIPI
, data
, &n_data
, now
);
302 cfd_seq_data_add(per_cpu(cfd_seq_local
.ping
, srccpu
), srccpu
, CFD_SEQ_NOCPU
, CFD_SEQ_PING
, data
, &n_data
, now
);
303 cfd_seq_data_add(per_cpu(cfd_seq_local
.pinged
, srccpu
), srccpu
, CFD_SEQ_NOCPU
, CFD_SEQ_PINGED
, data
, &n_data
, now
);
305 cfd_seq_data_add(seq
->idle
, CFD_SEQ_NOCPU
, cpu
, CFD_SEQ_IDLE
, data
, &n_data
, now
);
306 cfd_seq_data_add(seq
->gotipi
, CFD_SEQ_NOCPU
, cpu
, CFD_SEQ_GOTIPI
, data
, &n_data
, now
);
307 cfd_seq_data_add(seq
->handle
, CFD_SEQ_NOCPU
, cpu
, CFD_SEQ_HANDLE
, data
, &n_data
, now
);
308 cfd_seq_data_add(seq
->dequeue
, CFD_SEQ_NOCPU
, cpu
, CFD_SEQ_DEQUEUE
, data
, &n_data
, now
);
309 cfd_seq_data_add(seq
->hdlend
, CFD_SEQ_NOCPU
, cpu
, CFD_SEQ_HDLEND
, data
, &n_data
, now
);
311 for (i
= 0; i
< n_data
; i
++) {
312 pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n",
313 data
[i
].u
.cnt
& ~0x80000000U
, data
[i
].u
.src
,
314 data
[i
].u
.dst
, csd_lock_get_type(data
[i
].u
.type
));
316 pr_alert("\tcsd: cnt now: %07x\n", now
);
320 * Complain if too much time spent waiting. Note that only
321 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
322 * so waiting on other types gets much less information.
324 static bool csd_lock_wait_toolong(struct __call_single_data
*csd
, u64 ts0
, u64
*ts1
, int *bug_id
)
330 call_single_data_t
*cpu_cur_csd
;
331 unsigned int flags
= READ_ONCE(csd
->node
.u_flags
);
333 if (!(flags
& CSD_FLAG_LOCK
)) {
334 if (!unlikely(*bug_id
))
336 cpu
= csd_lock_wait_getcpu(csd
);
337 pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
338 *bug_id
, raw_smp_processor_id(), cpu
);
343 ts_delta
= ts2
- *ts1
;
344 if (likely(ts_delta
<= CSD_LOCK_TIMEOUT
))
347 firsttime
= !*bug_id
;
349 *bug_id
= atomic_inc_return(&csd_bug_count
);
350 cpu
= csd_lock_wait_getcpu(csd
);
351 if (WARN_ONCE(cpu
< 0 || cpu
>= nr_cpu_ids
, "%s: cpu = %d\n", __func__
, cpu
))
355 cpu_cur_csd
= smp_load_acquire(&per_cpu(cur_csd
, cpux
)); /* Before func and info. */
356 pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
357 firsttime
? "Detected" : "Continued", *bug_id
, raw_smp_processor_id(), ts2
- ts0
,
358 cpu
, csd
->func
, csd
->info
);
359 if (cpu_cur_csd
&& csd
!= cpu_cur_csd
) {
360 pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
361 *bug_id
, READ_ONCE(per_cpu(cur_csd_func
, cpux
)),
362 READ_ONCE(per_cpu(cur_csd_info
, cpux
)));
364 pr_alert("\tcsd: CSD lock (#%d) %s.\n",
365 *bug_id
, !cpu_cur_csd
? "unresponsive" : "handling this request");
368 if (static_branch_unlikely(&csdlock_debug_extended
))
369 csd_lock_print_extended(csd
, cpu
);
370 if (!trigger_single_cpu_backtrace(cpu
))
373 pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id
, raw_smp_processor_id(), cpu
);
374 arch_send_call_function_single_ipi(cpu
);
384 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
386 * For non-synchronous ipi calls the csd can still be in use by the
387 * previous function call. For multi-cpu calls it's even more interesting
388 * as we'll have to ensure no other cpu is observing our csd.
390 static void __csd_lock_wait(struct __call_single_data
*csd
)
395 ts1
= ts0
= sched_clock();
397 if (csd_lock_wait_toolong(csd
, ts0
, &ts1
, &bug_id
))
401 smp_acquire__after_ctrl_dep();
404 static __always_inline
void csd_lock_wait(struct __call_single_data
*csd
)
406 if (static_branch_unlikely(&csdlock_debug_enabled
)) {
407 __csd_lock_wait(csd
);
411 smp_cond_load_acquire(&csd
->node
.u_flags
, !(VAL
& CSD_FLAG_LOCK
));
414 static void __smp_call_single_queue_debug(int cpu
, struct llist_node
*node
)
416 unsigned int this_cpu
= smp_processor_id();
417 struct cfd_seq_local
*seq
= this_cpu_ptr(&cfd_seq_local
);
418 struct call_function_data
*cfd
= this_cpu_ptr(&cfd_data
);
419 struct cfd_percpu
*pcpu
= per_cpu_ptr(cfd
->pcpu
, cpu
);
421 cfd_seq_store(pcpu
->seq_queue
, this_cpu
, cpu
, CFD_SEQ_QUEUE
);
422 if (llist_add(node
, &per_cpu(call_single_queue
, cpu
))) {
423 cfd_seq_store(pcpu
->seq_ipi
, this_cpu
, cpu
, CFD_SEQ_IPI
);
424 cfd_seq_store(seq
->ping
, this_cpu
, cpu
, CFD_SEQ_PING
);
425 send_call_function_single_ipi(cpu
);
426 cfd_seq_store(seq
->pinged
, this_cpu
, cpu
, CFD_SEQ_PINGED
);
428 cfd_seq_store(pcpu
->seq_noipi
, this_cpu
, cpu
, CFD_SEQ_NOIPI
);
432 #define cfd_seq_store(var, src, dst, type)
/*
 * Empty variant of csd_lock_record(): nothing is recorded.
 * NOTE(review): presumably the !CONFIG_CSD_LOCK_WAIT_DEBUG build - confirm
 * against the surrounding #ifdef/#else.
 */
static void csd_lock_record(struct __call_single_data *csd) { }
438 static __always_inline
void csd_lock_wait(struct __call_single_data
*csd
)
440 smp_cond_load_acquire(&csd
->node
.u_flags
, !(VAL
& CSD_FLAG_LOCK
));
444 static __always_inline
void csd_lock(struct __call_single_data
*csd
)
447 csd
->node
.u_flags
|= CSD_FLAG_LOCK
;
450 * prevent CPU from reordering the above assignment
451 * to ->flags with any subsequent assignments to other
452 * fields of the specified call_single_data_t structure:
457 static __always_inline
void csd_unlock(struct __call_single_data
*csd
)
459 WARN_ON(!(csd
->node
.u_flags
& CSD_FLAG_LOCK
));
462 * ensure we're all done before releasing data:
464 smp_store_release(&csd
->node
.u_flags
, 0);
467 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t
, csd_data
);
469 void __smp_call_single_queue(int cpu
, struct llist_node
*node
)
471 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
472 if (static_branch_unlikely(&csdlock_debug_extended
)) {
475 type
= CSD_TYPE(container_of(node
, call_single_data_t
,
477 if (type
== CSD_TYPE_SYNC
|| type
== CSD_TYPE_ASYNC
) {
478 __smp_call_single_queue_debug(cpu
, node
);
485 * The list addition should be visible before sending the IPI
486 * handler locks the list to pull the entry off it because of
487 * normal cache coherency rules implied by spinlocks.
489 * If IPIs can go out of order to the cache coherency protocol
490 * in an architecture, sufficient synchronisation should be added
491 * to arch code to make it appear to obey cache coherency WRT
492 * locking and barrier primitives. Generic code isn't really
493 * equipped to do the right thing...
495 if (llist_add(node
, &per_cpu(call_single_queue
, cpu
)))
496 send_call_function_single_ipi(cpu
);
500 * Insert a previously allocated call_single_data_t element
501 * for execution on the given CPU. data must already have
502 * ->func, ->info, and ->flags set.
504 static int generic_exec_single(int cpu
, struct __call_single_data
*csd
)
506 if (cpu
== smp_processor_id()) {
507 smp_call_func_t func
= csd
->func
;
508 void *info
= csd
->info
;
512 * We can unlock early even for the synchronous on-stack case,
513 * since we're doing this from the same CPU..
515 csd_lock_record(csd
);
517 local_irq_save(flags
);
519 csd_lock_record(NULL
);
520 local_irq_restore(flags
);
524 if ((unsigned)cpu
>= nr_cpu_ids
|| !cpu_online(cpu
)) {
529 __smp_call_single_queue(cpu
, &csd
->node
.llist
);
535 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
537 * Invoked by arch to handle an IPI for call function single.
538 * Must be called with interrupts disabled.
540 void generic_smp_call_function_single_interrupt(void)
542 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->gotipi
, CFD_SEQ_NOCPU
,
543 smp_processor_id(), CFD_SEQ_GOTIPI
);
544 flush_smp_call_function_queue(true);
548 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
550 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
551 * offline CPU. Skip this check if set to 'false'.
553 * Flush any pending smp-call-function callbacks queued on this CPU. This is
554 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
555 * to ensure that all pending IPI callbacks are run before it goes completely
558 * Loop through the call_single_queue and run all the queued callbacks.
559 * Must be called with interrupts disabled.
561 static void flush_smp_call_function_queue(bool warn_cpu_offline
)
563 call_single_data_t
*csd
, *csd_next
;
564 struct llist_node
*entry
, *prev
;
565 struct llist_head
*head
;
568 lockdep_assert_irqs_disabled();
570 head
= this_cpu_ptr(&call_single_queue
);
571 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->handle
, CFD_SEQ_NOCPU
,
572 smp_processor_id(), CFD_SEQ_HANDLE
);
573 entry
= llist_del_all(head
);
574 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->dequeue
,
575 /* Special meaning of source cpu: 0 == queue empty */
576 entry
? CFD_SEQ_NOCPU
: 0,
577 smp_processor_id(), CFD_SEQ_DEQUEUE
);
578 entry
= llist_reverse_order(entry
);
580 /* There shouldn't be any pending callbacks on an offline CPU. */
581 if (unlikely(warn_cpu_offline
&& !cpu_online(smp_processor_id()) &&
582 !warned
&& !llist_empty(head
))) {
584 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
587 * We don't have to use the _safe() variant here
588 * because we are not invoking the IPI handlers yet.
590 llist_for_each_entry(csd
, entry
, node
.llist
) {
591 switch (CSD_TYPE(csd
)) {
594 case CSD_TYPE_IRQ_WORK
:
595 pr_warn("IPI callback %pS sent to offline CPU\n",
600 pr_warn("IPI task-wakeup sent to offline CPU\n");
604 pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
612 * First; run all SYNC callbacks, people are waiting for us.
615 llist_for_each_entry_safe(csd
, csd_next
, entry
, node
.llist
) {
616 /* Do we wait until *after* callback? */
617 if (CSD_TYPE(csd
) == CSD_TYPE_SYNC
) {
618 smp_call_func_t func
= csd
->func
;
619 void *info
= csd
->info
;
622 prev
->next
= &csd_next
->node
.llist
;
624 entry
= &csd_next
->node
.llist
;
627 csd_lock_record(csd
);
630 csd_lock_record(NULL
);
632 prev
= &csd
->node
.llist
;
637 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->hdlend
,
638 0, smp_processor_id(),
644 * Second; run all !SYNC callbacks.
647 llist_for_each_entry_safe(csd
, csd_next
, entry
, node
.llist
) {
648 int type
= CSD_TYPE(csd
);
650 if (type
!= CSD_TYPE_TTWU
) {
652 prev
->next
= &csd_next
->node
.llist
;
654 entry
= &csd_next
->node
.llist
;
657 if (type
== CSD_TYPE_ASYNC
) {
658 smp_call_func_t func
= csd
->func
;
659 void *info
= csd
->info
;
661 csd_lock_record(csd
);
664 csd_lock_record(NULL
);
665 } else if (type
== CSD_TYPE_IRQ_WORK
) {
666 irq_work_single(csd
);
670 prev
= &csd
->node
.llist
;
675 * Third; only CSD_TYPE_TTWU is left, issue those.
678 sched_ttwu_pending(entry
);
680 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->hdlend
, CFD_SEQ_NOCPU
,
681 smp_processor_id(), CFD_SEQ_HDLEND
);
684 void flush_smp_call_function_from_idle(void)
688 if (llist_empty(this_cpu_ptr(&call_single_queue
)))
691 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->idle
, CFD_SEQ_NOCPU
,
692 smp_processor_id(), CFD_SEQ_IDLE
);
693 local_irq_save(flags
);
694 flush_smp_call_function_queue(true);
695 if (local_softirq_pending())
698 local_irq_restore(flags
);
702 * smp_call_function_single - Run a function on a specific CPU
703 * @func: The function to run. This must be fast and non-blocking.
704 * @info: An arbitrary pointer to pass to the function.
705 * @wait: If true, wait until function has completed on other CPUs.
707 * Returns 0 on success, else a negative status code.
709 int smp_call_function_single(int cpu
, smp_call_func_t func
, void *info
,
712 call_single_data_t
*csd
;
713 call_single_data_t csd_stack
= {
714 .node
= { .u_flags
= CSD_FLAG_LOCK
| CSD_TYPE_SYNC
, },
720 * prevent preemption and reschedule on another processor,
721 * as well as CPU removal
723 this_cpu
= get_cpu();
726 * Can deadlock when called with interrupts disabled.
727 * We allow CPUs that are not yet online though, as no one else can
728 * send smp call function interrupt to this cpu and as such deadlocks
731 WARN_ON_ONCE(cpu_online(this_cpu
) && irqs_disabled()
732 && !oops_in_progress
);
735 * When @wait we can deadlock when we interrupt between llist_add() and
736 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
737 * csd_lock() on because the interrupt context uses the same csd
740 WARN_ON_ONCE(!in_task());
744 csd
= this_cpu_ptr(&csd_data
);
750 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
751 csd
->node
.src
= smp_processor_id();
755 err
= generic_exec_single(cpu
, csd
);
764 EXPORT_SYMBOL(smp_call_function_single
);
767 * smp_call_function_single_async() - Run an asynchronous function on a
769 * @cpu: The CPU to run on.
770 * @csd: Pre-allocated and setup data structure
772 * Like smp_call_function_single(), but the call is asynchronous and
773 * can thus be done from contexts with disabled interrupts.
775 * The caller passes his own pre-allocated data structure
776 * (ie: embedded in an object) and is responsible for synchronizing it
777 * such that the IPIs performed on the @csd are strictly serialized.
779 * If the function is called with one csd which has not yet been
780 * processed by previous call to smp_call_function_single_async(), the
781 * function will return immediately with -EBUSY showing that the csd
782 * object is still in progress.
784 * NOTE: Be careful, there is unfortunately no current debugging facility to
785 * validate the correctness of this serialization.
787 * Return: %0 on success or negative errno value on error
789 int smp_call_function_single_async(int cpu
, struct __call_single_data
*csd
)
795 if (csd
->node
.u_flags
& CSD_FLAG_LOCK
) {
800 csd
->node
.u_flags
= CSD_FLAG_LOCK
;
803 err
= generic_exec_single(cpu
, csd
);
810 EXPORT_SYMBOL_GPL(smp_call_function_single_async
);
813 * smp_call_function_any - Run a function on any of the given cpus
814 * @mask: The mask of cpus it can run on.
815 * @func: The function to run. This must be fast and non-blocking.
816 * @info: An arbitrary pointer to pass to the function.
817 * @wait: If true, wait until function has completed.
819 * Returns 0 on success, else a negative status code (if no cpus were online).
821 * Selection preference:
822 * 1) current cpu if in @mask
823 * 2) any cpu of current node if in @mask
824 * 3) any other online cpu in @mask
826 int smp_call_function_any(const struct cpumask
*mask
,
827 smp_call_func_t func
, void *info
, int wait
)
830 const struct cpumask
*nodemask
;
833 /* Try for same CPU (cheapest) */
835 if (cpumask_test_cpu(cpu
, mask
))
838 /* Try for same node. */
839 nodemask
= cpumask_of_node(cpu_to_node(cpu
));
840 for (cpu
= cpumask_first_and(nodemask
, mask
); cpu
< nr_cpu_ids
;
841 cpu
= cpumask_next_and(cpu
, nodemask
, mask
)) {
846 /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
847 cpu
= cpumask_any_and(mask
, cpu_online_mask
);
849 ret
= smp_call_function_single(cpu
, func
, info
, wait
);
853 EXPORT_SYMBOL_GPL(smp_call_function_any
);
856 * Flags to be used as scf_flags argument of smp_call_function_many_cond().
858 * %SCF_WAIT: Wait until function execution is completed
859 * %SCF_RUN_LOCAL: Run also locally if local cpu is set in cpumask
861 #define SCF_WAIT (1U << 0)
862 #define SCF_RUN_LOCAL (1U << 1)
864 static void smp_call_function_many_cond(const struct cpumask
*mask
,
865 smp_call_func_t func
, void *info
,
866 unsigned int scf_flags
,
867 smp_cond_func_t cond_func
)
869 int cpu
, last_cpu
, this_cpu
= smp_processor_id();
870 struct call_function_data
*cfd
;
871 bool wait
= scf_flags
& SCF_WAIT
;
872 bool run_remote
= false;
873 bool run_local
= false;
876 lockdep_assert_preemption_disabled();
879 * Can deadlock when called with interrupts disabled.
880 * We allow CPUs that are not yet online though, as no one else can
881 * send smp call function interrupt to this cpu and as such deadlocks
884 if (cpu_online(this_cpu
) && !oops_in_progress
&&
885 !early_boot_irqs_disabled
)
886 lockdep_assert_irqs_enabled();
889 * When @wait we can deadlock when we interrupt between llist_add() and
890 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
891 * csd_lock() on because the interrupt context uses the same csd
894 WARN_ON_ONCE(!in_task());
896 /* Check if we need local execution. */
897 if ((scf_flags
& SCF_RUN_LOCAL
) && cpumask_test_cpu(this_cpu
, mask
))
900 /* Check if we need remote execution, i.e., any CPU excluding this one. */
901 cpu
= cpumask_first_and(mask
, cpu_online_mask
);
903 cpu
= cpumask_next_and(cpu
, mask
, cpu_online_mask
);
904 if (cpu
< nr_cpu_ids
)
908 cfd
= this_cpu_ptr(&cfd_data
);
909 cpumask_and(cfd
->cpumask
, mask
, cpu_online_mask
);
910 __cpumask_clear_cpu(this_cpu
, cfd
->cpumask
);
912 cpumask_clear(cfd
->cpumask_ipi
);
913 for_each_cpu(cpu
, cfd
->cpumask
) {
914 struct cfd_percpu
*pcpu
= per_cpu_ptr(cfd
->pcpu
, cpu
);
915 call_single_data_t
*csd
= &pcpu
->csd
;
917 if (cond_func
&& !cond_func(cpu
, info
))
922 csd
->node
.u_flags
|= CSD_TYPE_SYNC
;
925 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
926 csd
->node
.src
= smp_processor_id();
929 cfd_seq_store(pcpu
->seq_queue
, this_cpu
, cpu
, CFD_SEQ_QUEUE
);
930 if (llist_add(&csd
->node
.llist
, &per_cpu(call_single_queue
, cpu
))) {
931 __cpumask_set_cpu(cpu
, cfd
->cpumask_ipi
);
935 cfd_seq_store(pcpu
->seq_ipi
, this_cpu
, cpu
, CFD_SEQ_IPI
);
937 cfd_seq_store(pcpu
->seq_noipi
, this_cpu
, cpu
, CFD_SEQ_NOIPI
);
941 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->ping
, this_cpu
, CFD_SEQ_NOCPU
, CFD_SEQ_PING
);
944 * Choose the most efficient way to send an IPI. Note that the
945 * number of CPUs might be zero due to concurrent changes to the
949 send_call_function_single_ipi(last_cpu
);
950 else if (likely(nr_cpus
> 1))
951 arch_send_call_function_ipi_mask(cfd
->cpumask_ipi
);
953 cfd_seq_store(this_cpu_ptr(&cfd_seq_local
)->pinged
, this_cpu
, CFD_SEQ_NOCPU
, CFD_SEQ_PINGED
);
956 if (run_local
&& (!cond_func
|| cond_func(this_cpu
, info
))) {
959 local_irq_save(flags
);
961 local_irq_restore(flags
);
964 if (run_remote
&& wait
) {
965 for_each_cpu(cpu
, cfd
->cpumask
) {
966 call_single_data_t
*csd
;
968 csd
= &per_cpu_ptr(cfd
->pcpu
, cpu
)->csd
;
975 * smp_call_function_many(): Run a function on a set of CPUs.
976 * @mask: The set of cpus to run on (only runs on online subset).
977 * @func: The function to run. This must be fast and non-blocking.
978 * @info: An arbitrary pointer to pass to the function.
979 * @wait: If true, wait (atomically) until function has completed on
980 * other CPUs. This is a plain boolean, not a bitmask: the
981 * function only ever passes %SCF_WAIT (never %SCF_RUN_LOCAL)
982 * on to smp_call_function_many_cond().
984 * If @wait is true, then returns once @func has returned.
986 * You must not call this function with disabled interrupts or from a
987 * hardware interrupt handler or from a bottom half handler. Preemption
988 * must be disabled when calling this function.
990 void smp_call_function_many(const struct cpumask
*mask
,
991 smp_call_func_t func
, void *info
, bool wait
)
993 smp_call_function_many_cond(mask
, func
, info
, wait
* SCF_WAIT
, NULL
);
995 EXPORT_SYMBOL(smp_call_function_many
);
998 * smp_call_function(): Run a function on all other CPUs.
999 * @func: The function to run. This must be fast and non-blocking.
1000 * @info: An arbitrary pointer to pass to the function.
1001 * @wait: If true, wait (atomically) until function has completed
1006 * If @wait is true, then returns once @func has returned; otherwise
1007 * it returns just before the target cpu calls @func.
1009 * You must not call this function with disabled interrupts or from a
1010 * hardware interrupt handler or from a bottom half handler.
1012 void smp_call_function(smp_call_func_t func
, void *info
, int wait
)
1015 smp_call_function_many(cpu_online_mask
, func
, info
, wait
);
1018 EXPORT_SYMBOL(smp_call_function
);
1020 /* Setup configured maximum number of CPUs to activate */
1021 unsigned int setup_max_cpus
= NR_CPUS
;
1022 EXPORT_SYMBOL(setup_max_cpus
);
1026 * Setup routine for controlling SMP activation
1028 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
1029 * activation entirely (the MPS table probe still happens, though).
1031 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
1032 * greater than 0, limits the maximum number of CPUs activated in
1033 * SMP mode to <NUM>.
1036 void __weak
arch_disable_smp_support(void) { }
1038 static int __init
nosmp(char *str
)
1041 arch_disable_smp_support();
1046 early_param("nosmp", nosmp
);
1048 /* this is hard limit */
1049 static int __init
nrcpus(char *str
)
1053 if (get_option(&str
, &nr_cpus
) && nr_cpus
> 0 && nr_cpus
< nr_cpu_ids
)
1054 nr_cpu_ids
= nr_cpus
;
1059 early_param("nr_cpus", nrcpus
);
1061 static int __init
maxcpus(char *str
)
1063 get_option(&str
, &setup_max_cpus
);
1064 if (setup_max_cpus
== 0)
1065 arch_disable_smp_support();
1070 early_param("maxcpus", maxcpus
);
1072 /* Setup number of possible processor ids */
1073 unsigned int nr_cpu_ids __read_mostly
= NR_CPUS
;
1074 EXPORT_SYMBOL(nr_cpu_ids
);
1076 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
1077 void __init
setup_nr_cpu_ids(void)
1079 nr_cpu_ids
= find_last_bit(cpumask_bits(cpu_possible_mask
),NR_CPUS
) + 1;
1082 /* Called by boot processor to activate the rest. */
1083 void __init
smp_init(void)
1085 int num_nodes
, num_cpus
;
1087 idle_threads_init();
1088 cpuhp_threads_init();
1090 pr_info("Bringing up secondary CPUs ...\n");
1092 bringup_nonboot_cpus(setup_max_cpus
);
1094 num_nodes
= num_online_nodes();
1095 num_cpus
= num_online_cpus();
1096 pr_info("Brought up %d node%s, %d CPU%s\n",
1097 num_nodes
, (num_nodes
> 1 ? "s" : ""),
1098 num_cpus
, (num_cpus
> 1 ? "s" : ""));
1100 /* Any cleanup work */
1101 smp_cpus_done(setup_max_cpus
);
1105 * on_each_cpu_cond(): Call a function on each processor for which
1106 * the supplied function cond_func returns true, optionally waiting
1107 * for all the required CPUs to finish. This may include the local
1109 * @cond_func: A callback function that is passed a cpu id and
1110 * the info parameter. The function is called
1111 * with preemption disabled. The function should
1112 * return a boolean value indicating whether to IPI
1113 * the specified CPU.
1114 * @func: The function to run on all applicable CPUs.
1115 * This must be fast and non-blocking.
1116 * @info: An arbitrary pointer to pass to both functions.
1117 * @wait: If true, wait (atomically) until function has
1118 * completed on other CPUs.
1120 * Preemption is disabled to protect against CPUs going offline but not online.
1121 * CPUs going online during the call will not be seen or sent an IPI.
1123 * You must not call this function with disabled interrupts or
1124 * from a hardware interrupt handler or from a bottom half handler.
1126 void on_each_cpu_cond_mask(smp_cond_func_t cond_func
, smp_call_func_t func
,
1127 void *info
, bool wait
, const struct cpumask
*mask
)
1129 unsigned int scf_flags
= SCF_RUN_LOCAL
;
1132 scf_flags
|= SCF_WAIT
;
1135 smp_call_function_many_cond(mask
, func
, info
, scf_flags
, cond_func
);
1138 EXPORT_SYMBOL(on_each_cpu_cond_mask
);
/* Callback with an empty body; its argument is ignored. */
static void do_nothing(void *unused)
{
	/* intentionally empty */
}
1145 * kick_all_cpus_sync - Force all cpus out of idle
1147 * Used to synchronize the update of pm_idle function pointer. It's
1148 * called after the pointer is updated and returns after the dummy
1149 * callback function has been executed on all cpus. The execution of
1150 * the function can only happen on the remote cpus after they have
1151 * left the idle function which had been called via pm_idle function
1152 * pointer. So it's guaranteed that nothing uses the previous pointer
1155 void kick_all_cpus_sync(void)
1157 /* Make sure the change is visible before we kick the cpus */
1159 smp_call_function(do_nothing
, NULL
, 1);
1161 EXPORT_SYMBOL_GPL(kick_all_cpus_sync
);
1164 * wake_up_all_idle_cpus - break all cpus out of idle
1165 * wake_up_all_idle_cpus() tries to break all cpus that are in the idle state,
1166 * even those that are idle polling; for non-idle cpus, it does nothing
1169 void wake_up_all_idle_cpus(void)
1174 for_each_online_cpu(cpu
) {
1175 if (cpu
== smp_processor_id())
1178 wake_up_if_idle(cpu
);
1182 EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus
);
1185 * struct smp_call_on_cpu_struct - Call a function on a specific CPU
1186 * @work: &work_struct
1187 * @done: &completion to signal
1188 * @func: function to call
1189 * @data: function's data argument
1190 * @ret: return value from @func
1191 * @cpu: target CPU (%-1 for any CPU)
1193 * Used to call a function on a specific cpu and wait for it to return.
1194 * Optionally make sure the call is done on a specified physical cpu via vcpu
1195 * pinning in order to support virtualized environments.
1197 struct smp_call_on_cpu_struct
{
1198 struct work_struct work
;
1199 struct completion done
;
1200 int (*func
)(void *);
1206 static void smp_call_on_cpu_callback(struct work_struct
*work
)
1208 struct smp_call_on_cpu_struct
*sscs
;
1210 sscs
= container_of(work
, struct smp_call_on_cpu_struct
, work
);
1212 hypervisor_pin_vcpu(sscs
->cpu
);
1213 sscs
->ret
= sscs
->func(sscs
->data
);
1215 hypervisor_pin_vcpu(-1);
1217 complete(&sscs
->done
);
1220 int smp_call_on_cpu(unsigned int cpu
, int (*func
)(void *), void *par
, bool phys
)
1222 struct smp_call_on_cpu_struct sscs
= {
1223 .done
= COMPLETION_INITIALIZER_ONSTACK(sscs
.done
),
1226 .cpu
= phys
? cpu
: -1,
1229 INIT_WORK_ONSTACK(&sscs
.work
, smp_call_on_cpu_callback
);
1231 if (cpu
>= nr_cpu_ids
|| !cpu_online(cpu
))
1234 queue_work_on(cpu
, system_wq
, &sscs
.work
);
1235 wait_for_completion(&sscs
.done
);
1239 EXPORT_SYMBOL_GPL(smp_call_on_cpu
);