/*
 * arch/x86/kernel/nmi.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <linux/mca.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

#define NMI_MAX_NAMELEN	16
struct nmiaction {
	struct list_head list;
	nmi_handler_t handler;
	unsigned int flags;
	char *name;
};

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},
};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis;

int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * this lock may only be taken from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time.  Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list)
		handled += a->handler(type, regs);

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}
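
/*
 * Illustrative sketch (not part of this file): the shape of a handler
 * sitting on one of these lists.  nmi_handle() above sums the return
 * values, so a handler returns 0 when the NMI was not its own and the
 * number of events it handled otherwise; returning more than 1 feeds
 * the back-to-back "swallow" logic further down in this file.
 * mydev_event_pending() and mydev_ack_event() are hypothetical helpers.
 */
#if 0
static int mydev_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	int handled = 0;

	/* Drain every event the (hypothetical) device has queued. */
	while (mydev_event_pending()) {
		mydev_ack_event();
		handled++;
	}

	return handled;	/* 0 means "not mine" */
}
#endif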

static int __setup_nmi(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Most handlers of type NMI_UNKNOWN never return because
	 * they just assume the NMI is theirs.  This is just a sanity
	 * check to manage expectations (the same holds for the SERR
	 * and IO_CHECK lists below).
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses some handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}

static struct nmiaction *__free_nmi(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n, *found = NULL;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * the name passed in to describe the nmi handler
		 * is used as the lookup key
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			found = n;
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
	/* NULL when no handler with that name was registered */
	return found;
}

int register_nmi_handler(unsigned int type, nmi_handler_t handler,
			unsigned long nmiflags, const char *devname)
{
	struct nmiaction *action;
	int retval = -ENOMEM;

	if (!handler)
		return -EINVAL;

	action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
	if (!action)
		goto fail_action;

	action->handler = handler;
	action->flags = nmiflags;
	action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
	if (!action->name)
		goto fail_action_name;

	retval = __setup_nmi(type, action);

	if (retval)
		goto fail_setup_nmi;

	return retval;

fail_setup_nmi:
	kfree(action->name);
fail_action_name:
	kfree(action);
fail_action:

	return retval;
}
EXPORT_SYMBOL_GPL(register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmiaction *a;

	a = __free_nmi(type, name);
	if (a) {
		kfree(a->name);
		kfree(a);
	}
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
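
/*
 * Usage sketch (illustrative, not part of this file), assuming a handler
 * like the one sketched after nmi_handle() above: a driver registers
 * against one of the NMI lists and later tears down using the same
 * devname, which __free_nmi() uses as the lookup key.  "mydev",
 * mydev_init(), mydev_exit() and mydev_nmi_handler() are hypothetical;
 * passing NMI_FLAG_FIRST instead of 0 is reserved for something like
 * kdump that must run before the other handlers.
 */
#if 0
static int __init mydev_init(void)
{
	return register_nmi_handler(NMI_LOCAL, mydev_nmi_handler, 0, "mydev");
}

static void __exit mydev_exit(void)
{
	unregister_nmi_handler(NMI_LOCAL, "mydev");
}
#endif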

static notrace __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs, false))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines, PCI SERR line is used to report memory
	 * errors. EDAC makes use of it.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs, false))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_registers(regs);

	if (panic_on_io_nmi)
		panic("NMI IOCK error: Not continuing");

	/* Clear and disable the IOCK line, wait a couple of seconds, then re-enable it */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}

static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs, false);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

#ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
		mca_handle_nmi();
		return;
	}
#endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is dropped
	 * due to NMIs being edge-triggered).  If this is the second half
	 * of a back-to-back NMI, assume we dropped things and process
	 * more handlers.  Otherwise, reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs, b2b);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI.  One of these events may
		 * also be queued as the next NMI.  Because the event is
		 * already handled, that next NMI will result in an unknown
		 * NMI.  Instead, let's flag this for a potential NMI to
		 * swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time.  To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped.  The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched.  This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it.  Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI.  For example, while processing
	 * a perf NMI, another perf NMI comes in along with a
	 * 'real' unknown NMI.  These two NMIs get combined into
	 * one (as described above).  When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * also.  As a result it gets swallowed.  Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI.  But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}

/*
 * NMIs can hit breakpoints which will cause the CPU to lose its
 * NMI context when the breakpoint handler does an iret.
 */
#ifdef CONFIG_X86_32
/*
 * For i386, NMIs use the same stack as the kernel, and we can
 * add a workaround to the iret problem in C.  Simply have 3 states
 * the NMI can be in:
 *
 * 1) not running
 * 2) executing
 * 3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 * when one is running, are ignored.  Only one NMI is restarted.)
 *
 * If an NMI hits a breakpoint that executes an iret, another
 * NMI can preempt it.  We do not want to allow this new NMI
 * to run, but we want to execute it when the first one finishes.
 * We set the state to "latched", and the first NMI will perform
 * a cmpxchg on the state; if it doesn't successfully
 * reset the state to "not running" it will restart the next
 * NMI.
 */
enum nmi_states {
	NMI_NOT_RUNNING,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);

#define nmi_nesting_preprocess(regs)					\
	do {								\
		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
			return;						\
		}							\
	nmi_restart:							\
		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
	} while (0)

#define nmi_nesting_postprocess()					\
	do {								\
		if (cmpxchg(&__get_cpu_var(nmi_state),			\
		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
			goto nmi_restart;				\
	} while (0)
#else /* x86_64 */
/*
 * In x86_64 things are a bit more difficult.  This has the same problem
 * where an NMI hitting a breakpoint that calls iret will remove the
 * NMI context, allowing a nested NMI to enter.  What makes this more
 * difficult is that both NMIs and breakpoints have their own stack.
 * When a new NMI or breakpoint is executed, the stack is set to a fixed
 * point.  If an NMI is nested, it will have its stack set at that same
 * fixed address that the first NMI had, and will start corrupting the
 * stack.  This is handled in entry_64.S, but the same problem exists with
 * the breakpoint stack.
 *
 * If a breakpoint is being processed on the debug stack and an NMI comes
 * in that also hits a breakpoint, the stack pointer will be set to the
 * same fixed address as the breakpoint that was interrupted, corrupting
 * that stack.  To handle this case, check if the stack that was
 * interrupted is the debug stack, and if so, change the IDT so that new
 * breakpoints will use the current stack and not switch to the fixed
 * address.  On return from the NMI, switch back to the original IDT.
 */
static DEFINE_PER_CPU(int, update_debug_stack);

static inline void nmi_nesting_preprocess(struct pt_regs *regs)
{
	/*
	 * If we interrupted a breakpoint, it is possible that
	 * the nmi handler will have breakpoints too.  We need to
	 * change the IDT such that breakpoints that happen here
	 * continue to use the NMI stack.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		__get_cpu_var(update_debug_stack) = 1;
	}
}

static inline void nmi_nesting_postprocess(void)
{
	if (unlikely(__get_cpu_var(update_debug_stack)))
		debug_stack_reset();
}
#endif

dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_nesting_preprocess(regs);

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

	/* On i386, may loop back to preprocess */
	nmi_nesting_postprocess();
}

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}
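
/*
 * Illustrative sketch (not part of this file): how a caller elsewhere in
 * arch/x86 might bracket a critical window with the pair above.  While
 * ignore_nmis is raised, do_nmi() still fires and bumps the NMI count,
 * but default_do_nmi() is skipped, so none of the registered handlers
 * run.  mydev_critical_reconfig() and mydev_reprogram_hw() are
 * hypothetical.
 */
#if 0
static void mydev_critical_reconfig(void)
{
	stop_nmi();		/* default_do_nmi() is skipped from here on */
	mydev_reprogram_hw();	/* hypothetical work done with NMIs ignored */
	restart_nmi();		/* resume normal NMI processing */
}
#endif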

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}