]>
Commit | Line | Data |
---|---|---|
817f32d0 AK |
1 | /* |
2 | * MCE grading rules. | |
3 | * Copyright 2008, 2009 Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License | |
7 | * as published by the Free Software Foundation; version 2 | |
8 | * of the License. | |
9 | * | |
10 | * Author: Andi Kleen | |
11 | */ | |
12 | #include <linux/kernel.h> | |
4611a6fa HY |
13 | #include <linux/seq_file.h> |
14 | #include <linux/init.h> | |
15 | #include <linux/debugfs.h> | |
817f32d0 AK |
16 | #include <asm/mce.h> |
17 | ||
18 | #include "mce-internal.h" | |
19 | ||
20 | /* | |
21 | * Grade an mce by severity. In general the most severe ones are processed | |
22 | * first. Since there are quite a lot of combinations test the bits in a | |
23 | * table-driven way. The rules are simply processed in order, first | |
24 | * match wins. | |
ed7290d0 AK |
25 | * |
26 | * Note this is only used for machine check exceptions, the corrected | |
27 | * errors use much simpler rules. The exceptions still check for the corrected | |
28 | * errors, but only to leave them alone for the CMCI handler (except for | |
29 | * panic situations) | |
817f32d0 AK |
30 | */ |
31 | ||
ed7290d0 AK |
/*
 * Per-rule match qualifiers.  In a severity rule a value of 0 (none of
 * the enumerators below) means "don't care" for that qualifier.
 */

/* Where the CPU was executing when the error hit; see error_context(). */
enum context { IN_KERNEL = 1, IN_USER = 2 };
/* Whether the rule applies only when mca_cfg.ser is set, or only when it is clear. */
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
/* Whether the rule applies only when grading was requested with is_excp set, or only without. */
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
ed7290d0 | 35 | |
817f32d0 AK |
/*
 * One grading rule.  mce_severity_intel() walks severities[] from the top
 * and returns the first rule for which every populated qualifier matches.
 */
static struct severity {
	u64 mask;		/* bits of m->status to examine ... */
	u64 result;		/* ... and the value they must have */
	unsigned char sev;	/* MCE_*_SEVERITY returned when the rule matches */
	unsigned char mcgmask;	/* bits of m->mcgstatus to examine ... */
	unsigned char mcgres;	/* ... and the value they must have */
	unsigned char ser;	/* enum ser, 0 = don't care */
	unsigned char context;	/* enum context, 0 = don't care */
	unsigned char excp;	/* enum exception, 0 = don't care */
	unsigned char covered;	/* set to 1 once the rule has matched; shown by the debugfs coverage file */
	char *msg;		/* description handed back through *msg */
} severities[] = {
/* Build one table entry: severity suffix, message, then optional qualifiers. */
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
#define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT
#define NOEXCP .excp = NO_EXCP
/* Match when every status bit in x is clear. */
#define BITCLR(x) .mask = x, .result = 0
/* Match when every status bit in x is set. */
#define BITSET(x) .mask = x, .result = x
/* Match when (mcgstatus & x) == y. */
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
/* Match when (status & x) == y. */
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		DEFERRED, "Deferred error",
		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),

	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signalled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	/* known AO MCACODs: */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
		),
	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	/*
	 * Catch-all: mask 0 / result 0 with no qualifiers matches any input.
	 * The rule walk in mce_severity_intel() has no other stop condition.
	 */
	MCESEV(
		SOME, "No match",
		BITSET(0)
		) /* always matches. keep at end */
};
172 | ||
ed7290d0 | 173 | /* |
875e2664 TL |
174 | * If mcgstatus indicated that ip/cs on the stack were |
175 | * no good, then "m->cs" will be zero and we will have | |
176 | * to assume the worst case (IN_KERNEL) as we actually | |
177 | * have no idea what we were executing when the machine | |
178 | * check hit. | |
179 | * If we do have a good "m->cs" (or a faked one in the | |
180 | * case we were executing in VM86 mode) we can use it to | |
181 | * distinguish an exception taken in user mode from one | |
182 | * taken in the kernel. | |
ed7290d0 AK |
183 | */ |
184 | static int error_context(struct mce *m) | |
185 | { | |
875e2664 | 186 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; |
ed7290d0 AK |
187 | } |
188 | ||
bf80bbd7 AG |
189 | /* |
190 | * See AMD Error Scope Hierarchy table in a newer BKDG. For example | |
191 | * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features" | |
192 | */ | |
43eaa2a1 | 193 | static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp) |
bf80bbd7 | 194 | { |
43eaa2a1 AG |
195 | enum context ctx = error_context(m); |
196 | ||
bf80bbd7 AG |
197 | /* Processor Context Corrupt, no need to fumble too much, die! */ |
198 | if (m->status & MCI_STATUS_PCC) | |
199 | return MCE_PANIC_SEVERITY; | |
200 | ||
201 | if (m->status & MCI_STATUS_UC) { | |
202 | ||
203 | /* | |
204 | * On older systems where overflow_recov flag is not present, we | |
205 | * should simply panic if an error overflow occurs. If | |
206 | * overflow_recov flag is present and set, then software can try | |
207 | * to at least kill process to prolong system operation. | |
208 | */ | |
209 | if (mce_flags.overflow_recov) { | |
210 | /* software can try to contain */ | |
cee8f5a6 AG |
211 | if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) |
212 | return MCE_PANIC_SEVERITY; | |
bf80bbd7 | 213 | |
cee8f5a6 AG |
214 | /* kill current process */ |
215 | return MCE_AR_SEVERITY; | |
bf80bbd7 AG |
216 | } else { |
217 | /* at least one error was not logged */ | |
218 | if (m->status & MCI_STATUS_OVER) | |
219 | return MCE_PANIC_SEVERITY; | |
220 | } | |
221 | ||
222 | /* | |
223 | * For any other case, return MCE_UC_SEVERITY so that we log the | |
224 | * error and exit #MC handler. | |
225 | */ | |
226 | return MCE_UC_SEVERITY; | |
227 | } | |
228 | ||
229 | /* | |
230 | * deferred error: poll handler catches these and adds to mce_ring so | |
231 | * memory-failure can take recovery actions. | |
232 | */ | |
233 | if (m->status & MCI_STATUS_DEFERRED) | |
234 | return MCE_DEFERRED_SEVERITY; | |
235 | ||
236 | /* | |
237 | * corrected error: poll handler catches these and passes responsibility | |
238 | * of decoding the error to EDAC | |
239 | */ | |
240 | return MCE_KEEP_SEVERITY; | |
241 | } | |
242 | ||
43eaa2a1 | 243 | static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp) |
817f32d0 | 244 | { |
e3480271 | 245 | enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); |
7639bfc7 | 246 | enum context ctx = error_context(m); |
817f32d0 | 247 | struct severity *s; |
ed7290d0 | 248 | |
817f32d0 | 249 | for (s = severities;; s++) { |
7639bfc7 | 250 | if ((m->status & s->mask) != s->result) |
817f32d0 | 251 | continue; |
7639bfc7 | 252 | if ((m->mcgstatus & s->mcgmask) != s->mcgres) |
817f32d0 | 253 | continue; |
1462594b | 254 | if (s->ser == SER_REQUIRED && !mca_cfg.ser) |
ed7290d0 | 255 | continue; |
1462594b | 256 | if (s->ser == NO_SER && mca_cfg.ser) |
ed7290d0 AK |
257 | continue; |
258 | if (s->context && ctx != s->context) | |
259 | continue; | |
e3480271 CY |
260 | if (s->excp && excp != s->excp) |
261 | continue; | |
817f32d0 AK |
262 | if (msg) |
263 | *msg = s->msg; | |
4611a6fa | 264 | s->covered = 1; |
ed7290d0 AK |
265 | if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { |
266 | if (panic_on_oops || tolerant < 1) | |
267 | return MCE_PANIC_SEVERITY; | |
268 | } | |
817f32d0 AK |
269 | return s->sev; |
270 | } | |
271 | } | |
4611a6fa | 272 | |
43eaa2a1 AG |
/*
 * Severity grading hook, called through this pointer.  It defaults to
 * mce_severity_intel; mcheck_vendor_init_severity() repoints it to
 * mce_severity_amd when the boot CPU vendor is AMD.
 */
int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
		    mce_severity_intel;
276 | ||
277 | void __init mcheck_vendor_init_severity(void) | |
278 | { | |
279 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | |
280 | mce_severity = mce_severity_amd; | |
281 | } | |
282 | ||
e34e77ce | 283 | #ifdef CONFIG_DEBUG_FS |
4611a6fa HY |
284 | static void *s_start(struct seq_file *f, loff_t *pos) |
285 | { | |
286 | if (*pos >= ARRAY_SIZE(severities)) | |
287 | return NULL; | |
288 | return &severities[*pos]; | |
289 | } | |
290 | ||
291 | static void *s_next(struct seq_file *f, void *data, loff_t *pos) | |
292 | { | |
293 | if (++(*pos) >= ARRAY_SIZE(severities)) | |
294 | return NULL; | |
295 | return &severities[*pos]; | |
296 | } | |
297 | ||
/* seq_file ->stop: intentionally empty. */
static void s_stop(struct seq_file *f, void *data)
{
	/* s_start() only hands out a pointer into severities[]; nothing to undo. */
}
301 | ||
302 | static int s_show(struct seq_file *f, void *data) | |
303 | { | |
304 | struct severity *ser = data; | |
305 | seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); | |
306 | return 0; | |
307 | } | |
308 | ||
309 | static const struct seq_operations severities_seq_ops = { | |
310 | .start = s_start, | |
311 | .next = s_next, | |
312 | .stop = s_stop, | |
313 | .show = s_show, | |
314 | }; | |
315 | ||
316 | static int severities_coverage_open(struct inode *inode, struct file *file) | |
317 | { | |
318 | return seq_open(file, &severities_seq_ops); | |
319 | } | |
320 | ||
321 | static ssize_t severities_coverage_write(struct file *file, | |
322 | const char __user *ubuf, | |
323 | size_t count, loff_t *ppos) | |
324 | { | |
325 | int i; | |
326 | for (i = 0; i < ARRAY_SIZE(severities); i++) | |
327 | severities[i].covered = 0; | |
328 | return count; | |
329 | } | |
330 | ||
331 | static const struct file_operations severities_coverage_fops = { | |
332 | .open = severities_coverage_open, | |
333 | .release = seq_release, | |
334 | .read = seq_read, | |
335 | .write = severities_coverage_write, | |
6038f373 | 336 | .llseek = seq_lseek, |
4611a6fa HY |
337 | }; |
338 | ||
339 | static int __init severities_debugfs_init(void) | |
340 | { | |
7639bfc7 | 341 | struct dentry *dmce, *fsev; |
4611a6fa | 342 | |
5be9ed25 | 343 | dmce = mce_get_debugfs_dir(); |
7639bfc7 | 344 | if (!dmce) |
4611a6fa | 345 | goto err_out; |
7639bfc7 HS |
346 | |
347 | fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, | |
348 | &severities_coverage_fops); | |
349 | if (!fsev) | |
4611a6fa HY |
350 | goto err_out; |
351 | ||
352 | return 0; | |
353 | ||
354 | err_out: | |
4611a6fa HY |
355 | return -ENOMEM; |
356 | } | |
357 | late_initcall(severities_debugfs_init); | |
7639bfc7 | 358 | #endif /* CONFIG_DEBUG_FS */ |