]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/edac/mce_amd.c
Merge branch 'for-upstream/mali-dp' of git://linux-arm.org/linux-ld into drm-fixes
[mirror_ubuntu-bionic-kernel.git] / drivers / edac / mce_amd.c
CommitLineData
b70ef010 1#include <linux/module.h>
888ab8e6
BP
2#include <linux/slab.h>
3
f3c0891c
BP
4#include <asm/cpu.h>
5
47ca08a4 6#include "mce_amd.h"
b52401ce 7
888ab8e6
BP
8static struct amd_decoder_ops *fam_ops;
9
2be64bfa 10static u8 xec_mask = 0xf;
5ce88f6e 11
549d042d 12static bool report_gart_errors;
5c332202 13static void (*decode_dram_ecc)(int node_id, struct mce *m);
549d042d
BP
14
15void amd_report_gart_errors(bool v)
16{
17 report_gart_errors = v;
18}
19EXPORT_SYMBOL_GPL(amd_report_gart_errors);
20
b0b07a2b 21void amd_register_ecc_decoder(void (*f)(int, struct mce *))
549d042d 22{
5c332202 23 decode_dram_ecc = f;
549d042d
BP
24}
25EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
26
b0b07a2b 27void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
549d042d 28{
5c332202
YG
29 if (decode_dram_ecc) {
30 WARN_ON(decode_dram_ecc != f);
549d042d 31
5c332202 32 decode_dram_ecc = NULL;
549d042d
BP
33 }
34}
35EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
36
b52401ce
DT
37/*
38 * string representation for the different MCA reported error types, see F3x48
39 * or MSR0000_0411.
40 */
6337583d
BP
41
/* transaction type */
static const char * const tt_msgs[] = {
	[0] = "INSN",
	[1] = "DATA",
	[2] = "GEN",
	[3] = "RESV",
};
b52401ce 44
/* cache level */
static const char * const ll_msgs[] = {
	[0] = "RESV",
	[1] = "L1",
	[2] = "L2",
	[3] = "L3/GEN",
};
b52401ce 47
/* memory transaction type */
static const char * const rrrr_msgs[] = {
	[0] = "GEN",
	[1] = "RD",
	[2] = "WR",
	[3] = "DRD",
	[4] = "DWR",
	[5] = "IRD",
	[6] = "PRF",
	[7] = "EV",
	[8] = "SNP",
};
52
/* participating processor */
const char * const pp_msgs[] = {
	[0] = "SRC",
	[1] = "RES",
	[2] = "OBS",
	[3] = "GEN",
};
b70ef010 55EXPORT_SYMBOL_GPL(pp_msgs);
b52401ce 56
/* request timeout */
static const char * const to_msgs[] = {
	[0] = "no timeout",
	[1] = "timed out",
};
b52401ce 59
/* memory or i/o */
static const char * const ii_msgs[] = {
	[0] = "MEM",
	[1] = "RESV",
	[2] = "IO",
	[3] = "GEN",
};
b52401ce 62
/* internal error type */
static const char * const uu_msgs[] = {
	[0] = "RESV",
	[1] = "RESV",
	[2] = "HWA",
	[3] = "RESV",
};
980eec8b 65
/*
 * F15h MC1 error descriptions. The array is packed, so the index is not
 * always the raw xec; the xec each slot stands for is noted alongside.
 */
static const char * const f15h_mc1_mce_desc[] = {
	[0]  = "UC during a demand linefill from L2",		/* xec = 0x0 */
	[1]  = "Parity error during data load from IC",
	[2]  = "Parity error for IC valid bit",
	[3]  = "Main tag parity error",
	[4]  = "Parity error in prediction queue",
	[5]  = "PFB data/address parity error",
	[6]  = "Parity error in the branch status reg",
	[7]  = "PFB promotion address error",
	[8]  = "Tag error during probe/victimization",
	[9]  = "Parity error for IC probe tag valid bit",
	[10] = "PFB non-cacheable bit parity error",		/* xec = 0xa */
	[11] = "PFB valid bit parity error",			/* xec = 0xd */
	[12] = "Microcode Patch Buffer",			/* xec = 0x10 */
	[13] = "uop queue",					/* xec = 0x11 */
	[14] = "insn buffer",
	[15] = "predecode buffer",
	[16] = "fetch address FIFO",
	[17] = "dispatch uop queue",				/* xec = 0x15 */
};
86
/*
 * F15h MC2 error descriptions. The array is packed: slots 0-8 stand for
 * xec 0x4-0xc and slots 9-13 for xec 0x10-0x14.
 */
static const char * const f15h_mc2_mce_desc[] = {
	[0]  = "Fill ECC error on data fills",		/* xec = 0x4 */
	[1]  = "Fill parity error on insn fills",
	[2]  = "Prefetcher request FIFO parity error",
	[3]  = "PRQ address parity error",
	[4]  = "PRQ data parity error",
	[5]  = "WCC Tag ECC error",
	[6]  = "WCC Data ECC error",
	[7]  = "WCB Data parity error",
	[8]  = "VB Data ECC or parity error",		/* xec = 0xc */
	[9]  = "L2 Tag ECC error",			/* xec = 0x10 */
	[10] = "Hard L2 Tag ECC error",
	[11] = "Multiple hits on L2 tag",
	[12] = "XAB parity error",
	[13] = "PRB address parity error",		/* xec = 0x14 */
};
103
/*
 * MC4 error descriptions. Slots 0-14 stand for xec 0x0-0xe; slots 15-18
 * stand for xec 0x1c-0x1f (i.e. xec minus 13).
 */
static const char * const mc4_mce_desc[] = {
	[0]  = "DRAM ECC error detected on the NB",	/* xec = 0x0 */
	[1]  = "CRC error detected on HT link",
	[2]  = "Link-defined sync error packets detected on HT link",
	[3]  = "HT Master abort",
	[4]  = "HT Target abort",
	[5]  = "Invalid GART PTE entry during GART table walk",
	[6]  = "Unsupported atomic RMW received from an IO link",
	[7]  = "Watchdog timeout due to lack of progress",
	[8]  = "DRAM ECC error detected on the NB",	/* xec = 0x8 */
	[9]  = "SVM DMA Exclusion Vector error",
	[10] = "HT data error detected on link",
	[11] = "Protocol error (link, L3, probe filter)",
	[12] = "NB internal arrays parity error",
	[13] = "DRAM addr/ctl signals parity error",
	[14] = "IO link transmission error",		/* xec = 0xe */
	[15] = "L3 data cache ECC error",		/* xec = 0x1c */
	[16] = "L3 cache tag error",
	[17] = "L3 LRU parity bits error",
	[18] = "ECC Error in the Probe Filter directory", /* xec = 0x1f */
};
125
/* MC5 error descriptions, indexed directly by xec (0x0-0xd). */
static const char * const mc5_mce_desc[] = {
	[0x0] = "CPU Watchdog timer expire",
	[0x1] = "Wakeup array dest tag",
	[0x2] = "AG payload array",
	[0x3] = "EX payload array",
	[0x4] = "IDRF array",
	[0x5] = "Retire dispatch queue",
	[0x6] = "Mapper checkpoint array",
	[0x7] = "Physical register file EX0 port",
	[0x8] = "Physical register file EX1 port",
	[0x9] = "Physical register file AG0 port",
	[0xa] = "Physical register file AG1 port",
	[0xb] = "Flag register file",
	[0xc] = "DE error occurred",
	[0xd] = "Retire status queue",
};
142
bc4febe9
AG
/* MC6 error descriptions, indexed directly by xec (0x0-0x5). */
static const char * const mc6_mce_desc[] = {
	[0x0] = "Hardware Assertion",
	[0x1] = "Free List",
	[0x2] = "Physical Register File",
	[0x3] = "Retire Queue",
	[0x4] = "Scheduler table",
	[0x5] = "Status Register File",
};
151
be0aec23 152/* Scalable MCA error strings */
/* SMCA load-store unit descriptions, indexed by extended error code. */
static const char * const smca_ls_mce_desc[] = {
	[0x00] = "Load queue parity",
	[0x01] = "Store queue parity",
	[0x02] = "Miss address buffer payload parity",
	[0x03] = "L1 TLB parity",
	[0x04] = "Reserved",
	[0x05] = "DC tag error type 6",
	[0x06] = "DC tag error type 1",
	[0x07] = "Internal error type 1",
	[0x08] = "Internal error type 2",
	[0x09] = "Sys Read data error thread 0",
	[0x0a] = "Sys read data error thread 1",
	[0x0b] = "DC tag error type 2",
	[0x0c] = "DC data error type 1 (poison consumption)",
	[0x0d] = "DC data error type 2",
	[0x0e] = "DC data error type 3",
	[0x0f] = "DC tag error type 4",
	[0x10] = "L2 TLB parity",
	[0x11] = "PDC parity error",
	[0x12] = "DC tag error type 3",
	[0x13] = "DC tag error type 5",
	[0x14] = "L2 fill data error",
};
176
/* SMCA instruction-fetch unit descriptions, indexed by extended error code. */
static const char * const smca_if_mce_desc[] = {
	[0x0] = "microtag probe port parity error",
	[0x1] = "IC microtag or full tag multi-hit error",
	[0x2] = "IC full tag parity",
	[0x3] = "IC data array parity",
	[0x4] = "Decoupling queue phys addr parity error",
	[0x5] = "L0 ITLB parity error",
	[0x6] = "L1 ITLB parity error",
	[0x7] = "L2 ITLB parity error",
	[0x8] = "BPQ snoop parity on Thread 0",
	[0x9] = "BPQ snoop parity on Thread 1",
	[0xa] = "L1 BTB multi-match error",
	[0xb] = "L2 BTB multi-match error",
	[0xc] = "L2 Cache Response Poison error",
	[0xd] = "System Read Data error",
};
193
/* SMCA L2 cache descriptions, indexed by extended error code. */
static const char * const smca_l2_mce_desc[] = {
	[0x0] = "L2M tag multi-way-hit error",
	[0x1] = "L2M tag ECC error",
	[0x2] = "L2M data ECC error",
	[0x3] = "HW assert",
};
200
/* SMCA decode unit descriptions, indexed by extended error code. */
static const char * const smca_de_mce_desc[] = {
	[0x0] = "uop cache tag parity error",
	[0x1] = "uop cache data parity error",
	[0x2] = "Insn buffer parity error",
	[0x3] = "uop queue parity error",
	[0x4] = "Insn dispatch queue parity error",
	[0x5] = "Fetch address FIFO parity",
	[0x6] = "Patch RAM data parity",
	[0x7] = "Patch RAM sequencer parity",
	[0x8] = "uop buffer parity",
};
212
/* SMCA execution unit descriptions, indexed by extended error code. */
static const char * const smca_ex_mce_desc[] = {
	[0x0] = "Watchdog timeout error",
	[0x1] = "Phy register file parity",
	[0x2] = "Flag register file parity",
	[0x3] = "Immediate displacement register file parity",
	[0x4] = "Address generator payload parity",
	[0x5] = "EX payload parity",
	[0x6] = "Checkpoint queue parity",
	[0x7] = "Retire dispatch queue parity",
	[0x8] = "Retire status queue parity error",
	[0x9] = "Scheduling queue parity error",
	[0xa] = "Branch buffer queue parity error",
};
226
/* SMCA floating-point unit descriptions, indexed by extended error code. */
static const char * const smca_fp_mce_desc[] = {
	[0x0] = "Physical register file parity",
	[0x1] = "Freelist parity error",
	[0x2] = "Schedule queue parity",
	[0x3] = "NSQ parity error",
	[0x4] = "Retire queue parity",
	[0x5] = "Status register file parity",
	[0x6] = "Hardware assertion",
};
236
/* SMCA L3 cache descriptions, indexed by extended error code. */
static const char * const smca_l3_mce_desc[] = {
	[0x0] = "Shadow tag macro ECC error",
	[0x1] = "Shadow tag macro multi-way-hit error",
	[0x2] = "L3M tag ECC error",
	[0x3] = "L3M tag multi-way-hit error",
	[0x4] = "L3M data ECC error",
	[0x5] = "XI parity, L3 fill done channel error",
	[0x6] = "L3 victim queue parity",
	[0x7] = "L3 HW assert",
};
247
/* SMCA "CS" bank descriptions, indexed by extended error code. */
static const char * const smca_cs_mce_desc[] = {
	[0x0] = "Illegal request from transport layer",
	[0x1] = "Address violation",
	[0x2] = "Security violation",
	[0x3] = "Illegal response from transport layer",
	[0x4] = "Unexpected response",
	[0x5] = "Parity error on incoming request or probe response data",
	[0x6] = "Parity error on incoming read response data",
	[0x7] = "Atomic request parity",
	[0x8] = "ECC error on probe filter access",
};
259
/* SMCA "PIE" bank descriptions, indexed by extended error code. */
static const char * const smca_pie_mce_desc[] = {
	[0x0] = "HW assert",
	[0x1] = "Internal PIE register security violation",
	[0x2] = "Error on GMI link",
	[0x3] = "Poison data written to internal PIE register",
};
266
/* SMCA "UMC" bank descriptions, indexed by extended error code. */
static const char * const smca_umc_mce_desc[] = {
	[0x0] = "DRAM ECC error",
	[0x1] = "Data poison error on DRAM",
	[0x2] = "SDP parity error",
	[0x3] = "Advanced peripheral bus error",
	[0x4] = "Command/address parity error",
	[0x5] = "Write data CRC error",
};
275
/* SMCA "PB" bank descriptions, indexed by extended error code. */
static const char * const smca_pb_mce_desc[] = {
	[0x0] = "Parameter Block RAM ECC error",
};
279
/* SMCA "PSP" bank descriptions, indexed by extended error code. */
static const char * const smca_psp_mce_desc[] = {
	[0x0] = "PSP RAM ECC or parity error",
};
283
/* SMCA "SMU" bank descriptions, indexed by extended error code. */
static const char * const smca_smu_mce_desc[] = {
	[0x0] = "SMU RAM ECC or parity error",
};
287
5896820e
YG
288struct smca_mce_desc {
289 const char * const *descs;
290 unsigned int num_descs;
291};
292
293static struct smca_mce_desc smca_mce_descs[] = {
294 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
295 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
296 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
297 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
298 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
299 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
300 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
301 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
302 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
303 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
304 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
305 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
306 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
307};
308
f05c41a9 309static bool f12h_mc0_mce(u16 ec, u8 xec)
51966241 310{
888ab8e6 311 bool ret = false;
51966241 312
888ab8e6 313 if (MEM_ERROR(ec)) {
62452882 314 u8 ll = LL(ec);
888ab8e6 315 ret = true;
51966241 316
888ab8e6
BP
317 if (ll == LL_L2)
318 pr_cont("during L1 linefill from L2.\n");
319 else if (ll == LL_L1)
62452882 320 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
888ab8e6
BP
321 else
322 ret = false;
323 }
324 return ret;
325}
51966241 326
f05c41a9 327static bool f10h_mc0_mce(u16 ec, u8 xec)
9be0bb10 328{
62452882 329 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
9be0bb10
BP
330 pr_cont("during data scrub.\n");
331 return true;
332 }
f05c41a9 333 return f12h_mc0_mce(ec, xec);
9be0bb10
BP
334}
335
f05c41a9 336static bool k8_mc0_mce(u16 ec, u8 xec)
888ab8e6
BP
337{
338 if (BUS_ERROR(ec)) {
339 pr_cont("during system linefill.\n");
340 return true;
341 }
51966241 342
f05c41a9 343 return f10h_mc0_mce(ec, xec);
888ab8e6
BP
344}
345
980eec8b 346static bool cat_mc0_mce(u16 ec, u8 xec)
888ab8e6 347{
62452882 348 u8 r4 = R4(ec);
888ab8e6
BP
349 bool ret = true;
350
351 if (MEM_ERROR(ec)) {
352
62452882 353 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
888ab8e6
BP
354 return false;
355
356 switch (r4) {
357 case R4_DRD:
358 case R4_DWR:
359 pr_cont("Data/Tag parity error due to %s.\n",
360 (r4 == R4_DRD ? "load/hw prf" : "store"));
361 break;
362 case R4_EVICT:
363 pr_cont("Copyback parity error on a tag miss.\n");
364 break;
365 case R4_SNOOP:
366 pr_cont("Tag parity error during snoop.\n");
367 break;
368 default:
369 ret = false;
370 }
371 } else if (BUS_ERROR(ec)) {
372
62452882 373 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
888ab8e6
BP
374 return false;
375
376 pr_cont("System read data error on a ");
377
378 switch (r4) {
379 case R4_RD:
380 pr_cont("TLB reload.\n");
381 break;
382 case R4_DWR:
383 pr_cont("store.\n");
384 break;
385 case R4_DRD:
386 pr_cont("load.\n");
387 break;
388 default:
389 ret = false;
390 }
391 } else {
392 ret = false;
393 }
394
395 return ret;
396}
397
f05c41a9 398static bool f15h_mc0_mce(u16 ec, u8 xec)
25a4f8b0
BP
399{
400 bool ret = true;
401
402 if (MEM_ERROR(ec)) {
403
404 switch (xec) {
405 case 0x0:
406 pr_cont("Data Array access error.\n");
407 break;
408
409 case 0x1:
410 pr_cont("UC error during a linefill from L2/NB.\n");
411 break;
412
413 case 0x2:
414 case 0x11:
415 pr_cont("STQ access error.\n");
416 break;
417
418 case 0x3:
419 pr_cont("SCB access error.\n");
420 break;
421
422 case 0x10:
423 pr_cont("Tag error.\n");
424 break;
425
426 case 0x12:
427 pr_cont("LDQ access error.\n");
428 break;
429
430 default:
431 ret = false;
432 }
433 } else if (BUS_ERROR(ec)) {
434
435 if (!xec)
344f0a06 436 pr_cont("System Read Data Error.\n");
25a4f8b0 437 else
344f0a06 438 pr_cont(" Internal error condition type %d.\n", xec);
eba4bfb3
AG
439 } else if (INT_ERROR(ec)) {
440 if (xec <= 0x1f)
441 pr_cont("Hardware Assert.\n");
442 else
443 ret = false;
444
25a4f8b0
BP
445 } else
446 ret = false;
447
448 return ret;
449}
450
f05c41a9 451static void decode_mc0_mce(struct mce *m)
888ab8e6 452{
62452882
BP
453 u16 ec = EC(m->status);
454 u8 xec = XEC(m->status, xec_mask);
888ab8e6 455
f05c41a9 456 pr_emerg(HW_ERR "MC0 Error: ");
888ab8e6
BP
457
458 /* TLB error signatures are the same across families */
459 if (TLB_ERROR(ec)) {
62452882 460 if (TT(ec) == TT_DATA) {
888ab8e6 461 pr_cont("%s TLB %s.\n", LL_MSG(ec),
25a4f8b0
BP
462 ((xec == 2) ? "locked miss"
463 : (xec ? "multimatch" : "parity")));
888ab8e6
BP
464 return;
465 }
f05c41a9 466 } else if (fam_ops->mc0_mce(ec, xec))
25a4f8b0
BP
467 ;
468 else
f05c41a9 469 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
51966241
BP
470}
471
f05c41a9 472static bool k8_mc1_mce(u16 ec, u8 xec)
ab5535e7 473{
62452882 474 u8 ll = LL(ec);
dd53bce4 475 bool ret = true;
ab5535e7 476
dd53bce4
BP
477 if (!MEM_ERROR(ec))
478 return false;
ab5535e7 479
dd53bce4
BP
480 if (ll == 0x2)
481 pr_cont("during a linefill from L2.\n");
482 else if (ll == 0x1) {
62452882 483 switch (R4(ec)) {
dd53bce4
BP
484 case R4_IRD:
485 pr_cont("Parity error during data load.\n");
486 break;
ab5535e7 487
dd53bce4
BP
488 case R4_EVICT:
489 pr_cont("Copyback Parity/Victim error.\n");
490 break;
491
492 case R4_SNOOP:
493 pr_cont("Tag Snoop error.\n");
494 break;
495
496 default:
497 ret = false;
498 break;
499 }
ab5535e7 500 } else
dd53bce4 501 ret = false;
ab5535e7 502
dd53bce4
BP
503 return ret;
504}
505
980eec8b 506static bool cat_mc1_mce(u16 ec, u8 xec)
dd53bce4 507{
62452882 508 u8 r4 = R4(ec);
dd53bce4 509 bool ret = true;
ab5535e7 510
980eec8b
JS
511 if (!MEM_ERROR(ec))
512 return false;
513
514 if (TT(ec) != TT_INSTR)
515 return false;
516
517 if (r4 == R4_IRD)
518 pr_cont("Data/tag array parity error for a tag hit.\n");
519 else if (r4 == R4_SNOOP)
520 pr_cont("Tag error during snoop/victimization.\n");
521 else if (xec == 0x0)
522 pr_cont("Tag parity error from victim castout.\n");
523 else if (xec == 0x2)
524 pr_cont("Microcode patch RAM parity error.\n");
525 else
526 ret = false;
dd53bce4 527
dd53bce4
BP
528 return ret;
529}
530
f05c41a9 531static bool f15h_mc1_mce(u16 ec, u8 xec)
86039cd4
BP
532{
533 bool ret = true;
534
535 if (!MEM_ERROR(ec))
536 return false;
537
538 switch (xec) {
539 case 0x0 ... 0xa:
f05c41a9 540 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
86039cd4
BP
541 break;
542
543 case 0xd:
f05c41a9 544 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
86039cd4
BP
545 break;
546
6c1173a6 547 case 0x10:
f05c41a9 548 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
6c1173a6
BP
549 break;
550
eba4bfb3 551 case 0x11 ... 0x15:
f05c41a9 552 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
86039cd4
BP
553 break;
554
555 default:
556 ret = false;
557 }
558 return ret;
559}
560
f05c41a9 561static void decode_mc1_mce(struct mce *m)
dd53bce4 562{
62452882
BP
563 u16 ec = EC(m->status);
564 u8 xec = XEC(m->status, xec_mask);
dd53bce4 565
f05c41a9 566 pr_emerg(HW_ERR "MC1 Error: ");
dd53bce4
BP
567
568 if (TLB_ERROR(ec))
569 pr_cont("%s TLB %s.\n", LL_MSG(ec),
570 (xec ? "multimatch" : "parity error"));
571 else if (BUS_ERROR(ec)) {
525906bc 572 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
dd53bce4
BP
573
574 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
eba4bfb3
AG
575 } else if (INT_ERROR(ec)) {
576 if (xec <= 0x3f)
577 pr_cont("Hardware Assert.\n");
578 else
579 goto wrong_mc1_mce;
f05c41a9 580 } else if (fam_ops->mc1_mce(ec, xec))
dd53bce4
BP
581 ;
582 else
eba4bfb3
AG
583 goto wrong_mc1_mce;
584
585 return;
586
587wrong_mc1_mce:
588 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
ab5535e7
BP
589}
590
4a73d3de 591static bool k8_mc2_mce(u16 ec, u8 xec)
56cad2d6 592{
4a73d3de 593 bool ret = true;
56cad2d6
BP
594
595 if (xec == 0x1)
596 pr_cont(" in the write data buffers.\n");
597 else if (xec == 0x3)
598 pr_cont(" in the victim data buffers.\n");
599 else if (xec == 0x2 && MEM_ERROR(ec))
62452882 600 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
56cad2d6
BP
601 else if (xec == 0x0) {
602 if (TLB_ERROR(ec))
50872ccd
BP
603 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
604 TT_MSG(ec));
56cad2d6
BP
605 else if (BUS_ERROR(ec))
606 pr_cont(": %s/ECC error in data read from NB: %s.\n",
62452882 607 R4_MSG(ec), PP_MSG(ec));
56cad2d6 608 else if (MEM_ERROR(ec)) {
62452882 609 u8 r4 = R4(ec);
56cad2d6 610
62452882 611 if (r4 >= 0x7)
56cad2d6 612 pr_cont(": %s error during data copyback.\n",
62452882
BP
613 R4_MSG(ec));
614 else if (r4 <= 0x1)
56cad2d6 615 pr_cont(": %s parity/ECC error during data "
62452882 616 "access from L2.\n", R4_MSG(ec));
56cad2d6 617 else
4a73d3de 618 ret = false;
56cad2d6 619 } else
4a73d3de 620 ret = false;
56cad2d6 621 } else
4a73d3de 622 ret = false;
56cad2d6 623
4a73d3de 624 return ret;
56cad2d6
BP
625}
626
4a73d3de 627static bool f15h_mc2_mce(u16 ec, u8 xec)
70fdb494 628{
4a73d3de 629 bool ret = true;
70fdb494
BP
630
631 if (TLB_ERROR(ec)) {
632 if (xec == 0x0)
633 pr_cont("Data parity TLB read error.\n");
634 else if (xec == 0x1)
635 pr_cont("Poison data provided for TLB fill.\n");
636 else
4a73d3de 637 ret = false;
70fdb494
BP
638 } else if (BUS_ERROR(ec)) {
639 if (xec > 2)
4a73d3de 640 ret = false;
70fdb494
BP
641
642 pr_cont("Error during attempted NB data read.\n");
643 } else if (MEM_ERROR(ec)) {
644 switch (xec) {
645 case 0x4 ... 0xc:
f05c41a9 646 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
70fdb494
BP
647 break;
648
649 case 0x10 ... 0x14:
f05c41a9 650 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
70fdb494
BP
651 break;
652
653 default:
4a73d3de 654 ret = false;
70fdb494 655 }
eba4bfb3
AG
656 } else if (INT_ERROR(ec)) {
657 if (xec <= 0x3f)
658 pr_cont("Hardware Assert.\n");
659 else
660 ret = false;
70fdb494
BP
661 }
662
4a73d3de
JS
663 return ret;
664}
665
980eec8b
JS
666static bool f16h_mc2_mce(u16 ec, u8 xec)
667{
668 u8 r4 = R4(ec);
669
670 if (!MEM_ERROR(ec))
671 return false;
672
673 switch (xec) {
674 case 0x04 ... 0x05:
675 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
676 break;
677
678 case 0x09 ... 0x0b:
679 case 0x0d ... 0x0f:
680 pr_cont("ECC error in L2 tag (%s).\n",
681 ((r4 == R4_GEN) ? "BankReq" :
682 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
683 break;
684
685 case 0x10 ... 0x19:
686 case 0x1b:
687 pr_cont("ECC error in L2 data array (%s).\n",
688 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
689 ((r4 == R4_GEN) ? "Attr" :
690 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
691 break;
692
693 case 0x1c ... 0x1d:
694 case 0x1f:
695 pr_cont("Parity error in L2 attribute bits (%s).\n",
696 ((r4 == R4_RD) ? "Hit" :
697 ((r4 == R4_GEN) ? "Attr" : "Fill")));
698 break;
699
700 default:
701 return false;
702 }
703
704 return true;
705}
706
4a73d3de
JS
707static void decode_mc2_mce(struct mce *m)
708{
709 u16 ec = EC(m->status);
710 u8 xec = XEC(m->status, xec_mask);
70fdb494 711
4a73d3de
JS
712 pr_emerg(HW_ERR "MC2 Error: ");
713
714 if (!fam_ops->mc2_mce(ec, xec))
715 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
70fdb494
BP
716}
717
f05c41a9 718static void decode_mc3_mce(struct mce *m)
f9350efd 719{
62452882
BP
720 u16 ec = EC(m->status);
721 u8 xec = XEC(m->status, xec_mask);
ded50623 722
b18434ca 723 if (boot_cpu_data.x86 >= 0x14) {
f05c41a9 724 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
ded50623
BP
725 " please report on LKML.\n");
726 return;
727 }
f9350efd 728
f05c41a9 729 pr_emerg(HW_ERR "MC3 Error");
f9350efd
BP
730
731 if (xec == 0x0) {
62452882 732 u8 r4 = R4(ec);
f9350efd 733
ded50623 734 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
f05c41a9 735 goto wrong_mc3_mce;
f9350efd 736
62452882 737 pr_cont(" during %s.\n", R4_MSG(ec));
ded50623 738 } else
f05c41a9 739 goto wrong_mc3_mce;
ded50623 740
f9350efd
BP
741 return;
742
f05c41a9
BP
743 wrong_mc3_mce:
744 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
f9350efd
BP
745}
746
f05c41a9 747static void decode_mc4_mce(struct mce *m)
5ce88f6e 748{
f3c0891c 749 unsigned int fam = x86_family(m->cpuid);
68782673
BP
750 int node_id = amd_get_nb_id(m->extcpu);
751 u16 ec = EC(m->status);
752 u8 xec = XEC(m->status, 0x1f);
753 u8 offset = 0;
5ce88f6e 754
f05c41a9 755 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
5ce88f6e 756
68782673
BP
757 switch (xec) {
758 case 0x0 ... 0xe:
5ce88f6e 759
68782673
BP
760 /* special handling for DRAM ECCs */
761 if (xec == 0x0 || xec == 0x8) {
762 /* no ECCs on F11h */
f3c0891c 763 if (fam == 0x11)
f05c41a9 764 goto wrong_mc4_mce;
5ce88f6e 765
f05c41a9 766 pr_cont("%s.\n", mc4_mce_desc[xec]);
5ce88f6e 767
5c332202
YG
768 if (decode_dram_ecc)
769 decode_dram_ecc(node_id, m);
68782673
BP
770 return;
771 }
5ce88f6e
BP
772 break;
773
774 case 0xf:
775 if (TLB_ERROR(ec))
776 pr_cont("GART Table Walk data error.\n");
777 else if (BUS_ERROR(ec))
778 pr_cont("DMA Exclusion Vector Table Walk error.\n");
779 else
f05c41a9 780 goto wrong_mc4_mce;
68782673 781 return;
5ce88f6e 782
05cd667d 783 case 0x19:
f3c0891c 784 if (fam == 0x15 || fam == 0x16)
05cd667d
BP
785 pr_cont("Compute Unit Data Error.\n");
786 else
f05c41a9 787 goto wrong_mc4_mce;
68782673 788 return;
05cd667d 789
5ce88f6e 790 case 0x1c ... 0x1f:
68782673 791 offset = 13;
5ce88f6e
BP
792 break;
793
794 default:
f05c41a9 795 goto wrong_mc4_mce;
68782673 796 }
5ce88f6e 797
f05c41a9 798 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
5ce88f6e
BP
799 return;
800
f05c41a9
BP
801 wrong_mc4_mce:
802 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
d93cc222 803}
d93cc222 804
f05c41a9 805static void decode_mc5_mce(struct mce *m)
53bd5fed 806{
f3c0891c 807 unsigned int fam = x86_family(m->cpuid);
eba4bfb3 808 u16 ec = EC(m->status);
62452882 809 u8 xec = XEC(m->status, xec_mask);
8259a7e5 810
f3c0891c 811 if (fam == 0xf || fam == 0x11)
f05c41a9 812 goto wrong_mc5_mce;
fe4ea262 813
f05c41a9 814 pr_emerg(HW_ERR "MC5 Error: ");
8259a7e5 815
eba4bfb3
AG
816 if (INT_ERROR(ec)) {
817 if (xec <= 0x1f) {
818 pr_cont("Hardware Assert.\n");
819 return;
820 } else
821 goto wrong_mc5_mce;
822 }
823
8259a7e5 824 if (xec == 0x0 || xec == 0xc)
f05c41a9 825 pr_cont("%s.\n", mc5_mce_desc[xec]);
aad19e51 826 else if (xec <= 0xd)
f05c41a9 827 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
8259a7e5 828 else
f05c41a9 829 goto wrong_mc5_mce;
8259a7e5
BP
830
831 return;
fe4ea262 832
f05c41a9
BP
833 wrong_mc5_mce:
834 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
53bd5fed
BP
835}
836
f05c41a9 837static void decode_mc6_mce(struct mce *m)
b8f85c47 838{
62452882 839 u8 xec = XEC(m->status, xec_mask);
b8f85c47 840
f05c41a9 841 pr_emerg(HW_ERR "MC6 Error: ");
b8f85c47 842
bc4febe9 843 if (xec > 0x5)
f05c41a9 844 goto wrong_mc6_mce;
b8f85c47 845
bc4febe9 846 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
b8f85c47
BP
847 return;
848
f05c41a9
BP
849 wrong_mc6_mce:
850 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
b8f85c47
BP
851}
852
be0aec23 853/* Decode errors according to Scalable MCA specification */
4ab1784b 854static void decode_smca_error(struct mce *m)
be0aec23 855{
1ce9cd7f 856 struct smca_hwid *hwid;
5896820e 857 unsigned int bank_type;
be0aec23 858 const char *ip_name;
5896820e 859 u8 xec = XEC(m->status, xec_mask);
be0aec23 860
5896820e 861 if (m->bank >= ARRAY_SIZE(smca_banks))
be0aec23 862 return;
be0aec23 863
f3c0891c 864 if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
a884675b
YG
865 pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
866
1ce9cd7f
BP
867 hwid = smca_banks[m->bank].hwid;
868 if (!hwid)
be0aec23 869 return;
be0aec23 870
1ce9cd7f 871 bank_type = hwid->bank_type;
c09a8c40 872 ip_name = smca_get_long_name(bank_type);
be0aec23 873
5896820e 874 pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
be0aec23 875
5896820e
YG
876 /* Only print the decode of valid error codes */
877 if (xec < smca_mce_descs[bank_type].num_descs &&
1ce9cd7f 878 (hwid->xec_bitmap & BIT_ULL(xec))) {
5896820e
YG
879 pr_emerg(HW_ERR "%s Error: ", ip_name);
880 pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
881 }
5c332202 882
5c332202 883 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
fbe63acf 884 decode_dram_ecc(cpu_to_node(m->extcpu), m);
be0aec23
AG
885}
886
6337583d 887static inline void amd_decode_err_code(u16 ec)
d93cc222 888{
980eec8b
JS
889 if (INT_ERROR(ec)) {
890 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
891 return;
892 }
fa7ae8cc
BP
893
894 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
895
896 if (BUS_ERROR(ec))
897 pr_cont(", mem/io: %s", II_MSG(ec));
898 else
899 pr_cont(", tx: %s", TT_MSG(ec));
900
901 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
902 pr_cont(", mem-tx: %s", R4_MSG(ec));
903
904 if (BUS_ERROR(ec))
905 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
906 }
907
908 pr_cont("\n");
549d042d 909}
549d042d 910
5ce88f6e
BP
911/*
912 * Filter out unwanted MCE signatures here.
913 */
914static bool amd_filter_mce(struct mce *m)
915{
5ce88f6e
BP
916 /*
917 * NB GART TLB error reporting is disabled by default.
918 */
39844347 919 if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5 && !report_gart_errors)
5ce88f6e
BP
920 return true;
921
922 return false;
923}
924
d5c6770d
BP
925static const char *decode_error_status(struct mce *m)
926{
927 if (m->status & MCI_STATUS_UC) {
928 if (m->status & MCI_STATUS_PCC)
929 return "System Fatal error.";
930 if (m->mcgstatus & MCG_STATUS_RIPV)
931 return "Uncorrected, software restartable error.";
932 return "Uncorrected, software containable error.";
933 }
934
935 if (m->status & MCI_STATUS_DEFERRED)
67d7fd30 936 return "Deferred error, no action required.";
d5c6770d
BP
937
938 return "Corrected error, no action required.";
939}
940
1fbcd909
BP
941static int
942amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
549d042d 943{
fb253195 944 struct mce *m = (struct mce *)data;
f3c0891c 945 unsigned int fam = x86_family(m->cpuid);
b0b07a2b 946 int ecc;
549d042d 947
5ce88f6e
BP
948 if (amd_filter_mce(m))
949 return NOTIFY_STOP;
950
fd0f5fff
BP
951 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
952
953 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
954 m->extcpu,
f3c0891c 955 fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
fd0f5fff
BP
956 m->bank,
957 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
99e1dfb7
AG
958 ((m->status & MCI_STATUS_UC) ? "UE" :
959 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
fd0f5fff
BP
960 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
961 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
962 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
963
f3c0891c 964 if (fam >= 0x15) {
a6c14dce
YG
965 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
966
967 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
f3c0891c 968 if (fam != 0x15 || m->bank != 4)
a6c14dce
YG
969 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
970 }
fd0f5fff 971
a348ed83 972 if (boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
973 u32 low, high;
974 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
975
b300e873
YG
976 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
977
be0aec23
AG
978 if (!rdmsr_safe(addr, &low, &high) &&
979 (low & MCI_CONFIG_MCAX))
980 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
981 }
982
fd0f5fff
BP
983 /* do the two bits[14:13] together */
984 ecc = (m->status >> 45) & 0x3;
985 if (ecc)
986 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
987
988 pr_cont("]: 0x%016llx\n", m->status);
989
990 if (m->status & MCI_STATUS_ADDRV)
75bf2f64 991 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
fd0f5fff 992
a348ed83 993 if (boot_cpu_has(X86_FEATURE_SMCA)) {
75bf2f64
YG
994 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
995
b300e873
YG
996 if (m->status & MCI_STATUS_SYNDV)
997 pr_cont(", Syndrome: 0x%016llx", m->synd);
998
999 pr_cont("\n");
1000
4ab1784b 1001 decode_smca_error(m);
be0aec23 1002 goto err_code;
75bf2f64 1003 }
be0aec23 1004
0bceab67
BP
1005 if (m->tsc)
1006 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
1007
fd0f5fff
BP
1008 if (!fam_ops)
1009 goto err_code;
1010
51966241
BP
1011 switch (m->bank) {
1012 case 0:
f05c41a9 1013 decode_mc0_mce(m);
51966241 1014 break;
d93cc222 1015
ab5535e7 1016 case 1:
f05c41a9 1017 decode_mc1_mce(m);
ab5535e7
BP
1018 break;
1019
56cad2d6 1020 case 2:
4a73d3de 1021 decode_mc2_mce(m);
56cad2d6
BP
1022 break;
1023
f9350efd 1024 case 3:
f05c41a9 1025 decode_mc3_mce(m);
f9350efd
BP
1026 break;
1027
51966241 1028 case 4:
f05c41a9 1029 decode_mc4_mce(m);
51966241
BP
1030 break;
1031
53bd5fed 1032 case 5:
f05c41a9 1033 decode_mc5_mce(m);
53bd5fed
BP
1034 break;
1035
b8f85c47 1036 case 6:
f05c41a9 1037 decode_mc6_mce(m);
b8f85c47
BP
1038 break;
1039
51966241
BP
1040 default:
1041 break;
b69b29de 1042 }
51966241 1043
fd0f5fff 1044 err_code:
51966241 1045 amd_decode_err_code(m->status & 0xffff);
fb253195
BP
1046
1047 return NOTIFY_STOP;
549d042d 1048}
f436f8bb 1049
fb253195
BP
1050static struct notifier_block amd_mce_dec_nb = {
1051 .notifier_call = amd_decode_mce,
9026cc82 1052 .priority = MCE_PRIO_EDAC,
fb253195
BP
1053};
1054
f436f8bb
IM
1055static int __init mce_amd_init(void)
1056{
bad11e03
BP
1057 struct cpuinfo_x86 *c = &boot_cpu_data;
1058
1059 if (c->x86_vendor != X86_VENDOR_AMD)
fd0f5fff 1060 return -ENODEV;
e045c291 1061
888ab8e6
BP
1062 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1063 if (!fam_ops)
1064 return -ENOMEM;
1065
bad11e03 1066 switch (c->x86) {
888ab8e6 1067 case 0xf:
f05c41a9
BP
1068 fam_ops->mc0_mce = k8_mc0_mce;
1069 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1070 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1071 break;
1072
1073 case 0x10:
f05c41a9
BP
1074 fam_ops->mc0_mce = f10h_mc0_mce;
1075 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1076 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1077 break;
1078
f0157b3a 1079 case 0x11:
f05c41a9
BP
1080 fam_ops->mc0_mce = k8_mc0_mce;
1081 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1082 fam_ops->mc2_mce = k8_mc2_mce;
f0157b3a
BP
1083 break;
1084
9be0bb10 1085 case 0x12:
f05c41a9
BP
1086 fam_ops->mc0_mce = f12h_mc0_mce;
1087 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1088 fam_ops->mc2_mce = k8_mc2_mce;
9be0bb10
BP
1089 break;
1090
888ab8e6 1091 case 0x14:
980eec8b
JS
1092 fam_ops->mc0_mce = cat_mc0_mce;
1093 fam_ops->mc1_mce = cat_mc1_mce;
4a73d3de 1094 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1095 break;
1096
2be64bfa 1097 case 0x15:
eba4bfb3
AG
1098 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1099
f05c41a9
BP
1100 fam_ops->mc0_mce = f15h_mc0_mce;
1101 fam_ops->mc1_mce = f15h_mc1_mce;
4a73d3de 1102 fam_ops->mc2_mce = f15h_mc2_mce;
2be64bfa
BP
1103 break;
1104
980eec8b
JS
1105 case 0x16:
1106 xec_mask = 0x1f;
1107 fam_ops->mc0_mce = cat_mc0_mce;
1108 fam_ops->mc1_mce = cat_mc1_mce;
1109 fam_ops->mc2_mce = f16h_mc2_mce;
1110 break;
1111
be0aec23 1112 case 0x17:
be0aec23 1113 xec_mask = 0x3f;
a348ed83 1114 if (!boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
1115 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1116 goto err_out;
1117 }
1118 break;
1119
888ab8e6 1120 default:
ec3e82d6 1121 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
be0aec23 1122 goto err_out;
888ab8e6
BP
1123 }
1124
9530d608
BP
1125 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1126
3653ada5 1127 mce_register_decode_chain(&amd_mce_dec_nb);
f436f8bb
IM
1128
1129 return 0;
be0aec23
AG
1130
1131err_out:
1132 kfree(fam_ops);
1133 fam_ops = NULL;
1134 return -EINVAL;
f436f8bb
IM
1135}
1136early_initcall(mce_amd_init);
0d18b2e3
BP
1137
#ifdef MODULE
/*
 * Module teardown: unhook the decoder from the MCE decode chain and release
 * the per-family ops. fam_ops is reset to NULL after freeing, as the error
 * path of mce_amd_init() already does, so no dangling pointer is left.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
	fam_ops = NULL;
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif