/*
 * drivers/edac/mce_amd.c
 *
 * Taken from the blame view of mirror_ubuntu-artful-kernel.git on
 * git.proxmox.com, at "Merge tag 'nfs-for-4.13-4' of
 * git://git.linux-nfs.org/projects/anna/linux-nfs".
 */
#include <linux/module.h>
#include <linux/slab.h>

#include "mce_amd.h"

6static struct amd_decoder_ops *fam_ops;
7
2be64bfa 8static u8 xec_mask = 0xf;
5ce88f6e 9
549d042d 10static bool report_gart_errors;
5c332202 11static void (*decode_dram_ecc)(int node_id, struct mce *m);
549d042d
BP
12
13void amd_report_gart_errors(bool v)
14{
15 report_gart_errors = v;
16}
17EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
b0b07a2b 19void amd_register_ecc_decoder(void (*f)(int, struct mce *))
549d042d 20{
5c332202 21 decode_dram_ecc = f;
549d042d
BP
22}
23EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
b0b07a2b 25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
549d042d 26{
5c332202
YG
27 if (decode_dram_ecc) {
28 WARN_ON(decode_dram_ecc != f);
549d042d 29
5c332202 30 decode_dram_ecc = NULL;
549d042d
BP
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
b52401ce
DT
/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */

/* transaction type */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };

/* cache level */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };

/* memory transaction type */
static const char * const rrrr_msgs[] = {
       "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

/* participating processor (non-static: exported for use outside this file) */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
static const char * const to_msgs[] = { "no timeout", "timed out" };

/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };

/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };

/*
 * Family 15h MC1 descriptions, indexed by f15h_mc1_mce():
 * xec 0x0-0xa map to entries 0-10, xec 0xd to entry 11 (xec - 2) and
 * xec 0x10-0x15 to entries 12-17 (xec - 4).
 */
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",		/* xec = 0xd */
	"Microcode Patch Buffer",		/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};

/*
 * Family 15h MC2 descriptions, indexed by f15h_mc2_mce():
 * xec 0x4-0xc map to entries 0-8 (xec - 0x4) and xec 0x10-0x14 to
 * entries 9-13 (xec - 0x7).
 */
static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",		/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",			/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};

/*
 * MC4 (northbridge) descriptions, indexed by decode_mc4_mce():
 * xec 0x0-0xe map directly to entries 0-14, xec 0x1c-0x1f map to
 * entries 15-18 (xec - 13).
 */
static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	"L3 data cache ECC error",		/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};

/* MC5 descriptions, indexed directly by xec (0x0-0xd) in decode_mc5_mce(). */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",
	"Retire status queue"
};

/* MC6 descriptions, indexed directly by xec (0x0-0x5) in decode_mc6_mce(). */
static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",
	"Free List",
	"Physical Register File",
	"Retire Queue",
	"Scheduler table",
	"Status Register File",
};

/* Scalable MCA error strings */

/* SMCA_LS bank descriptions, indexed by extended error code. */
static const char * const smca_ls_mce_desc[] = {
	"Load queue parity",
	"Store queue parity",
	"Miss address buffer payload parity",
	"L1 TLB parity",
	"Reserved",
	"DC tag error type 6",
	"DC tag error type 1",
	"Internal error type 1",
	"Internal error type 2",
	"Sys Read data error thread 0",
	"Sys read data error thread 1",
	"DC tag error type 2",
	"DC data error type 1 (poison consumption)",
	"DC data error type 2",
	"DC data error type 3",
	"DC tag error type 4",
	"L2 TLB parity",
	"PDC parity error",
	"DC tag error type 3",
	"DC tag error type 5",
	"L2 fill data error",
};

/* SMCA_IF bank descriptions, indexed by extended error code. */
static const char * const smca_if_mce_desc[] = {
	"microtag probe port parity error",
	"IC microtag or full tag multi-hit error",
	"IC full tag parity",
	"IC data array parity",
	"Decoupling queue phys addr parity error",
	"L0 ITLB parity error",
	"L1 ITLB parity error",
	"L2 ITLB parity error",
	"BPQ snoop parity on Thread 0",
	"BPQ snoop parity on Thread 1",
	"L1 BTB multi-match error",
	"L2 BTB multi-match error",
	"L2 Cache Response Poison error",
	"System Read Data error",
};

/* SMCA_L2_CACHE bank descriptions, indexed by extended error code. */
static const char * const smca_l2_mce_desc[] = {
	"L2M tag multi-way-hit error",
	"L2M tag ECC error",
	"L2M data ECC error",
	"HW assert",
};

/* SMCA_DE bank descriptions, indexed by extended error code. */
static const char * const smca_de_mce_desc[] = {
	"uop cache tag parity error",
	"uop cache data parity error",
	"Insn buffer parity error",
	"uop queue parity error",
	"Insn dispatch queue parity error",
	"Fetch address FIFO parity",
	"Patch RAM data parity",
	"Patch RAM sequencer parity",
	"uop buffer parity"
};

/* SMCA_EX bank descriptions, indexed by extended error code. */
static const char * const smca_ex_mce_desc[] = {
	"Watchdog timeout error",
	"Phy register file parity",
	"Flag register file parity",
	"Immediate displacement register file parity",
	"Address generator payload parity",
	"EX payload parity",
	"Checkpoint queue parity",
	"Retire dispatch queue parity",
	"Retire status queue parity error",
	"Scheduling queue parity error",
	"Branch buffer queue parity error",
};

/* SMCA_FP bank descriptions, indexed by extended error code. */
static const char * const smca_fp_mce_desc[] = {
	"Physical register file parity",
	"Freelist parity error",
	"Schedule queue parity",
	"NSQ parity error",
	"Retire queue parity",
	"Status register file parity",
	"Hardware assertion",
};

/* SMCA_L3_CACHE bank descriptions, indexed by extended error code. */
static const char * const smca_l3_mce_desc[] = {
	"Shadow tag macro ECC error",
	"Shadow tag macro multi-way-hit error",
	"L3M tag ECC error",
	"L3M tag multi-way-hit error",
	"L3M data ECC error",
	"XI parity, L3 fill done channel error",
	"L3 victim queue parity",
	"L3 HW assert",
};

/* SMCA_CS bank descriptions, indexed by extended error code. */
static const char * const smca_cs_mce_desc[] = {
	"Illegal request from transport layer",
	"Address violation",
	"Security violation",
	"Illegal response from transport layer",
	"Unexpected response",
	"Parity error on incoming request or probe response data",
	"Parity error on incoming read response data",
	"Atomic request parity",
	"ECC error on probe filter access",
};

/* SMCA_PIE bank descriptions, indexed by extended error code. */
static const char * const smca_pie_mce_desc[] = {
	"HW assert",
	"Internal PIE register security violation",
	"Error on GMI link",
	"Poison data written to internal PIE register",
};

/*
 * SMCA_UMC bank descriptions, indexed by extended error code.
 * decode_smca_errors() additionally calls the DRAM ECC decoder for xec 0.
 */
static const char * const smca_umc_mce_desc[] = {
	"DRAM ECC error",
	"Data poison error on DRAM",
	"SDP parity error",
	"Advanced peripheral bus error",
	"Command/address parity error",
	"Write data CRC error",
};

/* SMCA_PB bank descriptions, indexed by extended error code. */
static const char * const smca_pb_mce_desc[] = {
	"Parameter Block RAM ECC error",
};

/* SMCA_PSP bank descriptions, indexed by extended error code. */
static const char * const smca_psp_mce_desc[] = {
	"PSP RAM ECC or parity error",
};

/* SMCA_SMU bank descriptions, indexed by extended error code. */
static const char * const smca_smu_mce_desc[] = {
	"SMU RAM ECC or parity error",
};

286struct smca_mce_desc {
287 const char * const *descs;
288 unsigned int num_descs;
289};
290
291static struct smca_mce_desc smca_mce_descs[] = {
292 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
293 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
294 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
295 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
296 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
297 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
298 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
299 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
300 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
301 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
302 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
303 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
304 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
305};
306
f05c41a9 307static bool f12h_mc0_mce(u16 ec, u8 xec)
51966241 308{
888ab8e6 309 bool ret = false;
51966241 310
888ab8e6 311 if (MEM_ERROR(ec)) {
62452882 312 u8 ll = LL(ec);
888ab8e6 313 ret = true;
51966241 314
888ab8e6
BP
315 if (ll == LL_L2)
316 pr_cont("during L1 linefill from L2.\n");
317 else if (ll == LL_L1)
62452882 318 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
888ab8e6
BP
319 else
320 ret = false;
321 }
322 return ret;
323}
51966241 324
f05c41a9 325static bool f10h_mc0_mce(u16 ec, u8 xec)
9be0bb10 326{
62452882 327 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
9be0bb10
BP
328 pr_cont("during data scrub.\n");
329 return true;
330 }
f05c41a9 331 return f12h_mc0_mce(ec, xec);
9be0bb10
BP
332}
333
f05c41a9 334static bool k8_mc0_mce(u16 ec, u8 xec)
888ab8e6
BP
335{
336 if (BUS_ERROR(ec)) {
337 pr_cont("during system linefill.\n");
338 return true;
339 }
51966241 340
f05c41a9 341 return f10h_mc0_mce(ec, xec);
888ab8e6
BP
342}
343
980eec8b 344static bool cat_mc0_mce(u16 ec, u8 xec)
888ab8e6 345{
62452882 346 u8 r4 = R4(ec);
888ab8e6
BP
347 bool ret = true;
348
349 if (MEM_ERROR(ec)) {
350
62452882 351 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
888ab8e6
BP
352 return false;
353
354 switch (r4) {
355 case R4_DRD:
356 case R4_DWR:
357 pr_cont("Data/Tag parity error due to %s.\n",
358 (r4 == R4_DRD ? "load/hw prf" : "store"));
359 break;
360 case R4_EVICT:
361 pr_cont("Copyback parity error on a tag miss.\n");
362 break;
363 case R4_SNOOP:
364 pr_cont("Tag parity error during snoop.\n");
365 break;
366 default:
367 ret = false;
368 }
369 } else if (BUS_ERROR(ec)) {
370
62452882 371 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
888ab8e6
BP
372 return false;
373
374 pr_cont("System read data error on a ");
375
376 switch (r4) {
377 case R4_RD:
378 pr_cont("TLB reload.\n");
379 break;
380 case R4_DWR:
381 pr_cont("store.\n");
382 break;
383 case R4_DRD:
384 pr_cont("load.\n");
385 break;
386 default:
387 ret = false;
388 }
389 } else {
390 ret = false;
391 }
392
393 return ret;
394}
395
f05c41a9 396static bool f15h_mc0_mce(u16 ec, u8 xec)
25a4f8b0
BP
397{
398 bool ret = true;
399
400 if (MEM_ERROR(ec)) {
401
402 switch (xec) {
403 case 0x0:
404 pr_cont("Data Array access error.\n");
405 break;
406
407 case 0x1:
408 pr_cont("UC error during a linefill from L2/NB.\n");
409 break;
410
411 case 0x2:
412 case 0x11:
413 pr_cont("STQ access error.\n");
414 break;
415
416 case 0x3:
417 pr_cont("SCB access error.\n");
418 break;
419
420 case 0x10:
421 pr_cont("Tag error.\n");
422 break;
423
424 case 0x12:
425 pr_cont("LDQ access error.\n");
426 break;
427
428 default:
429 ret = false;
430 }
431 } else if (BUS_ERROR(ec)) {
432
433 if (!xec)
344f0a06 434 pr_cont("System Read Data Error.\n");
25a4f8b0 435 else
344f0a06 436 pr_cont(" Internal error condition type %d.\n", xec);
eba4bfb3
AG
437 } else if (INT_ERROR(ec)) {
438 if (xec <= 0x1f)
439 pr_cont("Hardware Assert.\n");
440 else
441 ret = false;
442
25a4f8b0
BP
443 } else
444 ret = false;
445
446 return ret;
447}
448
f05c41a9 449static void decode_mc0_mce(struct mce *m)
888ab8e6 450{
62452882
BP
451 u16 ec = EC(m->status);
452 u8 xec = XEC(m->status, xec_mask);
888ab8e6 453
f05c41a9 454 pr_emerg(HW_ERR "MC0 Error: ");
888ab8e6
BP
455
456 /* TLB error signatures are the same across families */
457 if (TLB_ERROR(ec)) {
62452882 458 if (TT(ec) == TT_DATA) {
888ab8e6 459 pr_cont("%s TLB %s.\n", LL_MSG(ec),
25a4f8b0
BP
460 ((xec == 2) ? "locked miss"
461 : (xec ? "multimatch" : "parity")));
888ab8e6
BP
462 return;
463 }
f05c41a9 464 } else if (fam_ops->mc0_mce(ec, xec))
25a4f8b0
BP
465 ;
466 else
f05c41a9 467 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
51966241
BP
468}
469
f05c41a9 470static bool k8_mc1_mce(u16 ec, u8 xec)
ab5535e7 471{
62452882 472 u8 ll = LL(ec);
dd53bce4 473 bool ret = true;
ab5535e7 474
dd53bce4
BP
475 if (!MEM_ERROR(ec))
476 return false;
ab5535e7 477
dd53bce4
BP
478 if (ll == 0x2)
479 pr_cont("during a linefill from L2.\n");
480 else if (ll == 0x1) {
62452882 481 switch (R4(ec)) {
dd53bce4
BP
482 case R4_IRD:
483 pr_cont("Parity error during data load.\n");
484 break;
ab5535e7 485
dd53bce4
BP
486 case R4_EVICT:
487 pr_cont("Copyback Parity/Victim error.\n");
488 break;
489
490 case R4_SNOOP:
491 pr_cont("Tag Snoop error.\n");
492 break;
493
494 default:
495 ret = false;
496 break;
497 }
ab5535e7 498 } else
dd53bce4 499 ret = false;
ab5535e7 500
dd53bce4
BP
501 return ret;
502}
503
980eec8b 504static bool cat_mc1_mce(u16 ec, u8 xec)
dd53bce4 505{
62452882 506 u8 r4 = R4(ec);
dd53bce4 507 bool ret = true;
ab5535e7 508
980eec8b
JS
509 if (!MEM_ERROR(ec))
510 return false;
511
512 if (TT(ec) != TT_INSTR)
513 return false;
514
515 if (r4 == R4_IRD)
516 pr_cont("Data/tag array parity error for a tag hit.\n");
517 else if (r4 == R4_SNOOP)
518 pr_cont("Tag error during snoop/victimization.\n");
519 else if (xec == 0x0)
520 pr_cont("Tag parity error from victim castout.\n");
521 else if (xec == 0x2)
522 pr_cont("Microcode patch RAM parity error.\n");
523 else
524 ret = false;
dd53bce4 525
dd53bce4
BP
526 return ret;
527}
528
f05c41a9 529static bool f15h_mc1_mce(u16 ec, u8 xec)
86039cd4
BP
530{
531 bool ret = true;
532
533 if (!MEM_ERROR(ec))
534 return false;
535
536 switch (xec) {
537 case 0x0 ... 0xa:
f05c41a9 538 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
86039cd4
BP
539 break;
540
541 case 0xd:
f05c41a9 542 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
86039cd4
BP
543 break;
544
6c1173a6 545 case 0x10:
f05c41a9 546 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
6c1173a6
BP
547 break;
548
eba4bfb3 549 case 0x11 ... 0x15:
f05c41a9 550 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
86039cd4
BP
551 break;
552
553 default:
554 ret = false;
555 }
556 return ret;
557}
558
f05c41a9 559static void decode_mc1_mce(struct mce *m)
dd53bce4 560{
62452882
BP
561 u16 ec = EC(m->status);
562 u8 xec = XEC(m->status, xec_mask);
dd53bce4 563
f05c41a9 564 pr_emerg(HW_ERR "MC1 Error: ");
dd53bce4
BP
565
566 if (TLB_ERROR(ec))
567 pr_cont("%s TLB %s.\n", LL_MSG(ec),
568 (xec ? "multimatch" : "parity error"));
569 else if (BUS_ERROR(ec)) {
525906bc 570 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
dd53bce4
BP
571
572 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
eba4bfb3
AG
573 } else if (INT_ERROR(ec)) {
574 if (xec <= 0x3f)
575 pr_cont("Hardware Assert.\n");
576 else
577 goto wrong_mc1_mce;
f05c41a9 578 } else if (fam_ops->mc1_mce(ec, xec))
dd53bce4
BP
579 ;
580 else
eba4bfb3
AG
581 goto wrong_mc1_mce;
582
583 return;
584
585wrong_mc1_mce:
586 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
ab5535e7
BP
587}
588
4a73d3de 589static bool k8_mc2_mce(u16 ec, u8 xec)
56cad2d6 590{
4a73d3de 591 bool ret = true;
56cad2d6
BP
592
593 if (xec == 0x1)
594 pr_cont(" in the write data buffers.\n");
595 else if (xec == 0x3)
596 pr_cont(" in the victim data buffers.\n");
597 else if (xec == 0x2 && MEM_ERROR(ec))
62452882 598 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
56cad2d6
BP
599 else if (xec == 0x0) {
600 if (TLB_ERROR(ec))
50872ccd
BP
601 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
602 TT_MSG(ec));
56cad2d6
BP
603 else if (BUS_ERROR(ec))
604 pr_cont(": %s/ECC error in data read from NB: %s.\n",
62452882 605 R4_MSG(ec), PP_MSG(ec));
56cad2d6 606 else if (MEM_ERROR(ec)) {
62452882 607 u8 r4 = R4(ec);
56cad2d6 608
62452882 609 if (r4 >= 0x7)
56cad2d6 610 pr_cont(": %s error during data copyback.\n",
62452882
BP
611 R4_MSG(ec));
612 else if (r4 <= 0x1)
56cad2d6 613 pr_cont(": %s parity/ECC error during data "
62452882 614 "access from L2.\n", R4_MSG(ec));
56cad2d6 615 else
4a73d3de 616 ret = false;
56cad2d6 617 } else
4a73d3de 618 ret = false;
56cad2d6 619 } else
4a73d3de 620 ret = false;
56cad2d6 621
4a73d3de 622 return ret;
56cad2d6
BP
623}
624
4a73d3de 625static bool f15h_mc2_mce(u16 ec, u8 xec)
70fdb494 626{
4a73d3de 627 bool ret = true;
70fdb494
BP
628
629 if (TLB_ERROR(ec)) {
630 if (xec == 0x0)
631 pr_cont("Data parity TLB read error.\n");
632 else if (xec == 0x1)
633 pr_cont("Poison data provided for TLB fill.\n");
634 else
4a73d3de 635 ret = false;
70fdb494
BP
636 } else if (BUS_ERROR(ec)) {
637 if (xec > 2)
4a73d3de 638 ret = false;
70fdb494
BP
639
640 pr_cont("Error during attempted NB data read.\n");
641 } else if (MEM_ERROR(ec)) {
642 switch (xec) {
643 case 0x4 ... 0xc:
f05c41a9 644 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
70fdb494
BP
645 break;
646
647 case 0x10 ... 0x14:
f05c41a9 648 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
70fdb494
BP
649 break;
650
651 default:
4a73d3de 652 ret = false;
70fdb494 653 }
eba4bfb3
AG
654 } else if (INT_ERROR(ec)) {
655 if (xec <= 0x3f)
656 pr_cont("Hardware Assert.\n");
657 else
658 ret = false;
70fdb494
BP
659 }
660
4a73d3de
JS
661 return ret;
662}
663
980eec8b
JS
664static bool f16h_mc2_mce(u16 ec, u8 xec)
665{
666 u8 r4 = R4(ec);
667
668 if (!MEM_ERROR(ec))
669 return false;
670
671 switch (xec) {
672 case 0x04 ... 0x05:
673 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
674 break;
675
676 case 0x09 ... 0x0b:
677 case 0x0d ... 0x0f:
678 pr_cont("ECC error in L2 tag (%s).\n",
679 ((r4 == R4_GEN) ? "BankReq" :
680 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
681 break;
682
683 case 0x10 ... 0x19:
684 case 0x1b:
685 pr_cont("ECC error in L2 data array (%s).\n",
686 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
687 ((r4 == R4_GEN) ? "Attr" :
688 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
689 break;
690
691 case 0x1c ... 0x1d:
692 case 0x1f:
693 pr_cont("Parity error in L2 attribute bits (%s).\n",
694 ((r4 == R4_RD) ? "Hit" :
695 ((r4 == R4_GEN) ? "Attr" : "Fill")));
696 break;
697
698 default:
699 return false;
700 }
701
702 return true;
703}
704
4a73d3de
JS
705static void decode_mc2_mce(struct mce *m)
706{
707 u16 ec = EC(m->status);
708 u8 xec = XEC(m->status, xec_mask);
70fdb494 709
4a73d3de
JS
710 pr_emerg(HW_ERR "MC2 Error: ");
711
712 if (!fam_ops->mc2_mce(ec, xec))
713 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
70fdb494
BP
714}
715
f05c41a9 716static void decode_mc3_mce(struct mce *m)
f9350efd 717{
62452882
BP
718 u16 ec = EC(m->status);
719 u8 xec = XEC(m->status, xec_mask);
ded50623 720
b18434ca 721 if (boot_cpu_data.x86 >= 0x14) {
f05c41a9 722 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
ded50623
BP
723 " please report on LKML.\n");
724 return;
725 }
f9350efd 726
f05c41a9 727 pr_emerg(HW_ERR "MC3 Error");
f9350efd
BP
728
729 if (xec == 0x0) {
62452882 730 u8 r4 = R4(ec);
f9350efd 731
ded50623 732 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
f05c41a9 733 goto wrong_mc3_mce;
f9350efd 734
62452882 735 pr_cont(" during %s.\n", R4_MSG(ec));
ded50623 736 } else
f05c41a9 737 goto wrong_mc3_mce;
ded50623 738
f9350efd
BP
739 return;
740
f05c41a9
BP
741 wrong_mc3_mce:
742 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
f9350efd
BP
743}
744
f05c41a9 745static void decode_mc4_mce(struct mce *m)
5ce88f6e 746{
68782673
BP
747 struct cpuinfo_x86 *c = &boot_cpu_data;
748 int node_id = amd_get_nb_id(m->extcpu);
749 u16 ec = EC(m->status);
750 u8 xec = XEC(m->status, 0x1f);
751 u8 offset = 0;
5ce88f6e 752
f05c41a9 753 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
5ce88f6e 754
68782673
BP
755 switch (xec) {
756 case 0x0 ... 0xe:
5ce88f6e 757
68782673
BP
758 /* special handling for DRAM ECCs */
759 if (xec == 0x0 || xec == 0x8) {
760 /* no ECCs on F11h */
761 if (c->x86 == 0x11)
f05c41a9 762 goto wrong_mc4_mce;
5ce88f6e 763
f05c41a9 764 pr_cont("%s.\n", mc4_mce_desc[xec]);
5ce88f6e 765
5c332202
YG
766 if (decode_dram_ecc)
767 decode_dram_ecc(node_id, m);
68782673
BP
768 return;
769 }
5ce88f6e
BP
770 break;
771
772 case 0xf:
773 if (TLB_ERROR(ec))
774 pr_cont("GART Table Walk data error.\n");
775 else if (BUS_ERROR(ec))
776 pr_cont("DMA Exclusion Vector Table Walk error.\n");
777 else
f05c41a9 778 goto wrong_mc4_mce;
68782673 779 return;
5ce88f6e 780
05cd667d 781 case 0x19:
980eec8b 782 if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
05cd667d
BP
783 pr_cont("Compute Unit Data Error.\n");
784 else
f05c41a9 785 goto wrong_mc4_mce;
68782673 786 return;
05cd667d 787
5ce88f6e 788 case 0x1c ... 0x1f:
68782673 789 offset = 13;
5ce88f6e
BP
790 break;
791
792 default:
f05c41a9 793 goto wrong_mc4_mce;
68782673 794 }
5ce88f6e 795
f05c41a9 796 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
5ce88f6e
BP
797 return;
798
f05c41a9
BP
799 wrong_mc4_mce:
800 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
d93cc222 801}
d93cc222 802
f05c41a9 803static void decode_mc5_mce(struct mce *m)
53bd5fed 804{
8259a7e5 805 struct cpuinfo_x86 *c = &boot_cpu_data;
eba4bfb3 806 u16 ec = EC(m->status);
62452882 807 u8 xec = XEC(m->status, xec_mask);
8259a7e5
BP
808
809 if (c->x86 == 0xf || c->x86 == 0x11)
f05c41a9 810 goto wrong_mc5_mce;
fe4ea262 811
f05c41a9 812 pr_emerg(HW_ERR "MC5 Error: ");
8259a7e5 813
eba4bfb3
AG
814 if (INT_ERROR(ec)) {
815 if (xec <= 0x1f) {
816 pr_cont("Hardware Assert.\n");
817 return;
818 } else
819 goto wrong_mc5_mce;
820 }
821
8259a7e5 822 if (xec == 0x0 || xec == 0xc)
f05c41a9 823 pr_cont("%s.\n", mc5_mce_desc[xec]);
aad19e51 824 else if (xec <= 0xd)
f05c41a9 825 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
8259a7e5 826 else
f05c41a9 827 goto wrong_mc5_mce;
8259a7e5
BP
828
829 return;
fe4ea262 830
f05c41a9
BP
831 wrong_mc5_mce:
832 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
53bd5fed
BP
833}
834
f05c41a9 835static void decode_mc6_mce(struct mce *m)
b8f85c47 836{
62452882 837 u8 xec = XEC(m->status, xec_mask);
b8f85c47 838
f05c41a9 839 pr_emerg(HW_ERR "MC6 Error: ");
b8f85c47 840
bc4febe9 841 if (xec > 0x5)
f05c41a9 842 goto wrong_mc6_mce;
b8f85c47 843
bc4febe9 844 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
b8f85c47
BP
845 return;
846
f05c41a9
BP
847 wrong_mc6_mce:
848 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
b8f85c47
BP
849}
850
be0aec23
AG
851/* Decode errors according to Scalable MCA specification */
852static void decode_smca_errors(struct mce *m)
853{
1ce9cd7f 854 struct smca_hwid *hwid;
5896820e 855 unsigned int bank_type;
be0aec23 856 const char *ip_name;
5896820e 857 u8 xec = XEC(m->status, xec_mask);
be0aec23 858
5896820e 859 if (m->bank >= ARRAY_SIZE(smca_banks))
be0aec23 860 return;
be0aec23 861
a884675b
YG
862 if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
863 pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
864
1ce9cd7f
BP
865 hwid = smca_banks[m->bank].hwid;
866 if (!hwid)
be0aec23 867 return;
be0aec23 868
1ce9cd7f 869 bank_type = hwid->bank_type;
c09a8c40 870 ip_name = smca_get_long_name(bank_type);
be0aec23 871
5896820e 872 pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
be0aec23 873
5896820e
YG
874 /* Only print the decode of valid error codes */
875 if (xec < smca_mce_descs[bank_type].num_descs &&
1ce9cd7f 876 (hwid->xec_bitmap & BIT_ULL(xec))) {
5896820e
YG
877 pr_emerg(HW_ERR "%s Error: ", ip_name);
878 pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
879 }
5c332202
YG
880
881 /*
882 * amd_get_nb_id() returns the last level cache id.
883 * The last level cache on Fam17h is 1 level below the node.
884 */
885 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
886 decode_dram_ecc(amd_get_nb_id(m->extcpu) >> 1, m);
be0aec23
AG
887}
888
6337583d 889static inline void amd_decode_err_code(u16 ec)
d93cc222 890{
980eec8b
JS
891 if (INT_ERROR(ec)) {
892 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
893 return;
894 }
fa7ae8cc
BP
895
896 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
897
898 if (BUS_ERROR(ec))
899 pr_cont(", mem/io: %s", II_MSG(ec));
900 else
901 pr_cont(", tx: %s", TT_MSG(ec));
902
903 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
904 pr_cont(", mem-tx: %s", R4_MSG(ec));
905
906 if (BUS_ERROR(ec))
907 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
908 }
909
910 pr_cont("\n");
549d042d 911}
549d042d 912
5ce88f6e
BP
913/*
914 * Filter out unwanted MCE signatures here.
915 */
916static bool amd_filter_mce(struct mce *m)
917{
918 u8 xec = (m->status >> 16) & 0x1f;
919
920 /*
921 * NB GART TLB error reporting is disabled by default.
922 */
923 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
924 return true;
925
926 return false;
927}
928
d5c6770d
BP
929static const char *decode_error_status(struct mce *m)
930{
931 if (m->status & MCI_STATUS_UC) {
932 if (m->status & MCI_STATUS_PCC)
933 return "System Fatal error.";
934 if (m->mcgstatus & MCG_STATUS_RIPV)
935 return "Uncorrected, software restartable error.";
936 return "Uncorrected, software containable error.";
937 }
938
939 if (m->status & MCI_STATUS_DEFERRED)
67d7fd30 940 return "Deferred error, no action required.";
d5c6770d
BP
941
942 return "Corrected error, no action required.";
943}
944
1fbcd909
BP
945static int
946amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
549d042d 947{
fb253195 948 struct mce *m = (struct mce *)data;
f89f8388 949 struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
b0b07a2b 950 int ecc;
549d042d 951
5ce88f6e
BP
952 if (amd_filter_mce(m))
953 return NOTIFY_STOP;
954
fd0f5fff
BP
955 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
956
957 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
958 m->extcpu,
959 c->x86, c->x86_model, c->x86_mask,
960 m->bank,
961 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
99e1dfb7
AG
962 ((m->status & MCI_STATUS_UC) ? "UE" :
963 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
fd0f5fff
BP
964 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
965 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
966 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
967
a6c14dce
YG
968 if (c->x86 >= 0x15) {
969 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
970
971 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
972 if (c->x86 != 0x15 || m->bank != 4)
973 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
974 }
fd0f5fff 975
a348ed83 976 if (boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
977 u32 low, high;
978 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
979
b300e873
YG
980 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
981
be0aec23
AG
982 if (!rdmsr_safe(addr, &low, &high) &&
983 (low & MCI_CONFIG_MCAX))
984 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
985 }
986
fd0f5fff
BP
987 /* do the two bits[14:13] together */
988 ecc = (m->status >> 45) & 0x3;
989 if (ecc)
990 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
991
992 pr_cont("]: 0x%016llx\n", m->status);
993
994 if (m->status & MCI_STATUS_ADDRV)
75bf2f64 995 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
fd0f5fff 996
a348ed83 997 if (boot_cpu_has(X86_FEATURE_SMCA)) {
75bf2f64
YG
998 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
999
b300e873
YG
1000 if (m->status & MCI_STATUS_SYNDV)
1001 pr_cont(", Syndrome: 0x%016llx", m->synd);
1002
1003 pr_cont("\n");
1004
be0aec23
AG
1005 decode_smca_errors(m);
1006 goto err_code;
75bf2f64 1007 }
be0aec23 1008
0bceab67
BP
1009 if (m->tsc)
1010 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
1011
fd0f5fff
BP
1012 if (!fam_ops)
1013 goto err_code;
1014
51966241
BP
1015 switch (m->bank) {
1016 case 0:
f05c41a9 1017 decode_mc0_mce(m);
51966241 1018 break;
d93cc222 1019
ab5535e7 1020 case 1:
f05c41a9 1021 decode_mc1_mce(m);
ab5535e7
BP
1022 break;
1023
56cad2d6 1024 case 2:
4a73d3de 1025 decode_mc2_mce(m);
56cad2d6
BP
1026 break;
1027
f9350efd 1028 case 3:
f05c41a9 1029 decode_mc3_mce(m);
f9350efd
BP
1030 break;
1031
51966241 1032 case 4:
f05c41a9 1033 decode_mc4_mce(m);
51966241
BP
1034 break;
1035
53bd5fed 1036 case 5:
f05c41a9 1037 decode_mc5_mce(m);
53bd5fed
BP
1038 break;
1039
b8f85c47 1040 case 6:
f05c41a9 1041 decode_mc6_mce(m);
b8f85c47
BP
1042 break;
1043
51966241
BP
1044 default:
1045 break;
b69b29de 1046 }
51966241 1047
fd0f5fff 1048 err_code:
51966241 1049 amd_decode_err_code(m->status & 0xffff);
fb253195
BP
1050
1051 return NOTIFY_STOP;
549d042d 1052}
f436f8bb 1053
fb253195
BP
1054static struct notifier_block amd_mce_dec_nb = {
1055 .notifier_call = amd_decode_mce,
9026cc82 1056 .priority = MCE_PRIO_EDAC,
fb253195
BP
1057};
1058
f436f8bb
IM
1059static int __init mce_amd_init(void)
1060{
bad11e03
BP
1061 struct cpuinfo_x86 *c = &boot_cpu_data;
1062
1063 if (c->x86_vendor != X86_VENDOR_AMD)
fd0f5fff 1064 return -ENODEV;
e045c291 1065
888ab8e6
BP
1066 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1067 if (!fam_ops)
1068 return -ENOMEM;
1069
bad11e03 1070 switch (c->x86) {
888ab8e6 1071 case 0xf:
f05c41a9
BP
1072 fam_ops->mc0_mce = k8_mc0_mce;
1073 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1074 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1075 break;
1076
1077 case 0x10:
f05c41a9
BP
1078 fam_ops->mc0_mce = f10h_mc0_mce;
1079 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1080 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1081 break;
1082
f0157b3a 1083 case 0x11:
f05c41a9
BP
1084 fam_ops->mc0_mce = k8_mc0_mce;
1085 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1086 fam_ops->mc2_mce = k8_mc2_mce;
f0157b3a
BP
1087 break;
1088
9be0bb10 1089 case 0x12:
f05c41a9
BP
1090 fam_ops->mc0_mce = f12h_mc0_mce;
1091 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1092 fam_ops->mc2_mce = k8_mc2_mce;
9be0bb10
BP
1093 break;
1094
888ab8e6 1095 case 0x14:
980eec8b
JS
1096 fam_ops->mc0_mce = cat_mc0_mce;
1097 fam_ops->mc1_mce = cat_mc1_mce;
4a73d3de 1098 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1099 break;
1100
2be64bfa 1101 case 0x15:
eba4bfb3
AG
1102 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1103
f05c41a9
BP
1104 fam_ops->mc0_mce = f15h_mc0_mce;
1105 fam_ops->mc1_mce = f15h_mc1_mce;
4a73d3de 1106 fam_ops->mc2_mce = f15h_mc2_mce;
2be64bfa
BP
1107 break;
1108
980eec8b
JS
1109 case 0x16:
1110 xec_mask = 0x1f;
1111 fam_ops->mc0_mce = cat_mc0_mce;
1112 fam_ops->mc1_mce = cat_mc1_mce;
1113 fam_ops->mc2_mce = f16h_mc2_mce;
1114 break;
1115
be0aec23 1116 case 0x17:
be0aec23 1117 xec_mask = 0x3f;
a348ed83 1118 if (!boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
1119 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1120 goto err_out;
1121 }
1122 break;
1123
888ab8e6 1124 default:
ec3e82d6 1125 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
be0aec23 1126 goto err_out;
888ab8e6
BP
1127 }
1128
9530d608
BP
1129 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1130
3653ada5 1131 mce_register_decode_chain(&amd_mce_dec_nb);
f436f8bb
IM
1132
1133 return 0;
be0aec23
AG
1134
1135err_out:
1136 kfree(fam_ops);
1137 fam_ops = NULL;
1138 return -EINVAL;
f436f8bb
IM
1139}
1140early_initcall(mce_amd_init);
0d18b2e3
BP
1141
#ifdef MODULE
/*
 * Module teardown: unregister the decoder before releasing the callback
 * table, then clear the pointer so no stale address is left behind.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
	fam_ops = NULL;
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif