]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/edac/mce_amd.c
EDAC/mce_amd: Use SMCA prefix for error descriptions arrays
[mirror_ubuntu-bionic-kernel.git] / drivers / edac / mce_amd.c
CommitLineData
b70ef010 1#include <linux/module.h>
888ab8e6
BP
2#include <linux/slab.h>
3
47ca08a4 4#include "mce_amd.h"
b52401ce 5
888ab8e6
BP
6static struct amd_decoder_ops *fam_ops;
7
2be64bfa 8static u8 xec_mask = 0xf;
5ce88f6e 9
549d042d 10static bool report_gart_errors;
b0b07a2b 11static void (*nb_bus_decoder)(int node_id, struct mce *m);
549d042d
BP
12
13void amd_report_gart_errors(bool v)
14{
15 report_gart_errors = v;
16}
17EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
b0b07a2b 19void amd_register_ecc_decoder(void (*f)(int, struct mce *))
549d042d
BP
20{
21 nb_bus_decoder = f;
22}
23EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
b0b07a2b 25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
549d042d
BP
26{
27 if (nb_bus_decoder) {
28 WARN_ON(nb_bus_decoder != f);
29
30 nb_bus_decoder = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
b52401ce
DT
35/*
36 * string representation for the different MCA reported error types, see F3x48
37 * or MSR0000_0411.
38 */
6337583d
BP
39
/* transaction type */
static const char * const tt_msgs[] = {
	"INSN", "DATA", "GEN", "RESV",
};

/* cache level */
static const char * const ll_msgs[] = {
	"RESV", "L1", "L2", "L3/GEN",
};

/* memory transaction type */
static const char * const rrrr_msgs[] = {
	"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP",
};

/* participating processor (exported for use outside this file) */
const char * const pp_msgs[] = {
	"SRC", "RES", "OBS", "GEN",
};
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
static const char * const to_msgs[] = {
	"no timeout", "timed out",
};

/* memory or i/o */
static const char * const ii_msgs[] = {
	"MEM", "RESV", "IO", "GEN",
};

/* internal error type */
static const char * const uu_msgs[] = {
	"RESV", "RESV", "HWA", "RESV",
};
980eec8b 63
/*
 * F15h MC1 error descriptions. The table is packed: f15h_mc1_mce() indexes
 * it with xec for 0x0-0xa, xec - 2 for 0xd and xec - 4 for 0x10-0x15.
 */
static const char * const f15h_mc1_mce_desc[] = {
	/* xec = 0x0 ... 0xa */
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	/* xec = 0xd */
	"PFB valid bit parity error",
	/* xec = 0x10 */
	"Microcode Patch Buffer",
	/* xec = 0x11 ... 0x15, printed as "Decoder <name> parity error" */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};
84
/*
 * F15h MC2 error descriptions. Packed table: f15h_mc2_mce() uses index
 * xec - 0x4 for xec 0x4-0xc and xec - 0x7 for xec 0x10-0x14.
 */
static const char * const f15h_mc2_mce_desc[] = {
	/* xec = 0x4 ... 0xc */
	"Fill ECC error on data fills",
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	/* xec = 0x10 ... 0x14 */
	"L2 Tag ECC error",
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};
101
/*
 * MC4 (northbridge) error descriptions. decode_mc4_mce() indexes this with
 * xec for 0x0-0xe and with xec - 13 for 0x1c-0x1f.
 */
static const char * const mc4_mce_desc[] = {
	/* xec = 0x0 ... 0xe */
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	/* xec = 0x1c ... 0x1f */
	"L3 data cache ECC error",
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
123
/* MC5 error descriptions, indexed directly by xec (0x0 ... 0xd). */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",		/* 0x0 */
	"Wakeup array dest tag",		/* 0x1 */
	"AG payload array",			/* 0x2 */
	"EX payload array",			/* 0x3 */
	"IDRF array",				/* 0x4 */
	"Retire dispatch queue",		/* 0x5 */
	"Mapper checkpoint array",		/* 0x6 */
	"Physical register file EX0 port",	/* 0x7 */
	"Physical register file EX1 port",	/* 0x8 */
	"Physical register file AG0 port",	/* 0x9 */
	"Physical register file AG1 port",	/* 0xa */
	"Flag register file",			/* 0xb */
	"DE error occurred",			/* 0xc */
	"Retire status queue"			/* 0xd */
};
140
bc4febe9
AG
/* MC6 error descriptions, indexed directly by xec (0x0 ... 0x5). */
static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",		/* 0x0 */
	"Free List",			/* 0x1 */
	"Physical Register File",	/* 0x2 */
	"Retire Queue",			/* 0x3 */
	"Scheduler table",		/* 0x4 */
	"Status Register File",		/* 0x5 */
};
149
/* Scalable MCA error strings */

/*
 * Load-Store unit error descriptions, indexed by xec. Slot 0x4 is a
 * placeholder; decode_f17h_core_errors() rejects xec 0x4 before indexing.
 */
static const char * const smca_ls_mce_desc[] = {
	"Load queue parity",
	"Store queue parity",
	"Miss address buffer payload parity",
	"L1 TLB parity",
	"Reserved",
	"DC tag error type 6",
	"DC tag error type 1",
	"Internal error type 1",
	"Internal error type 2",
	"Sys Read data error thread 0",
	"Sys read data error thread 1",
	"DC tag error type 2",
	"DC data error type 1 (poison consumption)",
	"DC data error type 2",
	"DC data error type 3",
	"DC tag error type 4",
	"L2 TLB parity",
	"PDC parity error",
	"DC tag error type 3",
	"DC tag error type 5",
	"L2 fill data error",
};
174
/* Instruction Fetch unit error descriptions, indexed by xec. */
static const char * const smca_if_mce_desc[] = {
	"microtag probe port parity error",		/* 0x0 */
	"IC microtag or full tag multi-hit error",	/* 0x1 */
	"IC full tag parity",				/* 0x2 */
	"IC data array parity",				/* 0x3 */
	"Decoupling queue phys addr parity error",	/* 0x4 */
	"L0 ITLB parity error",				/* 0x5 */
	"L1 ITLB parity error",				/* 0x6 */
	"L2 ITLB parity error",				/* 0x7 */
	"BPQ snoop parity on Thread 0",			/* 0x8 */
	"BPQ snoop parity on Thread 1",			/* 0x9 */
	"L1 BTB multi-match error",			/* 0xa */
	"L2 BTB multi-match error",			/* 0xb */
	"L2 Cache Response Poison error",		/* 0xc */
	"System Read Data error",			/* 0xd */
};
191
/* L2 cache error descriptions, indexed by xec. */
static const char * const smca_l2_mce_desc[] = {
	"L2M tag multi-way-hit error",	/* 0x0 */
	"L2M tag ECC error",		/* 0x1 */
	"L2M data ECC error",		/* 0x2 */
	"HW assert",			/* 0x3 */
};
198
/* Decode unit error descriptions, indexed by xec. */
static const char * const smca_de_mce_desc[] = {
	"uop cache tag parity error",		/* 0x0 */
	"uop cache data parity error",		/* 0x1 */
	"Insn buffer parity error",		/* 0x2 */
	"uop queue parity error",		/* 0x3 */
	"Insn dispatch queue parity error",	/* 0x4 */
	"Fetch address FIFO parity",		/* 0x5 */
	"Patch RAM data parity",		/* 0x6 */
	"Patch RAM sequencer parity",		/* 0x7 */
	"uop buffer parity"			/* 0x8 */
};
210
/* Execution unit error descriptions, indexed by xec. */
static const char * const smca_ex_mce_desc[] = {
	"Watchdog timeout error",			/* 0x0 */
	"Phy register file parity",			/* 0x1 */
	"Flag register file parity",			/* 0x2 */
	"Immediate displacement register file parity",	/* 0x3 */
	"Address generator payload parity",		/* 0x4 */
	"EX payload parity",				/* 0x5 */
	"Checkpoint queue parity",			/* 0x6 */
	"Retire dispatch queue parity",			/* 0x7 */
	"Retire status queue parity error",		/* 0x8 */
	"Scheduling queue parity error",		/* 0x9 */
	"Branch buffer queue parity error",		/* 0xa */
};
224
/* Floating Point unit error descriptions, indexed by xec. */
static const char * const smca_fp_mce_desc[] = {
	"Physical register file parity",	/* 0x0 */
	"Freelist parity error",		/* 0x1 */
	"Schedule queue parity",		/* 0x2 */
	"NSQ parity error",			/* 0x3 */
	"Retire queue parity",			/* 0x4 */
	"Status register file parity",		/* 0x5 */
	"Hardware assertion",			/* 0x6 */
};
234
/* L3 cache error descriptions, indexed by xec. */
static const char * const smca_l3_mce_desc[] = {
	"Shadow tag macro ECC error",			/* 0x0 */
	"Shadow tag macro multi-way-hit error",		/* 0x1 */
	"L3M tag ECC error",				/* 0x2 */
	"L3M tag multi-way-hit error",			/* 0x3 */
	"L3M data ECC error",				/* 0x4 */
	"XI parity, L3 fill done channel error",	/* 0x5 */
	"L3 victim queue parity",			/* 0x6 */
	"L3 HW assert",					/* 0x7 */
};
245
/* Data Fabric CS block error descriptions, indexed by xec. */
static const char * const smca_cs_mce_desc[] = {
	"Illegal request from transport layer",				/* 0x0 */
	"Address violation",						/* 0x1 */
	"Security violation",						/* 0x2 */
	"Illegal response from transport layer",			/* 0x3 */
	"Unexpected response",						/* 0x4 */
	"Parity error on incoming request or probe response data",	/* 0x5 */
	"Parity error on incoming read response data",			/* 0x6 */
	"Atomic request parity",					/* 0x7 */
	"ECC error on probe filter access",				/* 0x8 */
};
257
/* Data Fabric PIE block error descriptions, indexed by xec. */
static const char * const smca_pie_mce_desc[] = {
	"HW assert",					/* 0x0 */
	"Internal PIE register security violation",	/* 0x1 */
	"Error on GMI link",				/* 0x2 */
	"Poison data written to internal PIE register",	/* 0x3 */
};
264
/* UMC block error descriptions, indexed by xec. */
static const char * const smca_umc_mce_desc[] = {
	"DRAM ECC error",			/* 0x0 */
	"Data poison error on DRAM",		/* 0x1 */
	"SDP parity error",			/* 0x2 */
	"Advanced peripheral bus error",	/* 0x3 */
	"Command/address parity error",		/* 0x4 */
	"Write data CRC error",			/* 0x5 */
};
273
/* PB block: single known error, xec 0x0. */
static const char * const smca_pb_mce_desc[] = {
	"Parameter Block RAM ECC error",
};

/* PSP block: single known error, xec 0x0. */
static const char * const smca_psp_mce_desc[] = {
	"PSP RAM ECC or parity error",
};

/* SMU block: single known error, xec 0x0. */
static const char * const smca_smu_mce_desc[] = {
	"SMU RAM ECC or parity error",
};
285
f05c41a9 286static bool f12h_mc0_mce(u16 ec, u8 xec)
51966241 287{
888ab8e6 288 bool ret = false;
51966241 289
888ab8e6 290 if (MEM_ERROR(ec)) {
62452882 291 u8 ll = LL(ec);
888ab8e6 292 ret = true;
51966241 293
888ab8e6
BP
294 if (ll == LL_L2)
295 pr_cont("during L1 linefill from L2.\n");
296 else if (ll == LL_L1)
62452882 297 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
888ab8e6
BP
298 else
299 ret = false;
300 }
301 return ret;
302}
51966241 303
f05c41a9 304static bool f10h_mc0_mce(u16 ec, u8 xec)
9be0bb10 305{
62452882 306 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
9be0bb10
BP
307 pr_cont("during data scrub.\n");
308 return true;
309 }
f05c41a9 310 return f12h_mc0_mce(ec, xec);
9be0bb10
BP
311}
312
f05c41a9 313static bool k8_mc0_mce(u16 ec, u8 xec)
888ab8e6
BP
314{
315 if (BUS_ERROR(ec)) {
316 pr_cont("during system linefill.\n");
317 return true;
318 }
51966241 319
f05c41a9 320 return f10h_mc0_mce(ec, xec);
888ab8e6
BP
321}
322
980eec8b 323static bool cat_mc0_mce(u16 ec, u8 xec)
888ab8e6 324{
62452882 325 u8 r4 = R4(ec);
888ab8e6
BP
326 bool ret = true;
327
328 if (MEM_ERROR(ec)) {
329
62452882 330 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
888ab8e6
BP
331 return false;
332
333 switch (r4) {
334 case R4_DRD:
335 case R4_DWR:
336 pr_cont("Data/Tag parity error due to %s.\n",
337 (r4 == R4_DRD ? "load/hw prf" : "store"));
338 break;
339 case R4_EVICT:
340 pr_cont("Copyback parity error on a tag miss.\n");
341 break;
342 case R4_SNOOP:
343 pr_cont("Tag parity error during snoop.\n");
344 break;
345 default:
346 ret = false;
347 }
348 } else if (BUS_ERROR(ec)) {
349
62452882 350 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
888ab8e6
BP
351 return false;
352
353 pr_cont("System read data error on a ");
354
355 switch (r4) {
356 case R4_RD:
357 pr_cont("TLB reload.\n");
358 break;
359 case R4_DWR:
360 pr_cont("store.\n");
361 break;
362 case R4_DRD:
363 pr_cont("load.\n");
364 break;
365 default:
366 ret = false;
367 }
368 } else {
369 ret = false;
370 }
371
372 return ret;
373}
374
f05c41a9 375static bool f15h_mc0_mce(u16 ec, u8 xec)
25a4f8b0
BP
376{
377 bool ret = true;
378
379 if (MEM_ERROR(ec)) {
380
381 switch (xec) {
382 case 0x0:
383 pr_cont("Data Array access error.\n");
384 break;
385
386 case 0x1:
387 pr_cont("UC error during a linefill from L2/NB.\n");
388 break;
389
390 case 0x2:
391 case 0x11:
392 pr_cont("STQ access error.\n");
393 break;
394
395 case 0x3:
396 pr_cont("SCB access error.\n");
397 break;
398
399 case 0x10:
400 pr_cont("Tag error.\n");
401 break;
402
403 case 0x12:
404 pr_cont("LDQ access error.\n");
405 break;
406
407 default:
408 ret = false;
409 }
410 } else if (BUS_ERROR(ec)) {
411
412 if (!xec)
344f0a06 413 pr_cont("System Read Data Error.\n");
25a4f8b0 414 else
344f0a06 415 pr_cont(" Internal error condition type %d.\n", xec);
eba4bfb3
AG
416 } else if (INT_ERROR(ec)) {
417 if (xec <= 0x1f)
418 pr_cont("Hardware Assert.\n");
419 else
420 ret = false;
421
25a4f8b0
BP
422 } else
423 ret = false;
424
425 return ret;
426}
427
f05c41a9 428static void decode_mc0_mce(struct mce *m)
888ab8e6 429{
62452882
BP
430 u16 ec = EC(m->status);
431 u8 xec = XEC(m->status, xec_mask);
888ab8e6 432
f05c41a9 433 pr_emerg(HW_ERR "MC0 Error: ");
888ab8e6
BP
434
435 /* TLB error signatures are the same across families */
436 if (TLB_ERROR(ec)) {
62452882 437 if (TT(ec) == TT_DATA) {
888ab8e6 438 pr_cont("%s TLB %s.\n", LL_MSG(ec),
25a4f8b0
BP
439 ((xec == 2) ? "locked miss"
440 : (xec ? "multimatch" : "parity")));
888ab8e6
BP
441 return;
442 }
f05c41a9 443 } else if (fam_ops->mc0_mce(ec, xec))
25a4f8b0
BP
444 ;
445 else
f05c41a9 446 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
51966241
BP
447}
448
f05c41a9 449static bool k8_mc1_mce(u16 ec, u8 xec)
ab5535e7 450{
62452882 451 u8 ll = LL(ec);
dd53bce4 452 bool ret = true;
ab5535e7 453
dd53bce4
BP
454 if (!MEM_ERROR(ec))
455 return false;
ab5535e7 456
dd53bce4
BP
457 if (ll == 0x2)
458 pr_cont("during a linefill from L2.\n");
459 else if (ll == 0x1) {
62452882 460 switch (R4(ec)) {
dd53bce4
BP
461 case R4_IRD:
462 pr_cont("Parity error during data load.\n");
463 break;
ab5535e7 464
dd53bce4
BP
465 case R4_EVICT:
466 pr_cont("Copyback Parity/Victim error.\n");
467 break;
468
469 case R4_SNOOP:
470 pr_cont("Tag Snoop error.\n");
471 break;
472
473 default:
474 ret = false;
475 break;
476 }
ab5535e7 477 } else
dd53bce4 478 ret = false;
ab5535e7 479
dd53bce4
BP
480 return ret;
481}
482
980eec8b 483static bool cat_mc1_mce(u16 ec, u8 xec)
dd53bce4 484{
62452882 485 u8 r4 = R4(ec);
dd53bce4 486 bool ret = true;
ab5535e7 487
980eec8b
JS
488 if (!MEM_ERROR(ec))
489 return false;
490
491 if (TT(ec) != TT_INSTR)
492 return false;
493
494 if (r4 == R4_IRD)
495 pr_cont("Data/tag array parity error for a tag hit.\n");
496 else if (r4 == R4_SNOOP)
497 pr_cont("Tag error during snoop/victimization.\n");
498 else if (xec == 0x0)
499 pr_cont("Tag parity error from victim castout.\n");
500 else if (xec == 0x2)
501 pr_cont("Microcode patch RAM parity error.\n");
502 else
503 ret = false;
dd53bce4 504
dd53bce4
BP
505 return ret;
506}
507
f05c41a9 508static bool f15h_mc1_mce(u16 ec, u8 xec)
86039cd4
BP
509{
510 bool ret = true;
511
512 if (!MEM_ERROR(ec))
513 return false;
514
515 switch (xec) {
516 case 0x0 ... 0xa:
f05c41a9 517 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
86039cd4
BP
518 break;
519
520 case 0xd:
f05c41a9 521 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
86039cd4
BP
522 break;
523
6c1173a6 524 case 0x10:
f05c41a9 525 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
6c1173a6
BP
526 break;
527
eba4bfb3 528 case 0x11 ... 0x15:
f05c41a9 529 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
86039cd4
BP
530 break;
531
532 default:
533 ret = false;
534 }
535 return ret;
536}
537
f05c41a9 538static void decode_mc1_mce(struct mce *m)
dd53bce4 539{
62452882
BP
540 u16 ec = EC(m->status);
541 u8 xec = XEC(m->status, xec_mask);
dd53bce4 542
f05c41a9 543 pr_emerg(HW_ERR "MC1 Error: ");
dd53bce4
BP
544
545 if (TLB_ERROR(ec))
546 pr_cont("%s TLB %s.\n", LL_MSG(ec),
547 (xec ? "multimatch" : "parity error"));
548 else if (BUS_ERROR(ec)) {
525906bc 549 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
dd53bce4
BP
550
551 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
eba4bfb3
AG
552 } else if (INT_ERROR(ec)) {
553 if (xec <= 0x3f)
554 pr_cont("Hardware Assert.\n");
555 else
556 goto wrong_mc1_mce;
f05c41a9 557 } else if (fam_ops->mc1_mce(ec, xec))
dd53bce4
BP
558 ;
559 else
eba4bfb3
AG
560 goto wrong_mc1_mce;
561
562 return;
563
564wrong_mc1_mce:
565 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
ab5535e7
BP
566}
567
4a73d3de 568static bool k8_mc2_mce(u16 ec, u8 xec)
56cad2d6 569{
4a73d3de 570 bool ret = true;
56cad2d6
BP
571
572 if (xec == 0x1)
573 pr_cont(" in the write data buffers.\n");
574 else if (xec == 0x3)
575 pr_cont(" in the victim data buffers.\n");
576 else if (xec == 0x2 && MEM_ERROR(ec))
62452882 577 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
56cad2d6
BP
578 else if (xec == 0x0) {
579 if (TLB_ERROR(ec))
50872ccd
BP
580 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
581 TT_MSG(ec));
56cad2d6
BP
582 else if (BUS_ERROR(ec))
583 pr_cont(": %s/ECC error in data read from NB: %s.\n",
62452882 584 R4_MSG(ec), PP_MSG(ec));
56cad2d6 585 else if (MEM_ERROR(ec)) {
62452882 586 u8 r4 = R4(ec);
56cad2d6 587
62452882 588 if (r4 >= 0x7)
56cad2d6 589 pr_cont(": %s error during data copyback.\n",
62452882
BP
590 R4_MSG(ec));
591 else if (r4 <= 0x1)
56cad2d6 592 pr_cont(": %s parity/ECC error during data "
62452882 593 "access from L2.\n", R4_MSG(ec));
56cad2d6 594 else
4a73d3de 595 ret = false;
56cad2d6 596 } else
4a73d3de 597 ret = false;
56cad2d6 598 } else
4a73d3de 599 ret = false;
56cad2d6 600
4a73d3de 601 return ret;
56cad2d6
BP
602}
603
4a73d3de 604static bool f15h_mc2_mce(u16 ec, u8 xec)
70fdb494 605{
4a73d3de 606 bool ret = true;
70fdb494
BP
607
608 if (TLB_ERROR(ec)) {
609 if (xec == 0x0)
610 pr_cont("Data parity TLB read error.\n");
611 else if (xec == 0x1)
612 pr_cont("Poison data provided for TLB fill.\n");
613 else
4a73d3de 614 ret = false;
70fdb494
BP
615 } else if (BUS_ERROR(ec)) {
616 if (xec > 2)
4a73d3de 617 ret = false;
70fdb494
BP
618
619 pr_cont("Error during attempted NB data read.\n");
620 } else if (MEM_ERROR(ec)) {
621 switch (xec) {
622 case 0x4 ... 0xc:
f05c41a9 623 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
70fdb494
BP
624 break;
625
626 case 0x10 ... 0x14:
f05c41a9 627 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
70fdb494
BP
628 break;
629
630 default:
4a73d3de 631 ret = false;
70fdb494 632 }
eba4bfb3
AG
633 } else if (INT_ERROR(ec)) {
634 if (xec <= 0x3f)
635 pr_cont("Hardware Assert.\n");
636 else
637 ret = false;
70fdb494
BP
638 }
639
4a73d3de
JS
640 return ret;
641}
642
980eec8b
JS
643static bool f16h_mc2_mce(u16 ec, u8 xec)
644{
645 u8 r4 = R4(ec);
646
647 if (!MEM_ERROR(ec))
648 return false;
649
650 switch (xec) {
651 case 0x04 ... 0x05:
652 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
653 break;
654
655 case 0x09 ... 0x0b:
656 case 0x0d ... 0x0f:
657 pr_cont("ECC error in L2 tag (%s).\n",
658 ((r4 == R4_GEN) ? "BankReq" :
659 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
660 break;
661
662 case 0x10 ... 0x19:
663 case 0x1b:
664 pr_cont("ECC error in L2 data array (%s).\n",
665 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
666 ((r4 == R4_GEN) ? "Attr" :
667 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
668 break;
669
670 case 0x1c ... 0x1d:
671 case 0x1f:
672 pr_cont("Parity error in L2 attribute bits (%s).\n",
673 ((r4 == R4_RD) ? "Hit" :
674 ((r4 == R4_GEN) ? "Attr" : "Fill")));
675 break;
676
677 default:
678 return false;
679 }
680
681 return true;
682}
683
4a73d3de
JS
684static void decode_mc2_mce(struct mce *m)
685{
686 u16 ec = EC(m->status);
687 u8 xec = XEC(m->status, xec_mask);
70fdb494 688
4a73d3de
JS
689 pr_emerg(HW_ERR "MC2 Error: ");
690
691 if (!fam_ops->mc2_mce(ec, xec))
692 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
70fdb494
BP
693}
694
f05c41a9 695static void decode_mc3_mce(struct mce *m)
f9350efd 696{
62452882
BP
697 u16 ec = EC(m->status);
698 u8 xec = XEC(m->status, xec_mask);
ded50623 699
b18434ca 700 if (boot_cpu_data.x86 >= 0x14) {
f05c41a9 701 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
ded50623
BP
702 " please report on LKML.\n");
703 return;
704 }
f9350efd 705
f05c41a9 706 pr_emerg(HW_ERR "MC3 Error");
f9350efd
BP
707
708 if (xec == 0x0) {
62452882 709 u8 r4 = R4(ec);
f9350efd 710
ded50623 711 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
f05c41a9 712 goto wrong_mc3_mce;
f9350efd 713
62452882 714 pr_cont(" during %s.\n", R4_MSG(ec));
ded50623 715 } else
f05c41a9 716 goto wrong_mc3_mce;
ded50623 717
f9350efd
BP
718 return;
719
f05c41a9
BP
720 wrong_mc3_mce:
721 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
f9350efd
BP
722}
723
f05c41a9 724static void decode_mc4_mce(struct mce *m)
5ce88f6e 725{
68782673
BP
726 struct cpuinfo_x86 *c = &boot_cpu_data;
727 int node_id = amd_get_nb_id(m->extcpu);
728 u16 ec = EC(m->status);
729 u8 xec = XEC(m->status, 0x1f);
730 u8 offset = 0;
5ce88f6e 731
f05c41a9 732 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
5ce88f6e 733
68782673
BP
734 switch (xec) {
735 case 0x0 ... 0xe:
5ce88f6e 736
68782673
BP
737 /* special handling for DRAM ECCs */
738 if (xec == 0x0 || xec == 0x8) {
739 /* no ECCs on F11h */
740 if (c->x86 == 0x11)
f05c41a9 741 goto wrong_mc4_mce;
5ce88f6e 742
f05c41a9 743 pr_cont("%s.\n", mc4_mce_desc[xec]);
5ce88f6e 744
68782673
BP
745 if (nb_bus_decoder)
746 nb_bus_decoder(node_id, m);
747 return;
748 }
5ce88f6e
BP
749 break;
750
751 case 0xf:
752 if (TLB_ERROR(ec))
753 pr_cont("GART Table Walk data error.\n");
754 else if (BUS_ERROR(ec))
755 pr_cont("DMA Exclusion Vector Table Walk error.\n");
756 else
f05c41a9 757 goto wrong_mc4_mce;
68782673 758 return;
5ce88f6e 759
05cd667d 760 case 0x19:
980eec8b 761 if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
05cd667d
BP
762 pr_cont("Compute Unit Data Error.\n");
763 else
f05c41a9 764 goto wrong_mc4_mce;
68782673 765 return;
05cd667d 766
5ce88f6e 767 case 0x1c ... 0x1f:
68782673 768 offset = 13;
5ce88f6e
BP
769 break;
770
771 default:
f05c41a9 772 goto wrong_mc4_mce;
68782673 773 }
5ce88f6e 774
f05c41a9 775 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
5ce88f6e
BP
776 return;
777
f05c41a9
BP
778 wrong_mc4_mce:
779 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
d93cc222 780}
d93cc222 781
f05c41a9 782static void decode_mc5_mce(struct mce *m)
53bd5fed 783{
8259a7e5 784 struct cpuinfo_x86 *c = &boot_cpu_data;
eba4bfb3 785 u16 ec = EC(m->status);
62452882 786 u8 xec = XEC(m->status, xec_mask);
8259a7e5
BP
787
788 if (c->x86 == 0xf || c->x86 == 0x11)
f05c41a9 789 goto wrong_mc5_mce;
fe4ea262 790
f05c41a9 791 pr_emerg(HW_ERR "MC5 Error: ");
8259a7e5 792
eba4bfb3
AG
793 if (INT_ERROR(ec)) {
794 if (xec <= 0x1f) {
795 pr_cont("Hardware Assert.\n");
796 return;
797 } else
798 goto wrong_mc5_mce;
799 }
800
8259a7e5 801 if (xec == 0x0 || xec == 0xc)
f05c41a9 802 pr_cont("%s.\n", mc5_mce_desc[xec]);
aad19e51 803 else if (xec <= 0xd)
f05c41a9 804 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
8259a7e5 805 else
f05c41a9 806 goto wrong_mc5_mce;
8259a7e5
BP
807
808 return;
fe4ea262 809
f05c41a9
BP
810 wrong_mc5_mce:
811 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
53bd5fed
BP
812}
813
f05c41a9 814static void decode_mc6_mce(struct mce *m)
b8f85c47 815{
62452882 816 u8 xec = XEC(m->status, xec_mask);
b8f85c47 817
f05c41a9 818 pr_emerg(HW_ERR "MC6 Error: ");
b8f85c47 819
bc4febe9 820 if (xec > 0x5)
f05c41a9 821 goto wrong_mc6_mce;
b8f85c47 822
bc4febe9 823 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
b8f85c47
BP
824 return;
825
f05c41a9
BP
826 wrong_mc6_mce:
827 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
b8f85c47
BP
828}
829
be0aec23
AG
830static void decode_f17h_core_errors(const char *ip_name, u8 xec,
831 unsigned int mca_type)
832{
833 const char * const *error_desc_array;
834 size_t len;
835
836 pr_emerg(HW_ERR "%s Error: ", ip_name);
837
838 switch (mca_type) {
839 case SMCA_LS:
856095b1
YG
840 error_desc_array = smca_ls_mce_desc;
841 len = ARRAY_SIZE(smca_ls_mce_desc) - 1;
be0aec23
AG
842
843 if (xec == 0x4) {
844 pr_cont("Unrecognized LS MCA error code.\n");
845 return;
846 }
847 break;
848
849 case SMCA_IF:
856095b1
YG
850 error_desc_array = smca_if_mce_desc;
851 len = ARRAY_SIZE(smca_if_mce_desc) - 1;
be0aec23
AG
852 break;
853
854 case SMCA_L2_CACHE:
856095b1
YG
855 error_desc_array = smca_l2_mce_desc;
856 len = ARRAY_SIZE(smca_l2_mce_desc) - 1;
be0aec23
AG
857 break;
858
859 case SMCA_DE:
856095b1
YG
860 error_desc_array = smca_de_mce_desc;
861 len = ARRAY_SIZE(smca_de_mce_desc) - 1;
be0aec23
AG
862 break;
863
864 case SMCA_EX:
856095b1
YG
865 error_desc_array = smca_ex_mce_desc;
866 len = ARRAY_SIZE(smca_ex_mce_desc) - 1;
be0aec23
AG
867 break;
868
869 case SMCA_FP:
856095b1
YG
870 error_desc_array = smca_fp_mce_desc;
871 len = ARRAY_SIZE(smca_fp_mce_desc) - 1;
be0aec23
AG
872 break;
873
874 case SMCA_L3_CACHE:
856095b1
YG
875 error_desc_array = smca_l3_mce_desc;
876 len = ARRAY_SIZE(smca_l3_mce_desc) - 1;
be0aec23
AG
877 break;
878
879 default:
880 pr_cont("Corrupted MCA core error info.\n");
881 return;
882 }
883
884 if (xec > len) {
885 pr_cont("Unrecognized %s MCA bank error code.\n",
886 amd_core_mcablock_names[mca_type]);
887 return;
888 }
889
890 pr_cont("%s.\n", error_desc_array[xec]);
891}
892
893static void decode_df_errors(u8 xec, unsigned int mca_type)
894{
895 const char * const *error_desc_array;
896 size_t len;
897
898 pr_emerg(HW_ERR "Data Fabric Error: ");
899
900 switch (mca_type) {
901 case SMCA_CS:
856095b1
YG
902 error_desc_array = smca_cs_mce_desc;
903 len = ARRAY_SIZE(smca_cs_mce_desc) - 1;
be0aec23
AG
904 break;
905
906 case SMCA_PIE:
856095b1
YG
907 error_desc_array = smca_pie_mce_desc;
908 len = ARRAY_SIZE(smca_pie_mce_desc) - 1;
be0aec23
AG
909 break;
910
911 default:
912 pr_cont("Corrupted MCA Data Fabric info.\n");
913 return;
914 }
915
916 if (xec > len) {
917 pr_cont("Unrecognized %s MCA bank error code.\n",
918 amd_df_mcablock_names[mca_type]);
919 return;
920 }
921
922 pr_cont("%s.\n", error_desc_array[xec]);
923}
924
925/* Decode errors according to Scalable MCA specification */
926static void decode_smca_errors(struct mce *m)
927{
928 u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
929 unsigned int hwid, mca_type, i;
930 u8 xec = XEC(m->status, xec_mask);
931 const char * const *error_desc_array;
932 const char *ip_name;
933 u32 low, high;
934 size_t len;
935
936 if (rdmsr_safe(addr, &low, &high)) {
b300e873 937 pr_emerg(HW_ERR "Invalid IP block specified.\n");
be0aec23
AG
938 return;
939 }
940
941 hwid = high & MCI_IPID_HWID;
942 mca_type = (high & MCI_IPID_MCATYPE) >> 16;
943
944 pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
945
946 /*
947 * Based on hwid and mca_type values, decode errors from respective IPs.
948 * Note: mca_type values make sense only in the context of an hwid.
949 */
950 for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
951 if (amd_hwids[i].hwid == hwid)
952 break;
953
954 switch (i) {
955 case SMCA_F17H_CORE:
956 ip_name = (mca_type == SMCA_L3_CACHE) ?
957 "L3 Cache" : "F17h Core";
958 return decode_f17h_core_errors(ip_name, xec, mca_type);
959 break;
960
961 case SMCA_DF:
962 return decode_df_errors(xec, mca_type);
963 break;
964
965 case SMCA_UMC:
856095b1
YG
966 error_desc_array = smca_umc_mce_desc;
967 len = ARRAY_SIZE(smca_umc_mce_desc) - 1;
be0aec23
AG
968 break;
969
970 case SMCA_PB:
856095b1
YG
971 error_desc_array = smca_pb_mce_desc;
972 len = ARRAY_SIZE(smca_pb_mce_desc) - 1;
be0aec23
AG
973 break;
974
975 case SMCA_PSP:
856095b1
YG
976 error_desc_array = smca_psp_mce_desc;
977 len = ARRAY_SIZE(smca_psp_mce_desc) - 1;
be0aec23
AG
978 break;
979
980 case SMCA_SMU:
856095b1
YG
981 error_desc_array = smca_smu_mce_desc;
982 len = ARRAY_SIZE(smca_smu_mce_desc) - 1;
be0aec23
AG
983 break;
984
985 default:
986 pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
987 return;
988 }
989
990 ip_name = amd_hwids[i].name;
991 pr_emerg(HW_ERR "%s Error: ", ip_name);
992
993 if (xec > len) {
994 pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
995 return;
996 }
997
998 pr_cont("%s.\n", error_desc_array[xec]);
999}
1000
6337583d 1001static inline void amd_decode_err_code(u16 ec)
d93cc222 1002{
980eec8b
JS
1003 if (INT_ERROR(ec)) {
1004 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
1005 return;
1006 }
fa7ae8cc
BP
1007
1008 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
1009
1010 if (BUS_ERROR(ec))
1011 pr_cont(", mem/io: %s", II_MSG(ec));
1012 else
1013 pr_cont(", tx: %s", TT_MSG(ec));
1014
1015 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
1016 pr_cont(", mem-tx: %s", R4_MSG(ec));
1017
1018 if (BUS_ERROR(ec))
1019 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
1020 }
1021
1022 pr_cont("\n");
549d042d 1023}
549d042d 1024
5ce88f6e
BP
1025/*
1026 * Filter out unwanted MCE signatures here.
1027 */
1028static bool amd_filter_mce(struct mce *m)
1029{
1030 u8 xec = (m->status >> 16) & 0x1f;
1031
1032 /*
1033 * NB GART TLB error reporting is disabled by default.
1034 */
1035 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
1036 return true;
1037
1038 return false;
1039}
1040
d5c6770d
BP
1041static const char *decode_error_status(struct mce *m)
1042{
1043 if (m->status & MCI_STATUS_UC) {
1044 if (m->status & MCI_STATUS_PCC)
1045 return "System Fatal error.";
1046 if (m->mcgstatus & MCG_STATUS_RIPV)
1047 return "Uncorrected, software restartable error.";
1048 return "Uncorrected, software containable error.";
1049 }
1050
1051 if (m->status & MCI_STATUS_DEFERRED)
1052 return "Deferred error.";
1053
1054 return "Corrected error, no action required.";
1055}
1056
9cdeb404 1057int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
549d042d 1058{
fb253195 1059 struct mce *m = (struct mce *)data;
f89f8388 1060 struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
b0b07a2b 1061 int ecc;
549d042d 1062
5ce88f6e
BP
1063 if (amd_filter_mce(m))
1064 return NOTIFY_STOP;
1065
fd0f5fff
BP
1066 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
1067
1068 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
1069 m->extcpu,
1070 c->x86, c->x86_model, c->x86_mask,
1071 m->bank,
1072 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
99e1dfb7
AG
1073 ((m->status & MCI_STATUS_UC) ? "UE" :
1074 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
fd0f5fff
BP
1075 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
1076 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
1077 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
1078
be0aec23 1079 if (c->x86 >= 0x15)
fd0f5fff
BP
1080 pr_cont("|%s|%s",
1081 ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
1082 ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
1083
a348ed83 1084 if (boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
1085 u32 low, high;
1086 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
1087
b300e873
YG
1088 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
1089
be0aec23
AG
1090 if (!rdmsr_safe(addr, &low, &high) &&
1091 (low & MCI_CONFIG_MCAX))
1092 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1093 }
1094
fd0f5fff
BP
1095 /* do the two bits[14:13] together */
1096 ecc = (m->status >> 45) & 0x3;
1097 if (ecc)
1098 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1099
1100 pr_cont("]: 0x%016llx\n", m->status);
1101
1102 if (m->status & MCI_STATUS_ADDRV)
b300e873 1103 pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
fd0f5fff 1104
a348ed83 1105 if (boot_cpu_has(X86_FEATURE_SMCA)) {
b300e873
YG
1106 if (m->status & MCI_STATUS_SYNDV)
1107 pr_cont(", Syndrome: 0x%016llx", m->synd);
1108
1109 pr_cont("\n");
1110
be0aec23
AG
1111 decode_smca_errors(m);
1112 goto err_code;
b300e873
YG
1113 } else
1114 pr_cont("\n");
be0aec23 1115
fd0f5fff
BP
1116 if (!fam_ops)
1117 goto err_code;
1118
51966241
BP
1119 switch (m->bank) {
1120 case 0:
f05c41a9 1121 decode_mc0_mce(m);
51966241 1122 break;
d93cc222 1123
ab5535e7 1124 case 1:
f05c41a9 1125 decode_mc1_mce(m);
ab5535e7
BP
1126 break;
1127
56cad2d6 1128 case 2:
4a73d3de 1129 decode_mc2_mce(m);
56cad2d6
BP
1130 break;
1131
f9350efd 1132 case 3:
f05c41a9 1133 decode_mc3_mce(m);
f9350efd
BP
1134 break;
1135
51966241 1136 case 4:
f05c41a9 1137 decode_mc4_mce(m);
51966241
BP
1138 break;
1139
53bd5fed 1140 case 5:
f05c41a9 1141 decode_mc5_mce(m);
53bd5fed
BP
1142 break;
1143
b8f85c47 1144 case 6:
f05c41a9 1145 decode_mc6_mce(m);
b8f85c47
BP
1146 break;
1147
51966241
BP
1148 default:
1149 break;
b69b29de 1150 }
51966241 1151
fd0f5fff 1152 err_code:
51966241 1153 amd_decode_err_code(m->status & 0xffff);
fb253195
BP
1154
1155 return NOTIFY_STOP;
549d042d 1156}
9cdeb404 1157EXPORT_SYMBOL_GPL(amd_decode_mce);
f436f8bb 1158
fb253195
BP
1159static struct notifier_block amd_mce_dec_nb = {
1160 .notifier_call = amd_decode_mce,
1161};
1162
f436f8bb
IM
1163static int __init mce_amd_init(void)
1164{
bad11e03
BP
1165 struct cpuinfo_x86 *c = &boot_cpu_data;
1166
1167 if (c->x86_vendor != X86_VENDOR_AMD)
fd0f5fff 1168 return -ENODEV;
e045c291 1169
888ab8e6
BP
1170 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1171 if (!fam_ops)
1172 return -ENOMEM;
1173
bad11e03 1174 switch (c->x86) {
888ab8e6 1175 case 0xf:
f05c41a9
BP
1176 fam_ops->mc0_mce = k8_mc0_mce;
1177 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1178 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1179 break;
1180
1181 case 0x10:
f05c41a9
BP
1182 fam_ops->mc0_mce = f10h_mc0_mce;
1183 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1184 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1185 break;
1186
f0157b3a 1187 case 0x11:
f05c41a9
BP
1188 fam_ops->mc0_mce = k8_mc0_mce;
1189 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1190 fam_ops->mc2_mce = k8_mc2_mce;
f0157b3a
BP
1191 break;
1192
9be0bb10 1193 case 0x12:
f05c41a9
BP
1194 fam_ops->mc0_mce = f12h_mc0_mce;
1195 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1196 fam_ops->mc2_mce = k8_mc2_mce;
9be0bb10
BP
1197 break;
1198
888ab8e6 1199 case 0x14:
980eec8b
JS
1200 fam_ops->mc0_mce = cat_mc0_mce;
1201 fam_ops->mc1_mce = cat_mc1_mce;
4a73d3de 1202 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1203 break;
1204
2be64bfa 1205 case 0x15:
eba4bfb3
AG
1206 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1207
f05c41a9
BP
1208 fam_ops->mc0_mce = f15h_mc0_mce;
1209 fam_ops->mc1_mce = f15h_mc1_mce;
4a73d3de 1210 fam_ops->mc2_mce = f15h_mc2_mce;
2be64bfa
BP
1211 break;
1212
980eec8b
JS
1213 case 0x16:
1214 xec_mask = 0x1f;
1215 fam_ops->mc0_mce = cat_mc0_mce;
1216 fam_ops->mc1_mce = cat_mc1_mce;
1217 fam_ops->mc2_mce = f16h_mc2_mce;
1218 break;
1219
be0aec23 1220 case 0x17:
be0aec23 1221 xec_mask = 0x3f;
a348ed83 1222 if (!boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
1223 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1224 goto err_out;
1225 }
1226 break;
1227
888ab8e6 1228 default:
ec3e82d6 1229 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
be0aec23 1230 goto err_out;
888ab8e6
BP
1231 }
1232
9530d608
BP
1233 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1234
3653ada5 1235 mce_register_decode_chain(&amd_mce_dec_nb);
f436f8bb
IM
1236
1237 return 0;
be0aec23
AG
1238
1239err_out:
1240 kfree(fam_ops);
1241 fam_ops = NULL;
1242 return -EINVAL;
f436f8bb
IM
1243}
1244early_initcall(mce_amd_init);
0d18b2e3
BP
1245
#ifdef MODULE
/*
 * Module teardown: take the decoder off the MCE decode chain first, then
 * release the per-family ops table it was using.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif