1 #include <linux/module.h>
2 #include <linux/slab.h>
/*
 * Family-specific decode callbacks. mce_amd_init() allocates this with
 * kzalloc() and fills in ->dc_mce, ->ic_mce and ->nb_mce; the decoders in
 * this file call through it.
 */
6 static struct amd_decoder_ops
*fam_ops
;
/*
 * Mask applied to the upper status word (nbsh) in amd_decode_nb_mce() to
 * obtain the core value it prints. Starts at 0xf; mce_amd_init() narrows
 * it to 0x3 next to the f14h_* handler assignments.
 */
8 static u8 nb_err_cpumask
= 0xf;
/*
 * Written by amd_report_gart_errors(); amd_filter_mce() tests it for
 * bank 4 / xec 0x5 signatures.
 */
10 static bool report_gart_errors
;
/*
 * Optional callback: amd_decode_nb_mce() calls it (when non-NULL) for
 * xec 0x8 or 0x0 on families 0xf and 0x10, and
 * amd_unregister_ecc_decoder() resets it to NULL.
 */
11 static void (*nb_bus_decoder
)(int node_id
, struct mce
*m
, u32 nbcfg
);
/*
 * Store @v in report_gart_errors. amd_filter_mce() reads that flag when it
 * sees a bank 4 error with extended error code 0x5.
 * Exported to GPL modules.
 */
13 void amd_report_gart_errors(bool v
)
15 report_gart_errors
= v
;
17 EXPORT_SYMBOL_GPL(amd_report_gart_errors
);
/*
 * Exported (GPL) registration hook. @f has the same signature as the
 * file-scope nb_bus_decoder pointer.
 * NOTE(review): the function body is not present in this extract;
 * presumably it stores @f in nb_bus_decoder -- confirm against the full file.
 */
19 void amd_register_ecc_decoder(void (*f
)(int, struct mce
*, u32
))
23 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder
);
/*
 * Drop the registered NB bus decoder: warn if the currently installed
 * callback is not @f, then reset nb_bus_decoder to NULL.
 * NOTE(review): lines between the signature and the WARN_ON() are missing
 * from this extract, so any guard around these statements is not visible.
 * Exported to GPL modules.
 */
25 void amd_unregister_ecc_decoder(void (*f
)(int, struct mce
*, u32
))
/* a mismatch means the caller is unregistering a decoder it did not install */
28 WARN_ON(nb_bus_decoder
!= f
);
30 nb_bus_decoder
= NULL
;
33 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder
);
36 * string representation for the different MCA reported error types, see F3x48
/*
 * Exported (GPL) string tables naming the values of the individual error
 * code fields.
 * NOTE(review): presumably these are read through the TT_MSG(), LL_MSG(),
 * RRRR_MSG(), PP_MSG(), TO_MSG() and II_MSG() macros used further down;
 * the macro definitions are not part of this file -- confirm the index
 * ranges match the table sizes.
 */
40 /* transaction type */
41 const char *tt_msgs
[] = { "INSN", "DATA", "GEN", "RESV" };
42 EXPORT_SYMBOL_GPL(tt_msgs
);
/* level names; the "Cache Level" label is printed with LL_MSG() below */
45 const char *ll_msgs
[] = { "RESV", "L1", "L2", "L3/GEN" };
46 EXPORT_SYMBOL_GPL(ll_msgs
);
48 /* memory transaction type */
49 const char *rrrr_msgs
[] = {
50 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
52 EXPORT_SYMBOL_GPL(rrrr_msgs
);
54 /* participating processor */
55 const char *pp_msgs
[] = { "SRC", "RES", "OBS", "GEN" };
56 EXPORT_SYMBOL_GPL(pp_msgs
);
/* timeout indication: two entries, "no timeout" first */
59 const char *to_msgs
[] = { "no timeout", "timed out" };
60 EXPORT_SYMBOL_GPL(to_msgs
);
/* NOTE(review): presumably the memory-or-I/O field names used by II_MSG() -- confirm */
63 const char *ii_msgs
[] = { "MEM", "RESV", "IO", "GEN" };
64 EXPORT_SYMBOL_GPL(ii_msgs
);
/*
 * Northbridge error descriptions printed by f10h_nb_mce(), which indexes
 * this table with (xec - offset).
 * NOTE(review): the first entries and the closing brace may be missing from
 * this extract (original line 67 is absent) -- confirm the table bounds
 * against the offsets used in f10h_nb_mce().
 */
66 static const char *f10h_nb_mce_desc
[] = {
68 "Protocol error (link, L3, probe filter, etc.)",
69 "Parity error in NB-internal arrays",
70 "Link Retry due to IO link transmission error",
71 "L3 ECC data cache error",
72 "ECC error in L3 cache tag",
73 "L3 LRU parity bits error",
74 "ECC Error in the Probe Filter directory"
/*
 * Data cache decoder installed as fam_ops->dc_mce by mce_amd_init() and
 * also used as the fallback of k8_dc_mce().
 *
 * @ec: 16-bit error code.
 *
 * Continues the current log line with one of the messages below and
 * returns a bool.
 * NOTE(review): the conditions selecting each message and the return
 * statements are missing from this extract.
 */
77 static bool f10h_dc_mce(u16 ec
)
/* r4: bits [7:4] of the error code */
79 u8 r4
= (ec
>> 4) & 0xf;
83 pr_cont("during data scrub.\n");
92 pr_cont("during L1 linefill from L2.\n");
94 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec
));
/*
 * Data cache decoder installed as fam_ops->dc_mce by mce_amd_init().
 *
 * @ec: 16-bit error code.
 *
 * Reports a "system linefill" case itself and otherwise hands @ec to
 * f10h_dc_mce(), returning that function's result.
 * NOTE(review): the condition guarding the first message is missing from
 * this extract.
 */
101 static bool k8_dc_mce(u16 ec
)
104 pr_cont("during system linefill.\n");
108 return f10h_dc_mce(ec
);
/*
 * Data cache decoder installed as fam_ops->dc_mce by mce_amd_init() next to
 * the other f14h_* handlers.
 *
 * @ec: 16-bit error code.
 *
 * Two groups of signatures are visible: one checked against TT_DATA/LL_L1
 * and one entered when BUS_ERROR(ec) holds.
 * NOTE(review): the declarations of 'll' and 'ii', the condition opening
 * the first group, the statements guarded by the sanity checks and the
 * return statements are missing from this extract.
 */
111 static bool f14h_dc_mce(u16 ec
)
/* r4: bits [7:4] of the error code */
113 u8 r4
= (ec
>> 4) & 0xf;
/* tt: bits [3:2] of the error code */
115 u8 tt
= (ec
>> 2) & 0x3;
/* anything other than a data transaction at L1 takes the guarded path */
121 if (tt
!= TT_DATA
|| ll
!= LL_L1
)
127 pr_cont("Data/Tag parity error due to %s.\n",
128 (r4
== R4_DRD
? "load/hw prf" : "store"));
131 pr_cont("Copyback parity error on a tag miss.\n");
134 pr_cont("Tag parity error during snoop.\n");
139 } else if (BUS_ERROR(ec
)) {
/* guarded path is taken unless ii is II_MEM or II_IO and ll is LL_LG */
141 if ((ii
!= II_MEM
&& ii
!= II_IO
) || ll
!= LL_LG
)
144 pr_cont("System read data error on a ");
148 pr_cont("TLB reload.\n");
/*
 * Decode a data cache machine check and log it.
 *
 * @m: MCE record; only m->status is read here.
 *
 * Starts a pr_emerg() line with "Data Cache Error: ", prints the TLB
 * message directly, and uses the family-specific fam_ops->dc_mce()
 * callback for the remaining signatures.
 * NOTE(review): the condition selecting the TLB branch and the control
 * flow leading to the "Corrupted DC MCE info?" message are missing from
 * this extract.
 */
166 static void amd_decode_dc_mce(struct mce
*m
)
/* ec: status bits [15:0] */
168 u16 ec
= m
->status
& 0xffff;
/* xec: status bits [19:16] */
169 u8 xec
= (m
->status
>> 16) & 0xf;
171 pr_emerg(HW_ERR
"Data Cache Error: ");
173 /* TLB error signatures are the same across families */
175 u8 tt
= (ec
>> 2) & 0x3;
/* a non-zero xec is reported as "multimatch", zero as "parity error" */
178 pr_cont("%s TLB %s.\n", LL_MSG(ec
),
179 (xec
? "multimatch" : "parity error"));
/* a false return from the family decoder takes the guarded path */
186 if (!fam_ops
->dc_mce(ec
))
192 pr_emerg(HW_ERR
"Corrupted DC MCE info?\n");
/*
 * Instruction cache decoder installed as fam_ops->ic_mce by mce_amd_init().
 *
 * @ec: 16-bit error code.
 *
 * Continues the current log line with one of the messages below and
 * returns a bool.
 * NOTE(review): the declaration of 'll', most of the selecting conditions
 * and the return statements are missing from this extract.
 */
195 static bool k8_ic_mce(u16 ec
)
/* r4: bits [7:4] of the error code */
198 u8 r4
= (ec
>> 4) & 0xf;
205 pr_cont("during a linefill from L2.\n");
206 else if (ll
== 0x1) {
209 pr_cont("Parity error during data load.\n");
213 pr_cont("Copyback Parity/Victim error.\n");
217 pr_cont("Tag Snoop error.\n");
/*
 * Instruction cache decoder installed as fam_ops->ic_mce by mce_amd_init()
 * next to the other f14h_* handlers.
 *
 * @ec: 16-bit error code.
 *
 * NOTE(review): the declaration of 'll', the statement guarded by the
 * tt/ll check, the condition for the first message and the return
 * statements are missing from this extract.
 */
230 static bool f14h_ic_mce(u16 ec
)
/* tt: bits [3:2] of the error code */
233 u8 tt
= (ec
>> 2) & 0x3;
/* r4: bits [7:4] of the error code */
234 u8 r4
= (ec
>> 4) & 0xf;
/* only tt == 0 with ll == 1 gets past this check */
238 if (tt
!= 0 || ll
!= 1)
242 pr_cont("Data/tag array parity error for a tag hit.\n");
243 else if (r4
== R4_SNOOP
)
244 pr_cont("Tag error during snoop/victimization.\n");
/*
 * Decode an instruction cache machine check and log it.
 *
 * @m: MCE record; only m->status is read here.
 *
 * Starts a pr_emerg() line with "Instruction Cache Error: ", handles the
 * TLB and bus error signatures directly and passes everything else to the
 * family-specific fam_ops->ic_mce() callback.
 * NOTE(review): the condition of the first branch and the statements
 * between the ic_mce() call and the "Corrupted IC MCE info?" message are
 * missing from this extract.
 */
251 static void amd_decode_ic_mce(struct mce
*m
)
/* ec: status bits [15:0] */
253 u16 ec
= m
->status
& 0xffff;
/* xec: status bits [19:16] */
254 u8 xec
= (m
->status
>> 16) & 0xf;
256 pr_emerg(HW_ERR
"Instruction Cache Error: ");
/* a non-zero xec is reported as "multimatch", zero as "parity error" */
259 pr_cont("%s TLB %s.\n", LL_MSG(ec
),
260 (xec
? "multimatch" : "parity error"));
261 else if (BUS_ERROR(ec
)) {
/* true only on family 0xf with status bit 58 set */
262 bool k8
= (boot_cpu_data
.x86
== 0xf && (m
->status
& BIT(58)));
264 pr_cont("during %s.\n", (k8
? "system linefill" : "NB data read"));
/* note: unlike the DC path, a true return selects this branch */
265 } else if (fam_ops
->ic_mce(ec
))
268 pr_emerg(HW_ERR
"Corrupted IC MCE info?\n");
/*
 * Decode a bus unit machine check and log it.
 *
 * @m: MCE record; only m->status is read here.
 *
 * Starts a pr_emerg() line with "Bus Unit Error" and appends a message
 * selected by the extended error code (xec) and the error class of ec.
 * NOTE(review): several conditions, one pr_cont() argument line and the
 * control flow leading to the "Corrupted BU MCE info?" message are missing
 * from this extract.
 */
271 static void amd_decode_bu_mce(struct mce
*m
)
/* ec: status bits [15:0]; held in a u32 here, unlike the u16 used elsewhere */
273 u32 ec
= m
->status
& 0xffff;
/* xec: status bits [19:16] */
274 u32 xec
= (m
->status
>> 16) & 0xf;
276 pr_emerg(HW_ERR
"Bus Unit Error");
279 pr_cont(" in the write data buffers.\n");
281 pr_cont(" in the victim data buffers.\n");
282 else if (xec
== 0x2 && MEM_ERROR(ec
))
283 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec
));
284 else if (xec
== 0x0) {
286 pr_cont(": %s error in a Page Descriptor Cache or "
287 "Guest TLB.\n", TT_MSG(ec
));
288 else if (BUS_ERROR(ec
))
289 pr_cont(": %s/ECC error in data read from NB: %s.\n",
290 RRRR_MSG(ec
), PP_MSG(ec
));
291 else if (MEM_ERROR(ec
)) {
/* rrrr: bits [7:4] of the error code */
292 u8 rrrr
= (ec
>> 4) & 0xf;
295 pr_cont(": %s error during data copyback.\n",
297 else if (rrrr
<= 0x1)
298 pr_cont(": %s parity/ECC error during data "
299 "access from L2.\n", RRRR_MSG(ec
));
310 pr_emerg(HW_ERR
"Corrupted BU MCE info?\n");
/*
 * Decode a load/store unit machine check and log it.
 *
 * @m: MCE record; only m->status is read here.
 *
 * On family 0x14 it logs a request to report the event instead. Otherwise
 * it starts a pr_emerg() line with "Load Store Error" and appends the
 * memory transaction type.
 * NOTE(review): the end of the family 0x14 branch, the condition opening
 * the decode branch and the statement guarded by the BUS_ERROR/r4 check
 * are missing from this extract.
 */
313 static void amd_decode_ls_mce(struct mce
*m
)
/* ec: status bits [15:0] */
315 u16 ec
= m
->status
& 0xffff;
/* xec: status bits [19:16] */
316 u8 xec
= (m
->status
>> 16) & 0xf;
318 if (boot_cpu_data
.x86
== 0x14) {
319 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
320 " please report on LKML.\n");
324 pr_emerg(HW_ERR
"Load Store Error");
/* r4: bits [7:4] of the error code */
327 u8 r4
= (ec
>> 4) & 0xf;
/* only bus errors with r4 equal to R4_DRD or R4_DWR get past this check */
329 if (!BUS_ERROR(ec
) || (r4
!= R4_DRD
&& r4
!= R4_DWR
))
332 pr_cont(" during %s.\n", RRRR_MSG(ec
));
339 pr_emerg(HW_ERR
"Corrupted LS MCE info?\n");
/*
 * Northbridge decoder installed as fam_ops->nb_mce by mce_amd_init() and
 * tried first by f10h_nb_mce().
 *
 * @ec:  16-bit error code.
 * @xec: extended error code.
 *
 * Continues the current log line with one of the messages below and
 * returns a bool.
 * NOTE(review): the conditions selecting each message, the statement
 * guarded by the family 0x11 check and the return statements are missing
 * from this extract.
 */
342 static bool k8_nb_mce(u16 ec
, u8 xec
)
348 pr_cont("CRC error detected on HT link.\n");
352 pr_cont("Invalid GART PTE entry during GART table walk.\n");
356 pr_cont("Unsupported atomic RMW received from an IO link.\n");
361 if (boot_cpu_data
.x86
== 0x11)
364 pr_cont("DRAM ECC error detected on the NB.\n");
368 pr_cont("Parity error on the DRAM addr/ctl signals.\n");
/*
 * Northbridge decoder installed as fam_ops->nb_mce by mce_amd_init().
 *
 * @ec:  16-bit error code.
 * @xec: extended error code.
 *
 * Tries k8_nb_mce() first, then handles additional signatures itself,
 * including a lookup in f10h_nb_mce_desc[] at index (xec - offset).
 * NOTE(review): the declaration and values of 'offset', the statement
 * guarded by the k8_nb_mce() call, most selecting conditions and the
 * return statements are missing from this extract -- confirm that
 * (xec - offset) always stays within the table.
 */
379 static bool f10h_nb_mce(u16 ec
, u8 xec
)
384 if (k8_nb_mce(ec
, xec
))
398 pr_cont("GART Table Walk data error.\n");
399 else if (BUS_ERROR(ec
))
400 pr_cont("DMA Exclusion Vector Table Walk error.\n");
418 pr_cont("%s.\n", f10h_nb_mce_desc
[xec
- offset
]);
/*
 * Northbridge decoder installed as fam_ops->nb_mce by mce_amd_init() next
 * to the other f14h_* handlers.
 *
 * @ec:  16-bit error code.
 * @xec: extended error code.
 *
 * NOTE(review): only the signature is present in this extract; the body
 * and return value cannot be described from here.
 */
424 static bool f14h_nb_mce(u16 ec
, u8 xec
)
/*
 * Decode a northbridge machine check and log it.
 *
 * @node_id: node number printed in the message and passed to the bus decoder.
 * @m:       MCE record; only m->status is read here.
 * @nbcfg:   passed through unchanged to nb_bus_decoder().
 *
 * Starts a pr_emerg() line with the node number, appends the core value,
 * prints a message for the signatures handled here, and otherwise uses the
 * family-specific fam_ops->nb_mce() callback. On families 0xf and 0x10,
 * xec 0x8 and 0x0 are additionally passed to the registered
 * nb_bus_decoder, if any.
 * NOTE(review): the else branch of the core printout, the conditions
 * selecting the individual messages and the control flow leading to the
 * "Corrupted NB MCE info?" message are missing from this extract.
 * Exported to GPL modules.
 */
429 void amd_decode_nb_mce(int node_id
, struct mce
*m
, u32 nbcfg
)
/* xec: status bits [20:16]; five bits here, the other decoders use four */
431 u8 xec
= (m
->status
>> 16) & 0x1f;
/* ec: status bits [15:0] */
432 u16 ec
= m
->status
& 0xffff;
/* nbsh: upper 32 bits of the status value */
433 u32 nbsh
= (u32
)(m
->status
>> 32);
435 pr_emerg(HW_ERR
"Northbridge Error, node %d: ", node_id
);
438 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
439 * value encoding has changed so interpret those differently
441 if ((boot_cpu_data
.x86
== 0x10) &&
442 (boot_cpu_data
.x86_model
> 7)) {
/* the masked value is printed as-is on this path */
443 if (nbsh
& K8_NBSH_ERR_CPU_VAL
)
444 pr_cont(", core: %u", (u8
)(nbsh
& nb_err_cpumask
));
446 u8 assoc_cpus
= nbsh
& nb_err_cpumask
;
/* fls() - 1: index of the highest set bit in the masked value */
449 pr_cont(", core: %d", fls(assoc_cpus
) - 1);
454 pr_cont("Sync error (sync packets on HT link detected).\n");
458 pr_cont("HT Master abort.\n");
462 pr_cont("HT Target abort.\n");
466 pr_cont("NB Watchdog timeout.\n");
470 pr_cont("SVM DMA Exclusion Vector error.\n");
/* a false return from the family decoder takes the guarded path */
477 if (!fam_ops
->nb_mce(ec
, xec
))
480 if (boot_cpu_data
.x86
== 0xf || boot_cpu_data
.x86
== 0x10)
/* the bus decoder is optional, hence the NULL check */
481 if ((xec
== 0x8 || xec
== 0x0) && nb_bus_decoder
)
482 nb_bus_decoder(node_id
, m
, nbcfg
);
487 pr_emerg(HW_ERR
"Corrupted NB MCE info?\n");
489 EXPORT_SYMBOL_GPL(amd_decode_nb_mce
);
/*
 * Decode an FR machine check and log it.
 *
 * @m: MCE record; only m->status is read here.
 *
 * Families 0xf and 0x11 are singled out first. The only signature
 * recognised is error code 0x0f0f, reported as a CPU watchdog timer
 * expiry.
 * NOTE(review): the statement guarded by the family check and the control
 * flow leading to the "Corrupted FR MCE info?" message are missing from
 * this extract.
 */
491 static void amd_decode_fr_mce(struct mce
*m
)
493 if (boot_cpu_data
.x86
== 0xf ||
494 boot_cpu_data
.x86
== 0x11)
497 /* we have only one error signature so match all fields at once. */
498 if ((m
->status
& 0xffff) == 0x0f0f) {
499 pr_emerg(HW_ERR
"FR Error: CPU Watchdog timer expire.\n");
504 pr_emerg(HW_ERR
"Corrupted FR MCE info?\n");
/*
 * Log the generic fields of an error code on a separate pr_emerg() line.
 *
 * @ec: 16-bit error code.
 *
 * The line printed depends on the error class: the first branch prints the
 * transaction type and cache level, MEM_ERROR() adds the memory
 * transaction type, BUS_ERROR() prints the memory transaction type, the
 * II and timeout fields, the cache level and the participating processor.
 * Anything else is reported as an unknown error code.
 * NOTE(review): the condition of the first branch and the last argument
 * line of the bus error message are missing from this extract.
 */
507 static inline void amd_decode_err_code(u16 ec
)
510 pr_emerg(HW_ERR
"Transaction: %s, Cache Level: %s\n",
511 TT_MSG(ec
), LL_MSG(ec
));
512 } else if (MEM_ERROR(ec
)) {
513 pr_emerg(HW_ERR
"Transaction: %s, Type: %s, Cache Level: %s\n",
514 RRRR_MSG(ec
), TT_MSG(ec
), LL_MSG(ec
));
515 } else if (BUS_ERROR(ec
)) {
516 pr_emerg(HW_ERR
"Transaction: %s (%s), %s, Cache Level: %s, "
517 "Participating Processor: %s\n",
518 RRRR_MSG(ec
), II_MSG(ec
), TO_MSG(ec
), LL_MSG(ec
),
521 pr_emerg(HW_ERR
"Huh? Unknown MCE error 0x%x\n", ec
);
525 * Filter out unwanted MCE signatures here.
/*
 * Decide whether an MCE should be skipped by amd_decode_mce().
 *
 * @m: MCE record; m->status and m->bank are read.
 *
 * The only signature tested here is bank 4 with extended error code 0x5
 * while report_gart_errors is false.
 * NOTE(review): the return statements are missing from this extract;
 * presumably a match returns true -- confirm.
 */
527 static bool amd_filter_mce(struct mce
*m
)
/* xec: status bits [20:16] */
529 u8 xec
= (m
->status
>> 16) & 0x1f;
532 * NB GART TLB error reporting is disabled by default.
534 if (m
->bank
== 4 && xec
== 0x5 && !report_gart_errors
)
/*
 * Notifier callback (installed through amd_mce_dec_nb) that decodes one
 * MCE record and logs it.
 *
 * @nb:   notifier block; not referenced in the visible lines.
 * @val:  notifier value; not referenced in the visible lines.
 * @data: pointer to the struct mce to decode.
 *
 * After amd_filter_mce() it prints the bank number and the UC/OVER/PCC
 * status flags, the ECC kind, then calls one of the amd_decode_*_mce()
 * helpers and finally amd_decode_err_code() on the low 16 status bits.
 * NOTE(review): the declarations of 'ecc' and 'node', the statement guarded
 * by the filter check, the dispatch selecting a helper, the condition on
 * 'ecc' and the return value are missing from this extract.
 * Exported to GPL modules.
 */
540 int amd_decode_mce(struct notifier_block
*nb
, unsigned long val
, void *data
)
542 struct mce
*m
= (struct mce
*)data
;
545 if (amd_filter_mce(m
))
548 pr_emerg(HW_ERR
"MC%d_STATUS: ", m
->bank
);
/* "Unc" or "C" is prepended to "orrected" to spell the severity */
550 pr_cont("%sorrected error, other errors lost: %s, "
551 "CPU context corrupt: %s",
552 ((m
->status
& MCI_STATUS_UC
) ? "Unc" : "C"),
553 ((m
->status
& MCI_STATUS_OVER
) ? "yes" : "no"),
554 ((m
->status
& MCI_STATUS_PCC
) ? "yes" : "no"));
/* status bits [46:45], i.e. bits [14:13] of the upper 32-bit half */
556 /* do the two bits[14:13] together */
557 ecc
= (m
->status
>> 45) & 0x3;
/* value 2 prints "CECC Error", any other value that reaches here "UECC Error" */
559 pr_cont(", %sECC Error", ((ecc
== 2) ? "C" : "U"));
565 amd_decode_dc_mce(m
);
569 amd_decode_ic_mce(m
);
573 amd_decode_bu_mce(m
);
577 amd_decode_ls_mce(m
);
/* the NB decoder needs the node number; nbcfg is passed as 0 */
581 node
= amd_get_nb_id(m
->extcpu
);
582 amd_decode_nb_mce(node
, m
, 0);
586 amd_decode_fr_mce(m
);
593 amd_decode_err_code(m
->status
& 0xffff);
597 EXPORT_SYMBOL_GPL(amd_decode_mce
);
/*
 * Notifier block routing events to amd_decode_mce(). mce_amd_init()
 * registers it on x86_mce_decoder_chain and mce_amd_exit() removes it.
 */
599 static struct notifier_block amd_mce_dec_nb
= {
600 .notifier_call
= amd_decode_mce
,
/*
 * Initialisation, run as an early_initcall.
 *
 * Checks vendor and family, allocates fam_ops, installs the dc_mce, ic_mce
 * and nb_mce handlers according to boot_cpu_data.x86, and registers
 * amd_mce_dec_nb on x86_mce_decoder_chain.
 * NOTE(review): the statements guarded by the vendor and family checks, the
 * case labels of the switch, the argument line of the "What family"
 * warning and all return statements are missing from this extract. No
 * NULL check of the kzalloc() result is visible either -- confirm one
 * exists before fam_ops is dereferenced.
 */
603 static int __init
mce_amd_init(void)
605 if (boot_cpu_data
.x86_vendor
!= X86_VENDOR_AMD
)
/*
 * True for every family outside 0xf..0x11 unless it is family 0x14 with
 * model <= 0xf.
 */
608 if ((boot_cpu_data
.x86
< 0xf || boot_cpu_data
.x86
> 0x11) &&
609 (boot_cpu_data
.x86
!= 0x14 || boot_cpu_data
.x86_model
> 0xf))
612 fam_ops
= kzalloc(sizeof(struct amd_decoder_ops
), GFP_KERNEL
);
616 switch (boot_cpu_data
.x86
) {
/* handler set: k8 DC, k8 IC, k8 NB */
618 fam_ops
->dc_mce
= k8_dc_mce
;
619 fam_ops
->ic_mce
= k8_ic_mce
;
620 fam_ops
->nb_mce
= k8_nb_mce
;
/* handler set: f10h DC, k8 IC, f10h NB */
624 fam_ops
->dc_mce
= f10h_dc_mce
;
625 fam_ops
->ic_mce
= k8_ic_mce
;
626 fam_ops
->nb_mce
= f10h_nb_mce
;
/* handler set: k8 DC, k8 IC, f10h NB */
630 fam_ops
->dc_mce
= k8_dc_mce
;
631 fam_ops
->ic_mce
= k8_ic_mce
;
632 fam_ops
->nb_mce
= f10h_nb_mce
;
/* handler set: all f14h; the core mask is also narrowed to two bits */
636 nb_err_cpumask
= 0x3;
637 fam_ops
->dc_mce
= f14h_dc_mce
;
638 fam_ops
->ic_mce
= f14h_ic_mce
;
639 fam_ops
->nb_mce
= f14h_nb_mce
;
643 printk(KERN_WARNING
"Huh? What family is that: %d?!\n",
649 pr_info("MCE: In-kernel MCE decoding enabled.\n");
651 atomic_notifier_chain_register(&x86_mce_decoder_chain
, &amd_mce_dec_nb
);
655 early_initcall(mce_amd_init
);
/*
 * Module exit: remove amd_mce_dec_nb from x86_mce_decoder_chain.
 * NOTE(review): no release of fam_ops is visible in this extract -- confirm
 * the allocation made by mce_amd_init() is freed in the full file.
 */
658 static void __exit
mce_amd_exit(void)
660 atomic_notifier_chain_unregister(&x86_mce_decoder_chain
, &amd_mce_dec_nb
);
664 MODULE_DESCRIPTION("AMD MCE decoder");
665 MODULE_ALIAS("edac-mce-amd");
666 MODULE_LICENSE("GPL");
667 module_exit(mce_amd_exit
);