]>
Commit | Line | Data |
---|---|---|
53f2d028 MCC |
1 | #undef TRACE_SYSTEM |
2 | #define TRACE_SYSTEM ras | |
3 | #define TRACE_INCLUDE_FILE ras_event | |
4 | ||
5 | #if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ) | |
6 | #define _TRACE_HW_EVENT_MC_H | |
7 | ||
8 | #include <linux/tracepoint.h> | |
9 | #include <linux/edac.h> | |
10 | #include <linux/ktime.h> | |
0a2409aa | 11 | #include <linux/aer.h> |
2dfb7d51 CG |
12 | #include <linux/cper.h> |
13 | ||
14 | /* | |
15 | * MCE Extended Error Log trace event | |
16 | * | |
17 | * These events are generated when hardware detects a corrected or | |
18 | * uncorrected event. | |
19 | */ | |
20 | ||
21 | /* memory trace event */ | |
22 | ||
23 | #if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) | |
24 | TRACE_EVENT(extlog_mem_event, | |
25 | TP_PROTO(struct cper_sec_mem_err *mem, | |
26 | u32 err_seq, | |
27 | const uuid_le *fru_id, | |
28 | const char *fru_text, | |
29 | u8 sev), | |
30 | ||
31 | TP_ARGS(mem, err_seq, fru_id, fru_text, sev), | |
32 | ||
33 | TP_STRUCT__entry( | |
34 | __field(u32, err_seq) | |
35 | __field(u8, etype) | |
36 | __field(u8, sev) | |
37 | __field(u64, pa) | |
38 | __field(u8, pa_mask_lsb) | |
39 | __field_struct(uuid_le, fru_id) | |
40 | __string(fru_text, fru_text) | |
41 | __field_struct(struct cper_mem_err_compact, data) | |
42 | ), | |
43 | ||
44 | TP_fast_assign( | |
45 | __entry->err_seq = err_seq; | |
46 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) | |
47 | __entry->etype = mem->error_type; | |
48 | else | |
49 | __entry->etype = ~0; | |
50 | __entry->sev = sev; | |
51 | if (mem->validation_bits & CPER_MEM_VALID_PA) | |
52 | __entry->pa = mem->physical_addr; | |
53 | else | |
54 | __entry->pa = ~0ull; | |
55 | ||
56 | if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) | |
57 | __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); | |
58 | else | |
59 | __entry->pa_mask_lsb = ~0; | |
60 | __entry->fru_id = *fru_id; | |
61 | __assign_str(fru_text, fru_text); | |
62 | cper_mem_err_pack(mem, &__entry->data); | |
63 | ), | |
64 | ||
65 | TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", | |
66 | __entry->err_seq, | |
67 | cper_severity_str(__entry->sev), | |
68 | cper_mem_err_type_str(__entry->etype), | |
69 | __entry->pa, | |
70 | __entry->pa_mask_lsb, | |
71 | cper_mem_err_unpack(p, &__entry->data), | |
72 | &__entry->fru_id, | |
73 | __get_str(fru_text)) | |
74 | ); | |
75 | #endif | |
53f2d028 MCC |
76 | |
77 | /* | |
78 | * Hardware Events Report | |
79 | * | |
80 | * These events are generated when hardware detects a corrected or | |
81 | * uncorrected event, and are meant to replace the current APIs used to | |
82 | * report errors in both the EDAC and MCE subsystems. | |
83 | * | |
84 | * FIXME: Add events for handling memory errors originating from the | |
85 | * MCE subsystem. | |
86 | */ | |
87 | ||
88 | /* | |
89 | * Hardware-independent Memory Controller specific events | |
90 | */ | |
91 | ||
92 | /* | |
93 | * Default error mechanisms for Memory Controller errors (CE and UE) | |
94 | */ | |
95 | TRACE_EVENT(mc_event, | |
96 | ||
97 | TP_PROTO(const unsigned int err_type, | |
98 | const char *error_msg, | |
99 | const char *label, | |
100 | const int error_count, | |
101 | const u8 mc_index, | |
102 | const s8 top_layer, | |
103 | const s8 mid_layer, | |
104 | const s8 low_layer, | |
105 | unsigned long address, | |
106 | const u8 grain_bits, | |
107 | unsigned long syndrome, | |
108 | const char *driver_detail), | |
109 | ||
110 | TP_ARGS(err_type, error_msg, label, error_count, mc_index, | |
111 | top_layer, mid_layer, low_layer, address, grain_bits, | |
112 | syndrome, driver_detail), | |
113 | ||
114 | TP_STRUCT__entry( | |
115 | __field( unsigned int, error_type ) | |
116 | __string( msg, error_msg ) | |
117 | __string( label, label ) | |
118 | __field( u16, error_count ) | |
119 | __field( u8, mc_index ) | |
120 | __field( s8, top_layer ) | |
121 | __field( s8, middle_layer ) | |
122 | __field( s8, lower_layer ) | |
123 | __field( long, address ) | |
124 | __field( u8, grain_bits ) | |
125 | __field( long, syndrome ) | |
126 | __string( driver_detail, driver_detail ) | |
127 | ), | |
128 | ||
129 | TP_fast_assign( | |
130 | __entry->error_type = err_type; | |
131 | __assign_str(msg, error_msg); | |
132 | __assign_str(label, label); | |
133 | __entry->error_count = error_count; | |
134 | __entry->mc_index = mc_index; | |
135 | __entry->top_layer = top_layer; | |
136 | __entry->middle_layer = mid_layer; | |
137 | __entry->lower_layer = low_layer; | |
138 | __entry->address = address; | |
139 | __entry->grain_bits = grain_bits; | |
140 | __entry->syndrome = syndrome; | |
141 | __assign_str(driver_detail, driver_detail); | |
142 | ), | |
143 | ||
144 | TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", | |
145 | __entry->error_count, | |
8dd93d45 | 146 | mc_event_error_type(__entry->error_type), |
53f2d028 MCC |
147 | __entry->error_count > 1 ? "s" : "", |
148 | ((char *)__get_str(msg))[0] ? " " : "", | |
149 | __get_str(msg), | |
150 | __get_str(label), | |
151 | __entry->mc_index, | |
152 | __entry->top_layer, | |
153 | __entry->middle_layer, | |
154 | __entry->lower_layer, | |
155 | __entry->address, | |
156 | 1 << __entry->grain_bits, | |
157 | __entry->syndrome, | |
158 | ((char *)__get_str(driver_detail))[0] ? " " : "", | |
159 | __get_str(driver_detail)) | |
160 | ); | |
161 | ||
0a2409aa CG |
162 | /* |
163 | * PCIe AER Trace event | |
164 | * | |
165 | * These events are generated when hardware detects a corrected or | |
166 | * uncorrected event on a PCIe device. The event report has | |
167 | * the following structure: | |
168 | * | |
169 | * char * dev_name - The name of the slot where the device resides | |
170 | * ([domain:]bus:device.function). | |
171 | * u32 status - Either the correctable or uncorrectable register | |
172 | * indicating what error or errors have been seen | |
173 | * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED | |
174 | */ | |
175 | ||
/*
 * { mask, name } pairs for the bits of a correctable AER status value.
 * aer_event passes this list to __print_flags() when the severity is
 * AER_CORRECTABLE.
 *
 * Bit 8 is the replay counter rollover ("REPLAY_NUM Rollover"); the
 * previous "RELAY_NUM" spelling was a typo.
 */
#define aer_correctable_errors			\
	{BIT(0),	"Receiver Error"},		\
	{BIT(6),	"Bad TLP"},			\
	{BIT(7),	"Bad DLLP"},			\
	{BIT(8),	"REPLAY_NUM Rollover"},		\
	{BIT(12),	"Replay Timer Timeout"},	\
	{BIT(13),	"Advisory Non-Fatal"}
183 | ||
/*
 * { mask, name } pairs for the bits of an uncorrectable AER status value.
 * aer_event passes this list to __print_flags() for every severity other
 * than AER_CORRECTABLE.
 */
#define aer_uncorrectable_errors			\
	{ BIT(4),  "Data Link Protocol" },		\
	{ BIT(12), "Poisoned TLP" },			\
	{ BIT(13), "Flow Control Protocol" },		\
	{ BIT(14), "Completion Timeout" },		\
	{ BIT(15), "Completer Abort" },			\
	{ BIT(16), "Unexpected Completion" },		\
	{ BIT(17), "Receiver Overflow" },		\
	{ BIT(18), "Malformed TLP" },			\
	{ BIT(19), "ECRC" },				\
	{ BIT(20), "Unsupported Request" }
195 | ||
196 | TRACE_EVENT(aer_event, | |
197 | TP_PROTO(const char *dev_name, | |
198 | const u32 status, | |
199 | const u8 severity), | |
200 | ||
201 | TP_ARGS(dev_name, status, severity), | |
202 | ||
203 | TP_STRUCT__entry( | |
204 | __string( dev_name, dev_name ) | |
205 | __field( u32, status ) | |
206 | __field( u8, severity ) | |
207 | ), | |
208 | ||
209 | TP_fast_assign( | |
210 | __assign_str(dev_name, dev_name); | |
211 | __entry->status = status; | |
212 | __entry->severity = severity; | |
213 | ), | |
214 | ||
215 | TP_printk("%s PCIe Bus Error: severity=%s, %s\n", | |
216 | __get_str(dev_name), | |
217 | __entry->severity == AER_CORRECTABLE ? "Corrected" : | |
218 | __entry->severity == AER_FATAL ? | |
219 | "Fatal" : "Uncorrected, non-fatal", | |
220 | __entry->severity == AER_CORRECTABLE ? | |
221 | __print_flags(__entry->status, "|", aer_correctable_errors) : | |
222 | __print_flags(__entry->status, "|", aer_uncorrectable_errors)) | |
223 | ); | |
224 | ||
53f2d028 MCC |
225 | #endif /* _TRACE_HW_EVENT_MC_H */ |
226 | ||
227 | /* This part must be outside protection */ | |
228 | #include <trace/define_trace.h> |