]>
Commit | Line | Data |
---|---|---|
36df96f8 MS |
1 | /* |
2 | * Machine check exception handling. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright 2013 IBM Corporation | |
19 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
20 | */ | |
21 | ||
22 | #undef DEBUG | |
23 | #define pr_fmt(fmt) "mce: " fmt | |
24 | ||
25 | #include <linux/types.h> | |
26 | #include <linux/ptrace.h> | |
27 | #include <linux/percpu.h> | |
28 | #include <linux/export.h> | |
30c82635 | 29 | #include <linux/irq_work.h> |
36df96f8 MS |
30 | #include <asm/mce.h> |
31 | ||
32 | static DEFINE_PER_CPU(int, mce_nest_count); | |
33 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); | |
34 | ||
b5ff4211 MS |
35 | /* Queue for delayed MCE events. */ |
36 | static DEFINE_PER_CPU(int, mce_queue_count); | |
37 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); | |
38 | ||
30c82635 | 39 | static void machine_check_process_queued_event(struct irq_work *work); |
635218c7 | 40 | static struct irq_work mce_event_process_work = { |
30c82635 MS |
41 | .func = machine_check_process_queued_event, |
42 | }; | |
43 | ||
36df96f8 MS |
44 | static void mce_set_error_info(struct machine_check_event *mce, |
45 | struct mce_error_info *mce_err) | |
46 | { | |
47 | mce->error_type = mce_err->error_type; | |
48 | switch (mce_err->error_type) { | |
49 | case MCE_ERROR_TYPE_UE: | |
50 | mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; | |
51 | break; | |
52 | case MCE_ERROR_TYPE_SLB: | |
53 | mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; | |
54 | break; | |
55 | case MCE_ERROR_TYPE_ERAT: | |
56 | mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; | |
57 | break; | |
58 | case MCE_ERROR_TYPE_TLB: | |
59 | mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; | |
60 | break; | |
61 | case MCE_ERROR_TYPE_UNKNOWN: | |
62 | default: | |
63 | break; | |
64 | } | |
65 | } | |
66 | ||
67 | /* | |
68 | * Decode and save high level MCE information into per cpu buffer which | |
69 | * is an array of machine_check_event structure. | |
70 | */ | |
71 | void save_mce_event(struct pt_regs *regs, long handled, | |
72 | struct mce_error_info *mce_err, | |
55672ecf | 73 | uint64_t nip, uint64_t addr) |
36df96f8 | 74 | { |
ffb2d78e | 75 | int index = __this_cpu_inc_return(mce_nest_count) - 1; |
69111bac | 76 | struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
77 | |
78 | /* | |
79 | * Return if we don't have enough space to log mce event. | |
80 | * mce_nest_count may go beyond MAX_MC_EVT but that's ok, | |
81 | * the check below will stop buffer overrun. | |
82 | */ | |
83 | if (index >= MAX_MC_EVT) | |
84 | return; | |
85 | ||
86 | /* Populate generic machine check info */ | |
87 | mce->version = MCE_V1; | |
55672ecf | 88 | mce->srr0 = nip; |
36df96f8 MS |
89 | mce->srr1 = regs->msr; |
90 | mce->gpr3 = regs->gpr[3]; | |
91 | mce->in_use = 1; | |
92 | ||
93 | mce->initiator = MCE_INITIATOR_CPU; | |
c74dd88e MS |
94 | /* Mark it recovered if we have handled it and MSR(RI=1). */ |
95 | if (handled && (regs->msr & MSR_RI)) | |
36df96f8 MS |
96 | mce->disposition = MCE_DISPOSITION_RECOVERED; |
97 | else | |
98 | mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; | |
99 | mce->severity = MCE_SEV_ERROR_SYNC; | |
100 | ||
36df96f8 MS |
101 | /* |
102 | * Populate the mce error_type and type-specific error_type. | |
103 | */ | |
104 | mce_set_error_info(mce, mce_err); | |
105 | ||
106 | if (!addr) | |
107 | return; | |
108 | ||
109 | if (mce->error_type == MCE_ERROR_TYPE_TLB) { | |
110 | mce->u.tlb_error.effective_address_provided = true; | |
111 | mce->u.tlb_error.effective_address = addr; | |
112 | } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { | |
113 | mce->u.slb_error.effective_address_provided = true; | |
114 | mce->u.slb_error.effective_address = addr; | |
115 | } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { | |
116 | mce->u.erat_error.effective_address_provided = true; | |
117 | mce->u.erat_error.effective_address = addr; | |
118 | } else if (mce->error_type == MCE_ERROR_TYPE_UE) { | |
119 | mce->u.ue_error.effective_address_provided = true; | |
120 | mce->u.ue_error.effective_address = addr; | |
121 | } | |
122 | return; | |
123 | } | |
124 | ||
125 | /* | |
126 | * get_mce_event: | |
127 | * mce Pointer to machine_check_event structure to be filled. | |
128 | * release Flag to indicate whether to free the event slot or not. | |
129 | * 0 <= do not release the mce event. Caller will invoke | |
130 | * release_mce_event() once event has been consumed. | |
131 | * 1 <= release the slot. | |
132 | * | |
133 | * return 1 = success | |
134 | * 0 = failure | |
135 | * | |
136 | * get_mce_event() will be called by platform specific machine check | |
137 | * handle routine and in KVM. | |
138 | * When we call get_mce_event(), we are still in interrupt context and | |
139 | * preemption will not be scheduled until ret_from_expect() routine | |
140 | * is called. | |
141 | */ | |
142 | int get_mce_event(struct machine_check_event *mce, bool release) | |
143 | { | |
69111bac | 144 | int index = __this_cpu_read(mce_nest_count) - 1; |
36df96f8 MS |
145 | struct machine_check_event *mc_evt; |
146 | int ret = 0; | |
147 | ||
148 | /* Sanity check */ | |
149 | if (index < 0) | |
150 | return ret; | |
151 | ||
152 | /* Check if we have MCE info to process. */ | |
153 | if (index < MAX_MC_EVT) { | |
69111bac | 154 | mc_evt = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
155 | /* Copy the event structure and release the original */ |
156 | if (mce) | |
157 | *mce = *mc_evt; | |
158 | if (release) | |
159 | mc_evt->in_use = 0; | |
160 | ret = 1; | |
161 | } | |
162 | /* Decrement the count to free the slot. */ | |
163 | if (release) | |
69111bac | 164 | __this_cpu_dec(mce_nest_count); |
36df96f8 MS |
165 | |
166 | return ret; | |
167 | } | |
168 | ||
/*
 * Release the most recently logged MCE event on this CPU without copying
 * it out: equivalent to get_mce_event() with no destination buffer and the
 * release flag set.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
b5ff4211 MS |
173 | |
174 | /* | |
175 | * Queue up the MCE event which then can be handled later. | |
176 | */ | |
177 | void machine_check_queue_event(void) | |
178 | { | |
179 | int index; | |
180 | struct machine_check_event evt; | |
181 | ||
182 | if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) | |
183 | return; | |
184 | ||
ffb2d78e | 185 | index = __this_cpu_inc_return(mce_queue_count) - 1; |
b5ff4211 MS |
186 | /* If queue is full, just return for now. */ |
187 | if (index >= MAX_MC_EVT) { | |
69111bac | 188 | __this_cpu_dec(mce_queue_count); |
b5ff4211 MS |
189 | return; |
190 | } | |
69111bac | 191 | memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); |
30c82635 MS |
192 | |
193 | /* Queue irq work to process this event later. */ | |
194 | irq_work_queue(&mce_event_process_work); | |
b5ff4211 MS |
195 | } |
196 | ||
197 | /* | |
198 | * process pending MCE event from the mce event queue. This function will be | |
199 | * called during syscall exit. | |
200 | */ | |
30c82635 | 201 | static void machine_check_process_queued_event(struct irq_work *work) |
b5ff4211 MS |
202 | { |
203 | int index; | |
204 | ||
b5ff4211 MS |
205 | /* |
206 | * For now just print it to console. | |
207 | * TODO: log this error event to FSP or nvram. | |
208 | */ | |
69111bac CL |
209 | while (__this_cpu_read(mce_queue_count) > 0) { |
210 | index = __this_cpu_read(mce_queue_count) - 1; | |
b5ff4211 | 211 | machine_check_print_event_info( |
69111bac CL |
212 | this_cpu_ptr(&mce_event_queue[index])); |
213 | __this_cpu_dec(mce_queue_count); | |
b5ff4211 | 214 | } |
b5ff4211 MS |
215 | } |
216 | ||
217 | void machine_check_print_event_info(struct machine_check_event *evt) | |
218 | { | |
219 | const char *level, *sevstr, *subtype; | |
220 | static const char *mc_ue_types[] = { | |
221 | "Indeterminate", | |
222 | "Instruction fetch", | |
223 | "Page table walk ifetch", | |
224 | "Load/Store", | |
225 | "Page table walk Load/Store", | |
226 | }; | |
227 | static const char *mc_slb_types[] = { | |
228 | "Indeterminate", | |
229 | "Parity", | |
230 | "Multihit", | |
231 | }; | |
232 | static const char *mc_erat_types[] = { | |
233 | "Indeterminate", | |
234 | "Parity", | |
235 | "Multihit", | |
236 | }; | |
237 | static const char *mc_tlb_types[] = { | |
238 | "Indeterminate", | |
239 | "Parity", | |
240 | "Multihit", | |
241 | }; | |
242 | ||
243 | /* Print things out */ | |
244 | if (evt->version != MCE_V1) { | |
245 | pr_err("Machine Check Exception, Unknown event version %d !\n", | |
246 | evt->version); | |
247 | return; | |
248 | } | |
249 | switch (evt->severity) { | |
250 | case MCE_SEV_NO_ERROR: | |
251 | level = KERN_INFO; | |
252 | sevstr = "Harmless"; | |
253 | break; | |
254 | case MCE_SEV_WARNING: | |
255 | level = KERN_WARNING; | |
256 | sevstr = ""; | |
257 | break; | |
258 | case MCE_SEV_ERROR_SYNC: | |
259 | level = KERN_ERR; | |
260 | sevstr = "Severe"; | |
261 | break; | |
262 | case MCE_SEV_FATAL: | |
263 | default: | |
264 | level = KERN_ERR; | |
265 | sevstr = "Fatal"; | |
266 | break; | |
267 | } | |
268 | ||
269 | printk("%s%s Machine check interrupt [%s]\n", level, sevstr, | |
270 | evt->disposition == MCE_DISPOSITION_RECOVERED ? | |
271 | "Recovered" : "[Not recovered"); | |
272 | printk("%s Initiator: %s\n", level, | |
273 | evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); | |
274 | switch (evt->error_type) { | |
275 | case MCE_ERROR_TYPE_UE: | |
276 | subtype = evt->u.ue_error.ue_error_type < | |
277 | ARRAY_SIZE(mc_ue_types) ? | |
278 | mc_ue_types[evt->u.ue_error.ue_error_type] | |
279 | : "Unknown"; | |
280 | printk("%s Error type: UE [%s]\n", level, subtype); | |
281 | if (evt->u.ue_error.effective_address_provided) | |
282 | printk("%s Effective address: %016llx\n", | |
283 | level, evt->u.ue_error.effective_address); | |
284 | if (evt->u.ue_error.physical_address_provided) | |
c01e0159 | 285 | printk("%s Physical address: %016llx\n", |
b5ff4211 MS |
286 | level, evt->u.ue_error.physical_address); |
287 | break; | |
288 | case MCE_ERROR_TYPE_SLB: | |
289 | subtype = evt->u.slb_error.slb_error_type < | |
290 | ARRAY_SIZE(mc_slb_types) ? | |
291 | mc_slb_types[evt->u.slb_error.slb_error_type] | |
292 | : "Unknown"; | |
293 | printk("%s Error type: SLB [%s]\n", level, subtype); | |
294 | if (evt->u.slb_error.effective_address_provided) | |
295 | printk("%s Effective address: %016llx\n", | |
296 | level, evt->u.slb_error.effective_address); | |
297 | break; | |
298 | case MCE_ERROR_TYPE_ERAT: | |
299 | subtype = evt->u.erat_error.erat_error_type < | |
300 | ARRAY_SIZE(mc_erat_types) ? | |
301 | mc_erat_types[evt->u.erat_error.erat_error_type] | |
302 | : "Unknown"; | |
303 | printk("%s Error type: ERAT [%s]\n", level, subtype); | |
304 | if (evt->u.erat_error.effective_address_provided) | |
305 | printk("%s Effective address: %016llx\n", | |
306 | level, evt->u.erat_error.effective_address); | |
307 | break; | |
308 | case MCE_ERROR_TYPE_TLB: | |
309 | subtype = evt->u.tlb_error.tlb_error_type < | |
310 | ARRAY_SIZE(mc_tlb_types) ? | |
311 | mc_tlb_types[evt->u.tlb_error.tlb_error_type] | |
312 | : "Unknown"; | |
313 | printk("%s Error type: TLB [%s]\n", level, subtype); | |
314 | if (evt->u.tlb_error.effective_address_provided) | |
315 | printk("%s Effective address: %016llx\n", | |
316 | level, evt->u.tlb_error.effective_address); | |
317 | break; | |
318 | default: | |
319 | case MCE_ERROR_TYPE_UNKNOWN: | |
320 | printk("%s Error type: Unknown\n", level); | |
321 | break; | |
322 | } | |
323 | } | |
b63a0ffe MS |
324 | |
325 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) | |
326 | { | |
327 | switch (evt->error_type) { | |
328 | case MCE_ERROR_TYPE_UE: | |
329 | if (evt->u.ue_error.effective_address_provided) | |
330 | return evt->u.ue_error.effective_address; | |
331 | break; | |
332 | case MCE_ERROR_TYPE_SLB: | |
333 | if (evt->u.slb_error.effective_address_provided) | |
334 | return evt->u.slb_error.effective_address; | |
335 | break; | |
336 | case MCE_ERROR_TYPE_ERAT: | |
337 | if (evt->u.erat_error.effective_address_provided) | |
338 | return evt->u.erat_error.effective_address; | |
339 | break; | |
340 | case MCE_ERROR_TYPE_TLB: | |
341 | if (evt->u.tlb_error.effective_address_provided) | |
342 | return evt->u.tlb_error.effective_address; | |
343 | break; | |
344 | default: | |
345 | case MCE_ERROR_TYPE_UNKNOWN: | |
346 | break; | |
347 | } | |
348 | return 0; | |
349 | } | |
350 | EXPORT_SYMBOL(get_mce_fault_addr); |