]>
Commit | Line | Data |
---|---|---|
36df96f8 MS |
1 | /* |
2 | * Machine check exception handling. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright 2013 IBM Corporation | |
19 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
20 | */ | |
21 | ||
22 | #undef DEBUG | |
23 | #define pr_fmt(fmt) "mce: " fmt | |
24 | ||
25 | #include <linux/types.h> | |
26 | #include <linux/ptrace.h> | |
27 | #include <linux/percpu.h> | |
28 | #include <linux/export.h> | |
30c82635 | 29 | #include <linux/irq_work.h> |
36df96f8 MS |
30 | #include <asm/mce.h> |
31 | ||
/*
 * Per-CPU log of machine check events. save_mce_event() increments
 * mce_nest_count and fills slot (count - 1) of mce_event; get_mce_event()
 * reads that same slot and decrements the count when asked to release it.
 */
32 | static DEFINE_PER_CPU(int, mce_nest_count); | |
33 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); | |
34 | ||
b5ff4211 MS |
35 | /* Queue for delayed MCE events. */ |
/*
 * machine_check_queue_event() appends to this per-CPU queue and
 * machine_check_process_queued_event() drains it; mce_queue_count is the
 * number of entries currently held.
 */
36 | static DEFINE_PER_CPU(int, mce_queue_count); | |
37 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); | |
38 | ||
/*
 * irq_work item whose callback is machine_check_process_queued_event().
 * machine_check_queue_event() queues it after storing an event so that the
 * event is processed later, outside the machine check path.
 */
30c82635 | 39 | static void machine_check_process_queued_event(struct irq_work *work); |
635218c7 | 40 | static struct irq_work mce_event_process_work = { |
30c82635 MS |
41 | .func = machine_check_process_queued_event, |
42 | }; | |
43 | ||
36df96f8 MS |
44 | static void mce_set_error_info(struct machine_check_event *mce, |
45 | struct mce_error_info *mce_err) | |
46 | { | |
47 | mce->error_type = mce_err->error_type; | |
48 | switch (mce_err->error_type) { | |
49 | case MCE_ERROR_TYPE_UE: | |
50 | mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; | |
51 | break; | |
52 | case MCE_ERROR_TYPE_SLB: | |
53 | mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; | |
54 | break; | |
55 | case MCE_ERROR_TYPE_ERAT: | |
56 | mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; | |
57 | break; | |
58 | case MCE_ERROR_TYPE_TLB: | |
59 | mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; | |
60 | break; | |
61 | case MCE_ERROR_TYPE_UNKNOWN: | |
62 | default: | |
63 | break; | |
64 | } | |
65 | } | |
66 | ||
67 | /* | |
68 | * Decode and save high level MCE information into per cpu buffer which | |
69 | * is an array of machine_check_event structure. | |
70 | */ | |
71 | void save_mce_event(struct pt_regs *regs, long handled, | |
72 | struct mce_error_info *mce_err, | |
55672ecf | 73 | uint64_t nip, uint64_t addr) |
36df96f8 MS |
74 | { |
75 | uint64_t srr1; | |
ffb2d78e | 76 | int index = __this_cpu_inc_return(mce_nest_count) - 1; |
69111bac | 77 | struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
78 | |
79 | /* | |
80 | * Return if we don't have enough space to log mce event. | |
81 | * mce_nest_count may go beyond MAX_MC_EVT but that's ok, | |
82 | * the check below will stop buffer overrun. | |
83 | */ | |
84 | if (index >= MAX_MC_EVT) | |
85 | return; | |
86 | ||
87 | /* Populate generic machine check info */ | |
88 | mce->version = MCE_V1; | |
55672ecf | 89 | mce->srr0 = nip; |
36df96f8 MS |
90 | mce->srr1 = regs->msr; |
91 | mce->gpr3 = regs->gpr[3]; | |
92 | mce->in_use = 1; | |
93 | ||
94 | mce->initiator = MCE_INITIATOR_CPU; | |
c74dd88e MS |
95 | /* Mark it recovered if we have handled it and MSR(RI=1). */ |
96 | if (handled && (regs->msr & MSR_RI)) | |
36df96f8 MS |
97 | mce->disposition = MCE_DISPOSITION_RECOVERED; |
98 | else | |
99 | mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; | |
100 | mce->severity = MCE_SEV_ERROR_SYNC; | |
101 | ||
102 | srr1 = regs->msr; | |
103 | ||
104 | /* | |
105 | * Populate the mce error_type and type-specific error_type. | |
106 | */ | |
107 | mce_set_error_info(mce, mce_err); | |
108 | ||
109 | if (!addr) | |
110 | return; | |
111 | ||
112 | if (mce->error_type == MCE_ERROR_TYPE_TLB) { | |
113 | mce->u.tlb_error.effective_address_provided = true; | |
114 | mce->u.tlb_error.effective_address = addr; | |
115 | } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { | |
116 | mce->u.slb_error.effective_address_provided = true; | |
117 | mce->u.slb_error.effective_address = addr; | |
118 | } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { | |
119 | mce->u.erat_error.effective_address_provided = true; | |
120 | mce->u.erat_error.effective_address = addr; | |
121 | } else if (mce->error_type == MCE_ERROR_TYPE_UE) { | |
122 | mce->u.ue_error.effective_address_provided = true; | |
123 | mce->u.ue_error.effective_address = addr; | |
124 | } | |
125 | return; | |
126 | } | |
127 | ||
128 | /* | |
129 | * get_mce_event: | |
130 | * mce Pointer to machine_check_event structure to be filled. | |
131 | * release Flag to indicate whether to free the event slot or not. | |
132 | * 0 <= do not release the mce event. Caller will invoke | |
133 | * release_mce_event() once event has been consumed. | |
134 | * 1 <= release the slot. | |
135 | * | |
136 | * return 1 = success | |
137 | * 0 = failure | |
138 | * | |
139 | * get_mce_event() will be called by platform specific machine check | |
140 | * handle routine and in KVM. | |
141 | * When we call get_mce_event(), we are still in interrupt context and | |
142 | * preemption will not be scheduled until ret_from_expect() routine | |
143 | * is called. | |
144 | */ | |
145 | int get_mce_event(struct machine_check_event *mce, bool release) | |
146 | { | |
69111bac | 147 | int index = __this_cpu_read(mce_nest_count) - 1; |
36df96f8 MS |
148 | struct machine_check_event *mc_evt; |
149 | int ret = 0; | |
150 | ||
151 | /* Sanity check */ | |
152 | if (index < 0) | |
153 | return ret; | |
154 | ||
155 | /* Check if we have MCE info to process. */ | |
156 | if (index < MAX_MC_EVT) { | |
69111bac | 157 | mc_evt = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
158 | /* Copy the event structure and release the original */ |
159 | if (mce) | |
160 | *mce = *mc_evt; | |
161 | if (release) | |
162 | mc_evt->in_use = 0; | |
163 | ret = 1; | |
164 | } | |
165 | /* Decrement the count to free the slot. */ | |
166 | if (release) | |
69111bac | 167 | __this_cpu_dec(mce_nest_count); |
36df96f8 MS |
168 | |
169 | return ret; | |
170 | } | |
171 | ||
/*
 * Release the most recently logged MCE event on this CPU without copying it
 * out. The return value of get_mce_event() is deliberately ignored.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
b5ff4211 MS |
176 | |
177 | /* | |
178 | * Queue up the MCE event which then can be handled later. | |
179 | */ | |
180 | void machine_check_queue_event(void) | |
181 | { | |
182 | int index; | |
183 | struct machine_check_event evt; | |
184 | ||
185 | if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) | |
186 | return; | |
187 | ||
ffb2d78e | 188 | index = __this_cpu_inc_return(mce_queue_count) - 1; |
b5ff4211 MS |
189 | /* If queue is full, just return for now. */ |
190 | if (index >= MAX_MC_EVT) { | |
69111bac | 191 | __this_cpu_dec(mce_queue_count); |
b5ff4211 MS |
192 | return; |
193 | } | |
69111bac | 194 | memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); |
30c82635 MS |
195 | |
196 | /* Queue irq work to process this event later. */ | |
197 | irq_work_queue(&mce_event_process_work); | |
b5ff4211 MS |
198 | } |
199 | ||
200 | /* | |
201 | * process pending MCE event from the mce event queue. This function will be | |
202 | * called during syscall exit. | |
203 | */ | |
30c82635 | 204 | static void machine_check_process_queued_event(struct irq_work *work) |
b5ff4211 MS |
205 | { |
206 | int index; | |
207 | ||
b5ff4211 MS |
208 | /* |
209 | * For now just print it to console. | |
210 | * TODO: log this error event to FSP or nvram. | |
211 | */ | |
69111bac CL |
212 | while (__this_cpu_read(mce_queue_count) > 0) { |
213 | index = __this_cpu_read(mce_queue_count) - 1; | |
b5ff4211 | 214 | machine_check_print_event_info( |
69111bac CL |
215 | this_cpu_ptr(&mce_event_queue[index])); |
216 | __this_cpu_dec(mce_queue_count); | |
b5ff4211 | 217 | } |
b5ff4211 MS |
218 | } |
219 | ||
220 | void machine_check_print_event_info(struct machine_check_event *evt) | |
221 | { | |
222 | const char *level, *sevstr, *subtype; | |
223 | static const char *mc_ue_types[] = { | |
224 | "Indeterminate", | |
225 | "Instruction fetch", | |
226 | "Page table walk ifetch", | |
227 | "Load/Store", | |
228 | "Page table walk Load/Store", | |
229 | }; | |
230 | static const char *mc_slb_types[] = { | |
231 | "Indeterminate", | |
232 | "Parity", | |
233 | "Multihit", | |
234 | }; | |
235 | static const char *mc_erat_types[] = { | |
236 | "Indeterminate", | |
237 | "Parity", | |
238 | "Multihit", | |
239 | }; | |
240 | static const char *mc_tlb_types[] = { | |
241 | "Indeterminate", | |
242 | "Parity", | |
243 | "Multihit", | |
244 | }; | |
245 | ||
246 | /* Print things out */ | |
247 | if (evt->version != MCE_V1) { | |
248 | pr_err("Machine Check Exception, Unknown event version %d !\n", | |
249 | evt->version); | |
250 | return; | |
251 | } | |
252 | switch (evt->severity) { | |
253 | case MCE_SEV_NO_ERROR: | |
254 | level = KERN_INFO; | |
255 | sevstr = "Harmless"; | |
256 | break; | |
257 | case MCE_SEV_WARNING: | |
258 | level = KERN_WARNING; | |
259 | sevstr = ""; | |
260 | break; | |
261 | case MCE_SEV_ERROR_SYNC: | |
262 | level = KERN_ERR; | |
263 | sevstr = "Severe"; | |
264 | break; | |
265 | case MCE_SEV_FATAL: | |
266 | default: | |
267 | level = KERN_ERR; | |
268 | sevstr = "Fatal"; | |
269 | break; | |
270 | } | |
271 | ||
272 | printk("%s%s Machine check interrupt [%s]\n", level, sevstr, | |
273 | evt->disposition == MCE_DISPOSITION_RECOVERED ? | |
274 | "Recovered" : "[Not recovered"); | |
275 | printk("%s Initiator: %s\n", level, | |
276 | evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); | |
277 | switch (evt->error_type) { | |
278 | case MCE_ERROR_TYPE_UE: | |
279 | subtype = evt->u.ue_error.ue_error_type < | |
280 | ARRAY_SIZE(mc_ue_types) ? | |
281 | mc_ue_types[evt->u.ue_error.ue_error_type] | |
282 | : "Unknown"; | |
283 | printk("%s Error type: UE [%s]\n", level, subtype); | |
284 | if (evt->u.ue_error.effective_address_provided) | |
285 | printk("%s Effective address: %016llx\n", | |
286 | level, evt->u.ue_error.effective_address); | |
287 | if (evt->u.ue_error.physical_address_provided) | |
c01e0159 | 288 | printk("%s Physical address: %016llx\n", |
b5ff4211 MS |
289 | level, evt->u.ue_error.physical_address); |
290 | break; | |
291 | case MCE_ERROR_TYPE_SLB: | |
292 | subtype = evt->u.slb_error.slb_error_type < | |
293 | ARRAY_SIZE(mc_slb_types) ? | |
294 | mc_slb_types[evt->u.slb_error.slb_error_type] | |
295 | : "Unknown"; | |
296 | printk("%s Error type: SLB [%s]\n", level, subtype); | |
297 | if (evt->u.slb_error.effective_address_provided) | |
298 | printk("%s Effective address: %016llx\n", | |
299 | level, evt->u.slb_error.effective_address); | |
300 | break; | |
301 | case MCE_ERROR_TYPE_ERAT: | |
302 | subtype = evt->u.erat_error.erat_error_type < | |
303 | ARRAY_SIZE(mc_erat_types) ? | |
304 | mc_erat_types[evt->u.erat_error.erat_error_type] | |
305 | : "Unknown"; | |
306 | printk("%s Error type: ERAT [%s]\n", level, subtype); | |
307 | if (evt->u.erat_error.effective_address_provided) | |
308 | printk("%s Effective address: %016llx\n", | |
309 | level, evt->u.erat_error.effective_address); | |
310 | break; | |
311 | case MCE_ERROR_TYPE_TLB: | |
312 | subtype = evt->u.tlb_error.tlb_error_type < | |
313 | ARRAY_SIZE(mc_tlb_types) ? | |
314 | mc_tlb_types[evt->u.tlb_error.tlb_error_type] | |
315 | : "Unknown"; | |
316 | printk("%s Error type: TLB [%s]\n", level, subtype); | |
317 | if (evt->u.tlb_error.effective_address_provided) | |
318 | printk("%s Effective address: %016llx\n", | |
319 | level, evt->u.tlb_error.effective_address); | |
320 | break; | |
321 | default: | |
322 | case MCE_ERROR_TYPE_UNKNOWN: | |
323 | printk("%s Error type: Unknown\n", level); | |
324 | break; | |
325 | } | |
326 | } | |
b63a0ffe MS |
327 | |
328 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) | |
329 | { | |
330 | switch (evt->error_type) { | |
331 | case MCE_ERROR_TYPE_UE: | |
332 | if (evt->u.ue_error.effective_address_provided) | |
333 | return evt->u.ue_error.effective_address; | |
334 | break; | |
335 | case MCE_ERROR_TYPE_SLB: | |
336 | if (evt->u.slb_error.effective_address_provided) | |
337 | return evt->u.slb_error.effective_address; | |
338 | break; | |
339 | case MCE_ERROR_TYPE_ERAT: | |
340 | if (evt->u.erat_error.effective_address_provided) | |
341 | return evt->u.erat_error.effective_address; | |
342 | break; | |
343 | case MCE_ERROR_TYPE_TLB: | |
344 | if (evt->u.tlb_error.effective_address_provided) | |
345 | return evt->u.tlb_error.effective_address; | |
346 | break; | |
347 | default: | |
348 | case MCE_ERROR_TYPE_UNKNOWN: | |
349 | break; | |
350 | } | |
351 | return 0; | |
352 | } | |
353 | EXPORT_SYMBOL(get_mce_fault_addr); |