]>
Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 | 2 | /* |
1da177e4 | 3 | * Copyright (C) 2001 Dave Engebretsen IBM Corporation |
1da177e4 LT |
4 | */ |
5 | ||
1da177e4 | 6 | #include <linux/sched.h> |
1da177e4 | 7 | #include <linux/interrupt.h> |
1da177e4 | 8 | #include <linux/irq.h> |
90128997 | 9 | #include <linux/of.h> |
55fc0c56 AB |
10 | #include <linux/fs.h> |
11 | #include <linux/reboot.h> | |
94675cce | 12 | #include <linux/irq_work.h> |
1da177e4 | 13 | |
1da177e4 LT |
14 | #include <asm/machdep.h> |
15 | #include <asm/rtas.h> | |
8c4f1f29 | 16 | #include <asm/firmware.h> |
a43c1590 | 17 | #include <asm/mce.h> |
1da177e4 | 18 | |
577830b0 | 19 | #include "pseries.h" |
c902be71 | 20 | |
1da177e4 LT |
21 | static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; |
22 | static DEFINE_SPINLOCK(ras_log_buf_lock); | |
23 | ||
1da177e4 LT |
24 | static int ras_check_exception_token; |
25 | ||
94675cce MS |
26 | static void mce_process_errlog_event(struct irq_work *work); |
27 | static struct irq_work mce_errlog_process_work = { | |
28 | .func = mce_process_errlog_event, | |
29 | }; | |
30 | ||
1da177e4 LT |
31 | #define EPOW_SENSOR_TOKEN 9 |
32 | #define EPOW_SENSOR_INDEX 0 | |
1da177e4 | 33 | |
b4af279a VP |
34 | /* EPOW events counter variable */ |
35 | static int num_epow_events; | |
36 | ||
b7d9eb39 | 37 | static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); |
7d12e780 DH |
38 | static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); |
39 | static irqreturn_t ras_error_interrupt(int irq, void *dev_id); | |
1da177e4 | 40 | |
04fce21c MS |
/*
 * RTAS pseries MCE errorlog section.
 *
 * The multi-byte fields are stored big-endian (__be32/__be64) and have to be
 * converted with be*_to_cpu() before use. The structure is __packed, so the
 * compiler inserts no padding between the fields.
 */
struct pseries_mc_errorlog {
	__be32	fru_id;
	__be32	proc_id;
	u8	error_type;		/* compared against MC_ERROR_TYPE_* */
	/*
	 * sub_err_type (1 byte). Bit fields depends on error_type
	 *
	 *   MSB0
	 *   |
	 *   V
	 *   01234567
	 *   XXXXXXXX
	 *
	 * For error_type == MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X		1: Permanent or Transient UE.	(0x80)
	 *    X		1: Effective address provided.	(0x40)
	 *     X	1: Logical address provided.	(0x20)
	 *      XX	2: Reserved.
	 *        XXX	3: Type of UE error.		(0x07)
	 *
	 * For error_type != MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X		1: Effective address provided.	(0x80)
	 *    XXXXX	5: Reserved.
	 *         XX	2: Type of SLB/ERAT/TLB error.	(0x03)
	 */
	u8	sub_err_type;
	u8	reserved_1[6];
	__be64	effective_address;	/* valid only if flagged in sub_err_type */
	__be64	logical_address;	/* valid only if flagged in sub_err_type */
} __packed;
74 | ||
75 | /* RTAS pseries MCE error types */ | |
76 | #define MC_ERROR_TYPE_UE 0x00 | |
77 | #define MC_ERROR_TYPE_SLB 0x01 | |
78 | #define MC_ERROR_TYPE_ERAT 0x02 | |
79 | #define MC_ERROR_TYPE_TLB 0x04 | |
80 | #define MC_ERROR_TYPE_D_CACHE 0x05 | |
81 | #define MC_ERROR_TYPE_I_CACHE 0x07 | |
82 | ||
83 | /* RTAS pseries MCE error sub types */ | |
84 | #define MC_ERROR_UE_INDETERMINATE 0 | |
85 | #define MC_ERROR_UE_IFETCH 1 | |
86 | #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 | |
87 | #define MC_ERROR_UE_LOAD_STORE 3 | |
88 | #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 | |
89 | ||
90 | #define MC_ERROR_SLB_PARITY 0 | |
91 | #define MC_ERROR_SLB_MULTIHIT 1 | |
92 | #define MC_ERROR_SLB_INDETERMINATE 2 | |
93 | ||
94 | #define MC_ERROR_ERAT_PARITY 1 | |
95 | #define MC_ERROR_ERAT_MULTIHIT 2 | |
96 | #define MC_ERROR_ERAT_INDETERMINATE 3 | |
97 | ||
98 | #define MC_ERROR_TLB_PARITY 1 | |
99 | #define MC_ERROR_TLB_MULTIHIT 2 | |
100 | #define MC_ERROR_TLB_INDETERMINATE 3 | |
101 | ||
102 | static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) | |
103 | { | |
104 | switch (mlog->error_type) { | |
105 | case MC_ERROR_TYPE_UE: | |
106 | return (mlog->sub_err_type & 0x07); | |
107 | case MC_ERROR_TYPE_SLB: | |
108 | case MC_ERROR_TYPE_ERAT: | |
109 | case MC_ERROR_TYPE_TLB: | |
110 | return (mlog->sub_err_type & 0x03); | |
111 | default: | |
112 | return 0; | |
113 | } | |
114 | } | |
115 | ||
116 | static | |
117 | inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog) | |
118 | { | |
119 | __be64 addr = 0; | |
120 | ||
121 | switch (mlog->error_type) { | |
122 | case MC_ERROR_TYPE_UE: | |
123 | if (mlog->sub_err_type & 0x40) | |
124 | addr = mlog->effective_address; | |
125 | break; | |
126 | case MC_ERROR_TYPE_SLB: | |
127 | case MC_ERROR_TYPE_ERAT: | |
128 | case MC_ERROR_TYPE_TLB: | |
129 | if (mlog->sub_err_type & 0x80) | |
130 | addr = mlog->effective_address; | |
131 | default: | |
132 | break; | |
133 | } | |
134 | return be64_to_cpu(addr); | |
135 | } | |
0ebfff14 | 136 | |
c9dccf1d SB |
137 | /* |
138 | * Enable the hotplug interrupt late because processing them may touch other | |
139 | * devices or systems (e.g. hugepages) that have not been initialized at the | |
140 | * subsys stage. | |
141 | */ | |
142 | int __init init_ras_hotplug_IRQ(void) | |
143 | { | |
144 | struct device_node *np; | |
145 | ||
146 | /* Hotplug Events */ | |
147 | np = of_find_node_by_path("/event-sources/hot-plug-events"); | |
148 | if (np != NULL) { | |
149 | if (dlpar_workqueue_init() == 0) | |
150 | request_event_sources_irqs(np, ras_hotplug_interrupt, | |
151 | "RAS_HOTPLUG"); | |
152 | of_node_put(np); | |
153 | } | |
154 | ||
155 | return 0; | |
156 | } | |
157 | machine_late_initcall(pseries, init_ras_hotplug_IRQ); | |
158 | ||
1da177e4 LT |
159 | /* |
160 | * Initialize handlers for the set of interrupts caused by hardware errors | |
161 | * and power system events. | |
162 | */ | |
163 | static int __init init_ras_IRQ(void) | |
164 | { | |
165 | struct device_node *np; | |
166 | ||
1da177e4 LT |
167 | ras_check_exception_token = rtas_token("check-exception"); |
168 | ||
169 | /* Internal Errors */ | |
170 | np = of_find_node_by_path("/event-sources/internal-errors"); | |
171 | if (np != NULL) { | |
32c96f77 MN |
172 | request_event_sources_irqs(np, ras_error_interrupt, |
173 | "RAS_ERROR"); | |
1da177e4 LT |
174 | of_node_put(np); |
175 | } | |
176 | ||
177 | /* EPOW Events */ | |
178 | np = of_find_node_by_path("/event-sources/epow-events"); | |
179 | if (np != NULL) { | |
32c96f77 | 180 | request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); |
1da177e4 LT |
181 | of_node_put(np); |
182 | } | |
183 | ||
69ed3324 | 184 | return 0; |
1da177e4 | 185 | } |
8e83e905 | 186 | machine_subsys_initcall(pseries, init_ras_IRQ); |
1da177e4 | 187 | |
55fc0c56 AB |
188 | #define EPOW_SHUTDOWN_NORMAL 1 |
189 | #define EPOW_SHUTDOWN_ON_UPS 2 | |
190 | #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 | |
191 | #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 | |
192 | ||
193 | static void handle_system_shutdown(char event_modifier) | |
194 | { | |
195 | switch (event_modifier) { | |
196 | case EPOW_SHUTDOWN_NORMAL: | |
b4af279a | 197 | pr_emerg("Power off requested\n"); |
1b7e0cbe | 198 | orderly_poweroff(true); |
55fc0c56 AB |
199 | break; |
200 | ||
201 | case EPOW_SHUTDOWN_ON_UPS: | |
b4af279a VP |
202 | pr_emerg("Loss of system power detected. System is running on" |
203 | " UPS/battery. Check RTAS error log for details\n"); | |
79872e35 | 204 | orderly_poweroff(true); |
55fc0c56 AB |
205 | break; |
206 | ||
207 | case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: | |
b4af279a VP |
208 | pr_emerg("Loss of system critical functions detected. Check" |
209 | " RTAS error log for details\n"); | |
1b7e0cbe | 210 | orderly_poweroff(true); |
55fc0c56 AB |
211 | break; |
212 | ||
213 | case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: | |
b4af279a VP |
214 | pr_emerg("High ambient temperature detected. Check RTAS" |
215 | " error log for details\n"); | |
1b7e0cbe | 216 | orderly_poweroff(true); |
55fc0c56 AB |
217 | break; |
218 | ||
219 | default: | |
b4af279a | 220 | pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", |
55fc0c56 AB |
221 | event_modifier); |
222 | } | |
223 | } | |
224 | ||
/*
 * EPOW section of a pseries error log, as overlaid on pseries_errorlog->data
 * by rtas_parse_epow_errlog().
 */
struct epow_errorlog {
	unsigned char sensor_value;	/* low 4 bits: EPOW action code */
	unsigned char event_modifier;	/* low 4 bits: event modifier */
	unsigned char extended_modifier;	/* not read in this file */
	unsigned char reserved;
	unsigned char platform_reason;	/* not read in this file */
};
232 | ||
233 | #define EPOW_RESET 0 | |
234 | #define EPOW_WARN_COOLING 1 | |
235 | #define EPOW_WARN_POWER 2 | |
236 | #define EPOW_SYSTEM_SHUTDOWN 3 | |
237 | #define EPOW_SYSTEM_HALT 4 | |
238 | #define EPOW_MAIN_ENCLOSURE 5 | |
239 | #define EPOW_POWER_OFF 7 | |
240 | ||
e51df2c1 | 241 | static void rtas_parse_epow_errlog(struct rtas_error_log *log) |
55fc0c56 AB |
242 | { |
243 | struct pseries_errorlog *pseries_log; | |
244 | struct epow_errorlog *epow_log; | |
245 | char action_code; | |
246 | char modifier; | |
247 | ||
248 | pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); | |
249 | if (pseries_log == NULL) | |
250 | return; | |
251 | ||
252 | epow_log = (struct epow_errorlog *)pseries_log->data; | |
253 | action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ | |
254 | modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ | |
255 | ||
256 | switch (action_code) { | |
257 | case EPOW_RESET: | |
b4af279a VP |
258 | if (num_epow_events) { |
259 | pr_info("Non critical power/cooling issue cleared\n"); | |
260 | num_epow_events--; | |
261 | } | |
55fc0c56 AB |
262 | break; |
263 | ||
264 | case EPOW_WARN_COOLING: | |
b4af279a VP |
265 | pr_info("Non-critical cooling issue detected. Check RTAS error" |
266 | " log for details\n"); | |
55fc0c56 AB |
267 | break; |
268 | ||
269 | case EPOW_WARN_POWER: | |
b4af279a VP |
270 | pr_info("Non-critical power issue detected. Check RTAS error" |
271 | " log for details\n"); | |
55fc0c56 AB |
272 | break; |
273 | ||
274 | case EPOW_SYSTEM_SHUTDOWN: | |
275 | handle_system_shutdown(epow_log->event_modifier); | |
276 | break; | |
277 | ||
278 | case EPOW_SYSTEM_HALT: | |
b4af279a VP |
279 | pr_emerg("Critical power/cooling issue detected. Check RTAS" |
280 | " error log for details. Powering off.\n"); | |
1b7e0cbe | 281 | orderly_poweroff(true); |
55fc0c56 AB |
282 | break; |
283 | ||
284 | case EPOW_MAIN_ENCLOSURE: | |
285 | case EPOW_POWER_OFF: | |
b4af279a VP |
286 | pr_emerg("System about to lose power. Check RTAS error log " |
287 | " for details. Powering off immediately.\n"); | |
55fc0c56 AB |
288 | emergency_sync(); |
289 | kernel_power_off(); | |
290 | break; | |
291 | ||
292 | default: | |
b4af279a | 293 | pr_err("Unknown power/cooling event (action code = %d)\n", |
55fc0c56 AB |
294 | action_code); |
295 | } | |
b4af279a VP |
296 | |
297 | /* Increment epow events counter variable */ | |
298 | if (action_code != EPOW_RESET) | |
299 | num_epow_events++; | |
55fc0c56 AB |
300 | } |
301 | ||
b7d9eb39 JA |
302 | static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) |
303 | { | |
304 | struct pseries_errorlog *pseries_log; | |
305 | struct pseries_hp_errorlog *hp_elog; | |
306 | ||
307 | spin_lock(&ras_log_buf_lock); | |
308 | ||
309 | rtas_call(ras_check_exception_token, 6, 1, NULL, | |
310 | RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), | |
311 | RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), | |
312 | rtas_get_error_log_max()); | |
313 | ||
314 | pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, | |
315 | PSERIES_ELOG_SECT_ID_HOTPLUG); | |
316 | hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; | |
317 | ||
318 | /* | |
319 | * Since PCI hotplug is not currently supported on pseries, put PCI | |
320 | * hotplug events on the ras_log_buf to be handled by rtas_errd. | |
321 | */ | |
322 | if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || | |
4c5d87db OH |
323 | hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || |
324 | hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) | |
fd12527a | 325 | queue_hotplug_event(hp_elog); |
b7d9eb39 JA |
326 | else |
327 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); | |
328 | ||
329 | spin_unlock(&ras_log_buf_lock); | |
330 | return IRQ_HANDLED; | |
331 | } | |
332 | ||
55fc0c56 | 333 | /* Handle environmental and power warning (EPOW) interrupts. */ |
7d12e780 | 334 | static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) |
1da177e4 | 335 | { |
55fc0c56 AB |
336 | int status; |
337 | int state; | |
1da177e4 LT |
338 | int critical; |
339 | ||
1c2cb594 TH |
340 | status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, |
341 | &state); | |
1da177e4 LT |
342 | |
343 | if (state > 3) | |
55fc0c56 | 344 | critical = 1; /* Time Critical */ |
1da177e4 LT |
345 | else |
346 | critical = 0; | |
347 | ||
348 | spin_lock(&ras_log_buf_lock); | |
349 | ||
350 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, | |
b08e281b | 351 | RTAS_VECTOR_EXTERNAL_INTERRUPT, |
476eb491 | 352 | virq_to_hw(irq), |
6f43747f | 353 | RTAS_EPOW_WARNING, |
1da177e4 LT |
354 | critical, __pa(&ras_log_buf), |
355 | rtas_get_error_log_max()); | |
356 | ||
1da177e4 LT |
357 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); |
358 | ||
55fc0c56 AB |
359 | rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); |
360 | ||
1da177e4 LT |
361 | spin_unlock(&ras_log_buf_lock); |
362 | return IRQ_HANDLED; | |
363 | } | |
364 | ||
365 | /* | |
366 | * Handle hardware error interrupts. | |
367 | * | |
368 | * RTAS check-exception is called to collect data on the exception. If | |
369 | * the error is deemed recoverable, we log a warning and return. | |
370 | * For nonrecoverable errors, an error is logged and we stop all processing | |
371 | * as quickly as possible in order to prevent propagation of the failure. | |
372 | */ | |
7d12e780 | 373 | static irqreturn_t ras_error_interrupt(int irq, void *dev_id) |
1da177e4 LT |
374 | { |
375 | struct rtas_error_log *rtas_elog; | |
cc8b5263 | 376 | int status; |
1da177e4 LT |
377 | int fatal; |
378 | ||
379 | spin_lock(&ras_log_buf_lock); | |
380 | ||
381 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, | |
b08e281b | 382 | RTAS_VECTOR_EXTERNAL_INTERRUPT, |
476eb491 | 383 | virq_to_hw(irq), |
cc8b5263 | 384 | RTAS_INTERNAL_ERROR, 1 /* Time Critical */, |
1da177e4 LT |
385 | __pa(&ras_log_buf), |
386 | rtas_get_error_log_max()); | |
387 | ||
388 | rtas_elog = (struct rtas_error_log *)ras_log_buf; | |
389 | ||
a08a53ea GK |
390 | if (status == 0 && |
391 | rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) | |
1da177e4 LT |
392 | fatal = 1; |
393 | else | |
394 | fatal = 0; | |
395 | ||
396 | /* format and print the extended information */ | |
397 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); | |
398 | ||
399 | if (fatal) { | |
b4af279a VP |
400 | pr_emerg("Fatal hardware error detected. Check RTAS error" |
401 | " log for details. Powering off immediately\n"); | |
cc8b5263 AB |
402 | emergency_sync(); |
403 | kernel_power_off(); | |
1da177e4 | 404 | } else { |
b4af279a | 405 | pr_err("Recoverable hardware error detected\n"); |
1da177e4 LT |
406 | } |
407 | ||
408 | spin_unlock(&ras_log_buf_lock); | |
409 | return IRQ_HANDLED; | |
410 | } | |
411 | ||
d368514c AB |
412 | /* |
413 | * Some versions of FWNMI place the buffer inside the 4kB page starting at | |
414 | * 0x7000. Other versions place it inside the rtas buffer. We check both. | |
415 | */ | |
416 | #define VALID_FWNMI_BUFFER(A) \ | |
417 | ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ | |
418 | (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) | |
419 | ||
94675cce MS |
420 | static inline struct rtas_error_log *fwnmi_get_errlog(void) |
421 | { | |
422 | return (struct rtas_error_log *)local_paca->mce_data_buf; | |
423 | } | |
424 | ||
d368514c AB |
425 | /* |
426 | * Get the error information for errors coming through the | |
1da177e4 LT |
427 | * FWNMI vectors. The pt_regs' r3 will be updated to reflect |
428 | * the actual r3 if possible, and a ptr to the error log entry | |
429 | * will be returned if found. | |
430 | * | |
94675cce | 431 | * Use one buffer mce_data_buf per cpu to store RTAS error. |
d368514c | 432 | * |
94675cce | 433 | * The mce_data_buf does not have any locks or protection around it, |
1da177e4 LT |
434 | * if a second machine check comes in, or a system reset is done |
435 | * before we have logged the error, then we will get corruption in the | |
436 | * error log. This is preferable over holding off on calling | |
437 | * ibm,nmi-interlock which would result in us checkstopping if a | |
438 | * second machine check did come in. | |
439 | */ | |
440 | static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) | |
441 | { | |
1da177e4 | 442 | unsigned long *savep; |
94675cce | 443 | struct rtas_error_log *h; |
1da177e4 | 444 | |
ee1dd1e3 MS |
445 | /* Mask top two bits */ |
446 | regs->gpr[3] &= ~(0x3UL << 62); | |
447 | ||
d368514c | 448 | if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { |
f0e939ae | 449 | printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); |
d368514c AB |
450 | return NULL; |
451 | } | |
452 | ||
453 | savep = __va(regs->gpr[3]); | |
cd813e1c | 454 | regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ |
d368514c | 455 | |
d368514c | 456 | h = (struct rtas_error_log *)&savep[1]; |
94675cce MS |
457 | /* Use the per cpu buffer from paca to store rtas error log */ |
458 | memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); | |
a08a53ea | 459 | if (!rtas_error_extended(h)) { |
94675cce | 460 | memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); |
1da177e4 | 461 | } else { |
a08a53ea | 462 | int len, error_log_length; |
d368514c | 463 | |
a08a53ea | 464 | error_log_length = 8 + rtas_error_extended_log_length(h); |
74e96bf4 | 465 | len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); |
94675cce | 466 | memcpy(local_paca->mce_data_buf, h, len); |
1da177e4 | 467 | } |
d368514c | 468 | |
94675cce | 469 | return (struct rtas_error_log *)local_paca->mce_data_buf; |
1da177e4 LT |
470 | } |
471 | ||
472 | /* Call this when done with the data returned by FWNMI_get_errinfo. | |
473 | * It will release the saved data area for other CPUs in the | |
474 | * partition to receive FWNMI errors. | |
475 | */ | |
476 | static void fwnmi_release_errinfo(void) | |
477 | { | |
478 | int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); | |
479 | if (ret != 0) | |
d368514c | 480 | printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); |
1da177e4 LT |
481 | } |
482 | ||
c902be71 | 483 | int pSeries_system_reset_exception(struct pt_regs *regs) |
1da177e4 | 484 | { |
bded0706 NP |
485 | #ifdef __LITTLE_ENDIAN__ |
486 | /* | |
487 | * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try | |
488 | * to detect the bad SRR1 pattern here. Flip the NIP back to correct | |
489 | * endian for reporting purposes. Unfortunately the MSR can't be fixed, | |
490 | * so clear it. It will be missing MSR_RI so we won't try to recover. | |
491 | */ | |
492 | if ((be64_to_cpu(regs->msr) & | |
493 | (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| | |
494 | MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { | |
495 | regs->nip = be64_to_cpu((__be64)regs->nip); | |
496 | regs->msr = 0; | |
497 | } | |
498 | #endif | |
499 | ||
1da177e4 LT |
500 | if (fwnmi_active) { |
501 | struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); | |
502 | if (errhdr) { | |
503 | /* XXX Should look at FWNMI information */ | |
504 | } | |
505 | fwnmi_release_errinfo(); | |
506 | } | |
102c05e8 NP |
507 | |
508 | if (smp_handle_nmi_ipi(regs)) | |
509 | return 1; | |
510 | ||
c902be71 | 511 | return 0; /* need to perform reset */ |
1da177e4 LT |
512 | } |
513 | ||
8f0b8056 MS |
514 | #define VAL_TO_STRING(ar, val) \ |
515 | (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown") | |
516 | ||
517 | static void pseries_print_mce_info(struct pt_regs *regs, | |
518 | struct rtas_error_log *errp) | |
519 | { | |
520 | const char *level, *sevstr; | |
521 | struct pseries_errorlog *pseries_log; | |
522 | struct pseries_mc_errorlog *mce_log; | |
523 | u8 error_type, err_sub_type; | |
524 | u64 addr; | |
525 | u8 initiator = rtas_error_initiator(errp); | |
526 | int disposition = rtas_error_disposition(errp); | |
527 | ||
528 | static const char * const initiators[] = { | |
c9d8dda4 MS |
529 | [0] = "Unknown", |
530 | [1] = "CPU", | |
531 | [2] = "PCI", | |
532 | [3] = "ISA", | |
533 | [4] = "Memory", | |
534 | [5] = "Power Mgmt", | |
8f0b8056 MS |
535 | }; |
536 | static const char * const mc_err_types[] = { | |
c9d8dda4 MS |
537 | [0] = "UE", |
538 | [1] = "SLB", | |
539 | [2] = "ERAT", | |
540 | [3] = "Unknown", | |
541 | [4] = "TLB", | |
542 | [5] = "D-Cache", | |
543 | [6] = "Unknown", | |
544 | [7] = "I-Cache", | |
8f0b8056 MS |
545 | }; |
546 | static const char * const mc_ue_types[] = { | |
c9d8dda4 MS |
547 | [0] = "Indeterminate", |
548 | [1] = "Instruction fetch", | |
549 | [2] = "Page table walk ifetch", | |
550 | [3] = "Load/Store", | |
551 | [4] = "Page table walk Load/Store", | |
8f0b8056 MS |
552 | }; |
553 | ||
554 | /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ | |
555 | static const char * const mc_slb_types[] = { | |
c9d8dda4 MS |
556 | [0] = "Parity", |
557 | [1] = "Multihit", | |
558 | [2] = "Indeterminate", | |
8f0b8056 MS |
559 | }; |
560 | ||
561 | /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ | |
562 | static const char * const mc_soft_types[] = { | |
c9d8dda4 MS |
563 | [0] = "Unknown", |
564 | [1] = "Parity", | |
565 | [2] = "Multihit", | |
566 | [3] = "Indeterminate", | |
8f0b8056 MS |
567 | }; |
568 | ||
569 | if (!rtas_error_extended(errp)) { | |
570 | pr_err("Machine check interrupt: Missing extended error log\n"); | |
571 | return; | |
572 | } | |
573 | ||
574 | pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); | |
575 | if (pseries_log == NULL) | |
576 | return; | |
577 | ||
578 | mce_log = (struct pseries_mc_errorlog *)pseries_log->data; | |
579 | ||
580 | error_type = mce_log->error_type; | |
581 | err_sub_type = rtas_mc_error_sub_type(mce_log); | |
582 | ||
583 | switch (rtas_error_severity(errp)) { | |
584 | case RTAS_SEVERITY_NO_ERROR: | |
585 | level = KERN_INFO; | |
586 | sevstr = "Harmless"; | |
587 | break; | |
588 | case RTAS_SEVERITY_WARNING: | |
589 | level = KERN_WARNING; | |
590 | sevstr = ""; | |
591 | break; | |
592 | case RTAS_SEVERITY_ERROR: | |
593 | case RTAS_SEVERITY_ERROR_SYNC: | |
594 | level = KERN_ERR; | |
595 | sevstr = "Severe"; | |
596 | break; | |
597 | case RTAS_SEVERITY_FATAL: | |
598 | default: | |
599 | level = KERN_ERR; | |
600 | sevstr = "Fatal"; | |
601 | break; | |
602 | } | |
603 | ||
c6d15258 MS |
604 | #ifdef CONFIG_PPC_BOOK3S_64 |
605 | /* Display faulty slb contents for SLB errors. */ | |
606 | if (error_type == MC_ERROR_TYPE_SLB) | |
607 | slb_dump_contents(local_paca->mce_faulty_slbs); | |
608 | #endif | |
609 | ||
8f0b8056 MS |
610 | printk("%s%s Machine check interrupt [%s]\n", level, sevstr, |
611 | disposition == RTAS_DISP_FULLY_RECOVERED ? | |
612 | "Recovered" : "Not recovered"); | |
613 | if (user_mode(regs)) { | |
614 | printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, | |
615 | regs->nip, current->pid, current->comm); | |
616 | } else { | |
617 | printk("%s NIP [%016lx]: %pS\n", level, regs->nip, | |
618 | (void *)regs->nip); | |
619 | } | |
620 | printk("%s Initiator: %s\n", level, | |
621 | VAL_TO_STRING(initiators, initiator)); | |
622 | ||
623 | switch (error_type) { | |
624 | case MC_ERROR_TYPE_UE: | |
625 | printk("%s Error type: %s [%s]\n", level, | |
626 | VAL_TO_STRING(mc_err_types, error_type), | |
627 | VAL_TO_STRING(mc_ue_types, err_sub_type)); | |
628 | break; | |
629 | case MC_ERROR_TYPE_SLB: | |
630 | printk("%s Error type: %s [%s]\n", level, | |
631 | VAL_TO_STRING(mc_err_types, error_type), | |
632 | VAL_TO_STRING(mc_slb_types, err_sub_type)); | |
633 | break; | |
634 | case MC_ERROR_TYPE_ERAT: | |
635 | case MC_ERROR_TYPE_TLB: | |
636 | printk("%s Error type: %s [%s]\n", level, | |
637 | VAL_TO_STRING(mc_err_types, error_type), | |
638 | VAL_TO_STRING(mc_soft_types, err_sub_type)); | |
639 | break; | |
640 | default: | |
641 | printk("%s Error type: %s\n", level, | |
642 | VAL_TO_STRING(mc_err_types, error_type)); | |
643 | break; | |
644 | } | |
645 | ||
646 | addr = rtas_mc_get_effective_addr(mce_log); | |
647 | if (addr) | |
648 | printk("%s Effective address: %016llx\n", level, addr); | |
649 | } | |
650 | ||
a43c1590 MS |
651 | static int mce_handle_error(struct rtas_error_log *errp) |
652 | { | |
653 | struct pseries_errorlog *pseries_log; | |
654 | struct pseries_mc_errorlog *mce_log; | |
655 | int disposition = rtas_error_disposition(errp); | |
656 | u8 error_type; | |
657 | ||
658 | if (!rtas_error_extended(errp)) | |
659 | goto out; | |
660 | ||
661 | pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); | |
662 | if (pseries_log == NULL) | |
663 | goto out; | |
664 | ||
665 | mce_log = (struct pseries_mc_errorlog *)pseries_log->data; | |
666 | error_type = mce_log->error_type; | |
667 | ||
668 | #ifdef CONFIG_PPC_BOOK3S_64 | |
669 | if (disposition == RTAS_DISP_NOT_RECOVERED) { | |
670 | switch (error_type) { | |
671 | case MC_ERROR_TYPE_SLB: | |
672 | case MC_ERROR_TYPE_ERAT: | |
c6d15258 MS |
673 | /* |
674 | * Store the old slb content in paca before flushing. | |
675 | * Print this when we go to virtual mode. | |
676 | * There are chances that we may hit MCE again if there | |
677 | * is a parity error on the SLB entry we trying to read | |
678 | * for saving. Hence limit the slb saving to single | |
679 | * level of recursion. | |
680 | */ | |
681 | if (local_paca->in_mce == 1) | |
682 | slb_save_contents(local_paca->mce_faulty_slbs); | |
a43c1590 MS |
683 | flush_and_reload_slb(); |
684 | disposition = RTAS_DISP_FULLY_RECOVERED; | |
685 | rtas_set_disposition_recovered(errp); | |
686 | break; | |
687 | default: | |
688 | break; | |
689 | } | |
690 | } | |
691 | #endif | |
692 | ||
693 | out: | |
694 | return disposition; | |
695 | } | |
696 | ||
7f177f98 GG |
697 | #ifdef CONFIG_MEMORY_FAILURE |
698 | ||
699 | static DEFINE_PER_CPU(int, rtas_ue_count); | |
700 | static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]); | |
701 | ||
702 | #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 | |
703 | #define UE_LOGICAL_ADDR_PROVIDED 0x20 | |
704 | ||
705 | ||
706 | static void pseries_hwpoison_work_fn(struct work_struct *work) | |
707 | { | |
708 | unsigned long paddr; | |
709 | int index; | |
710 | ||
711 | while (__this_cpu_read(rtas_ue_count) > 0) { | |
712 | index = __this_cpu_read(rtas_ue_count) - 1; | |
713 | paddr = __this_cpu_read(rtas_ue_paddr[index]); | |
714 | memory_failure(paddr >> PAGE_SHIFT, 0); | |
715 | __this_cpu_dec(rtas_ue_count); | |
716 | } | |
717 | } | |
718 | ||
719 | static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn); | |
720 | ||
721 | static void queue_ue_paddr(unsigned long paddr) | |
722 | { | |
723 | int index; | |
724 | ||
725 | index = __this_cpu_inc_return(rtas_ue_count) - 1; | |
726 | if (index >= MAX_MC_EVT) { | |
727 | __this_cpu_dec(rtas_ue_count); | |
728 | return; | |
729 | } | |
730 | this_cpu_write(rtas_ue_paddr[index], paddr); | |
731 | schedule_work(&hwpoison_work); | |
732 | } | |
733 | ||
734 | static void pseries_do_memory_failure(struct pt_regs *regs, | |
735 | struct pseries_mc_errorlog *mce_log) | |
736 | { | |
737 | unsigned long paddr; | |
738 | ||
739 | if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { | |
740 | paddr = be64_to_cpu(mce_log->logical_address); | |
741 | } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { | |
742 | unsigned long pfn; | |
743 | ||
744 | pfn = addr_to_pfn(regs, | |
745 | be64_to_cpu(mce_log->effective_address)); | |
746 | if (pfn == ULONG_MAX) | |
747 | return; | |
748 | paddr = pfn << PAGE_SHIFT; | |
749 | } else { | |
750 | return; | |
751 | } | |
752 | queue_ue_paddr(paddr); | |
753 | } | |
754 | ||
755 | static void pseries_process_ue(struct pt_regs *regs, | |
756 | struct rtas_error_log *errp) | |
757 | { | |
758 | struct pseries_errorlog *pseries_log; | |
759 | struct pseries_mc_errorlog *mce_log; | |
760 | ||
761 | if (!rtas_error_extended(errp)) | |
762 | return; | |
763 | ||
764 | pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); | |
765 | if (!pseries_log) | |
766 | return; | |
767 | ||
768 | mce_log = (struct pseries_mc_errorlog *)pseries_log->data; | |
769 | ||
770 | if (mce_log->error_type == MC_ERROR_TYPE_UE) | |
771 | pseries_do_memory_failure(regs, mce_log); | |
772 | } | |
773 | #else | |
774 | static inline void pseries_process_ue(struct pt_regs *regs, | |
775 | struct rtas_error_log *errp) { } | |
776 | #endif /*CONFIG_MEMORY_FAILURE */ | |
777 | ||
94675cce MS |
778 | /* |
779 | * Process MCE rtas errlog event. | |
780 | */ | |
781 | static void mce_process_errlog_event(struct irq_work *work) | |
782 | { | |
783 | struct rtas_error_log *err; | |
784 | ||
785 | err = fwnmi_get_errlog(); | |
786 | log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); | |
787 | } | |
788 | ||
1da177e4 LT |
789 | /* |
790 | * See if we can recover from a machine check exception. | |
791 | * This is only called on power4 (or above) and only via | |
792 | * the Firmware Non-Maskable Interrupts (fwnmi) handler | |
793 | * which provides the error analysis for us. | |
794 | * | |
795 | * Return 1 if corrected (or delivered a signal). | |
796 | * Return 0 if there is nothing we can do. | |
797 | */ | |
d47d1d8a | 798 | static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) |
1da177e4 | 799 | { |
d47d1d8a | 800 | int recovered = 0; |
a08a53ea | 801 | int disposition = rtas_error_disposition(err); |
1da177e4 | 802 | |
8f0b8056 MS |
803 | pseries_print_mce_info(regs, err); |
804 | ||
d47d1d8a AB |
805 | if (!(regs->msr & MSR_RI)) { |
806 | /* If MSR_RI isn't set, we cannot recover */ | |
8f0b8056 | 807 | pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); |
d47d1d8a AB |
808 | recovered = 0; |
809 | ||
a08a53ea | 810 | } else if (disposition == RTAS_DISP_FULLY_RECOVERED) { |
1da177e4 | 811 | /* Platform corrected itself */ |
d47d1d8a AB |
812 | recovered = 1; |
813 | ||
a08a53ea | 814 | } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { |
d47d1d8a AB |
815 | /* Platform corrected itself but could be degraded */ |
816 | printk(KERN_ERR "MCE: limited recovery, system may " | |
817 | "be degraded\n"); | |
818 | recovered = 1; | |
819 | ||
820 | } else if (user_mode(regs) && !is_global_init(current) && | |
a08a53ea | 821 | rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) { |
d47d1d8a AB |
822 | |
823 | /* | |
824 | * If we received a synchronous error when in userspace | |
825 | * kill the task. Firmware may report details of the fail | |
826 | * asynchronously, so we can't rely on the target and type | |
827 | * fields being valid here. | |
828 | */ | |
829 | printk(KERN_ERR "MCE: uncorrectable error, killing task " | |
830 | "%s:%d\n", current->comm, current->pid); | |
831 | ||
832 | _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); | |
833 | recovered = 1; | |
1da177e4 LT |
834 | } |
835 | ||
7f177f98 GG |
836 | pseries_process_ue(regs, err); |
837 | ||
94675cce MS |
838 | /* Queue irq work to log this rtas event later. */ |
839 | irq_work_queue(&mce_errlog_process_work); | |
1da177e4 | 840 | |
d47d1d8a | 841 | return recovered; |
1da177e4 LT |
842 | } |
843 | ||
844 | /* | |
845 | * Handle a machine check. | |
846 | * | |
847 | * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) | |
848 | * should be present. If so the handler which called us tells us if the | |
849 | * error was recovered (never true if RI=0). | |
850 | * | |
851 | * On hardware prior to Power 4 these exceptions were asynchronous which | |
852 | * means we can't tell exactly where it occurred and so we can't recover. | |
853 | */ | |
854 | int pSeries_machine_check_exception(struct pt_regs *regs) | |
855 | { | |
856 | struct rtas_error_log *errp; | |
857 | ||
858 | if (fwnmi_active) { | |
1da177e4 | 859 | fwnmi_release_errinfo(); |
a43c1590 | 860 | errp = fwnmi_get_errlog(); |
1da177e4 LT |
861 | if (errp && recover_mce(regs, errp)) |
862 | return 1; | |
863 | } | |
864 | ||
865 | return 0; | |
866 | } | |
a43c1590 MS |
867 | |
868 | long pseries_machine_check_realmode(struct pt_regs *regs) | |
869 | { | |
870 | struct rtas_error_log *errp; | |
871 | int disposition; | |
872 | ||
873 | if (fwnmi_active) { | |
874 | errp = fwnmi_get_errinfo(regs); | |
875 | /* | |
876 | * Call to fwnmi_release_errinfo() in real mode causes kernel | |
877 | * to panic. Hence we will call it as soon as we go into | |
878 | * virtual mode. | |
879 | */ | |
880 | disposition = mce_handle_error(errp); | |
881 | if (disposition == RTAS_DISP_FULLY_RECOVERED) | |
882 | return 1; | |
883 | } | |
884 | ||
885 | return 0; | |
886 | } |