]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - drivers/firmware/efi/cper.c
90247570431103193af874c8a5e44d7fad5e82df
[mirror_ubuntu-zesty-kernel.git] / drivers / firmware / efi / cper.c
1 /*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 #include <acpi/ghes.h>
36
37 #define INDENT_SP " "
38
/*
 * Scratch buffer shared by cper_mem_err_unpack() and cper_print_mem(): the
 * memory-error location formatters write their text here before it is
 * printed or appended to a trace sequence.
 * NOTE(review): no locking is visible in this file, so callers are
 * presumably serialized elsewhere - confirm before adding new users.
 */
static char rcd_decode_str[CPER_REC_LEN];
40
41 /*
42 * CPER record ID need to be unique even after reboot, because record
43 * ID is used as index for ERST storage, while CPER records from
44 * multiple boot may co-exist in ERST.
45 */
46 u64 cper_next_record_id(void)
47 {
48 static atomic64_t seq;
49
50 if (!atomic64_read(&seq))
51 atomic64_set(&seq, ((u64)get_seconds()) << 32);
52
53 return atomic64_inc_return(&seq);
54 }
55 EXPORT_SYMBOL_GPL(cper_next_record_id);
56
/* Names for the CPER severity codes; the array index is the code. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
63
64 const char *cper_severity_str(unsigned int severity)
65 {
66 return severity < ARRAY_SIZE(severity_strs) ?
67 severity_strs[severity] : "unknown";
68 }
69 EXPORT_SYMBOL_GPL(cper_severity_str);
70
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs;
 * NULL entries in @strs are skipped.  Strings are joined with ", ".
 * When adding a string would make the line longer than 80 characters,
 * the line is printed and a new one is started, with @pfx printed at
 * the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	/*
	 * @bits has no more positions than its width, and shifting by
	 * that width or more would be undefined, so never look further.
	 */
	unsigned int nbits = sizeof(bits) * 8;
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	if (strs_size < nbits)
		nbits = strs_size;

	for (i = 0; i < nbits; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns what was actually stored, so len
		 * can never run past the end of buf even when @pfx plus
		 * a string is longer than the buffer.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf + len, sizeof(buf) - len,
					 ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
107
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Processor ISA names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position. */
static const char * const proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
139
140 static void cper_print_proc_generic(const char *pfx,
141 const struct cper_sec_proc_generic *proc)
142 {
143 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
144 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
145 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
146 proc_type_strs[proc->proc_type] : "unknown");
147 if (proc->validation_bits & CPER_PROC_VALID_ISA)
148 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
149 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
150 proc_isa_strs[proc->proc_isa] : "unknown");
151 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
152 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
153 cper_print_bits(pfx, proc->proc_error_type,
154 proc_error_type_strs,
155 ARRAY_SIZE(proc_error_type_strs));
156 }
157 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
158 printk("%s""operation: %d, %s\n", pfx, proc->operation,
159 proc->operation < ARRAY_SIZE(proc_op_strs) ?
160 proc_op_strs[proc->operation] : "unknown");
161 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
162 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
163 cper_print_bits(pfx, proc->flags, proc_flag_strs,
164 ARRAY_SIZE(proc_flag_strs));
165 }
166 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
167 printk("%s""level: %d\n", pfx, proc->level);
168 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
169 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
170 if (proc->validation_bits & CPER_PROC_VALID_ID)
171 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
172 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
173 printk("%s""target_address: 0x%016llx\n",
174 pfx, proc->target_addr);
175 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
176 printk("%s""requestor_id: 0x%016llx\n",
177 pfx, proc->requestor_id);
178 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
179 printk("%s""responder_id: 0x%016llx\n",
180 pfx, proc->responder_id);
181 if (proc->validation_bits & CPER_PROC_VALID_IP)
182 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
183 }
184
/* Memory error type names, indexed by the error_type field. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};
203
204 const char *cper_mem_err_type_str(unsigned int etype)
205 {
206 return etype < ARRAY_SIZE(mem_err_type_strs) ?
207 mem_err_type_strs[etype] : "unknown";
208 }
209 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
210
211 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
212 {
213 u32 len, n;
214
215 if (!msg)
216 return 0;
217
218 n = 0;
219 len = CPER_REC_LEN - 1;
220 if (mem->validation_bits & CPER_MEM_VALID_NODE)
221 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
222 if (mem->validation_bits & CPER_MEM_VALID_CARD)
223 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
224 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
225 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
226 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
227 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
228 if (mem->validation_bits & CPER_MEM_VALID_BANK)
229 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
230 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
231 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
232 if (mem->validation_bits & CPER_MEM_VALID_ROW)
233 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
234 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
235 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
236 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
237 n += scnprintf(msg + n, len - n, "bit_position: %d ",
238 mem->bit_pos);
239 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
240 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
241 mem->requestor_id);
242 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
243 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
244 mem->responder_id);
245 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
246 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
247 mem->target_id);
248
249 msg[n] = '\0';
250 return n;
251 }
252
253 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
254 {
255 u32 len, n;
256 const char *bank = NULL, *device = NULL;
257
258 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
259 return 0;
260
261 n = 0;
262 len = CPER_REC_LEN - 1;
263 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
264 if (bank && device)
265 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
266 else
267 n = snprintf(msg, len,
268 "DIMM location: not present. DMI handle: 0x%.4x ",
269 mem->mem_dev_handle);
270
271 msg[n] = '\0';
272 return n;
273 }
274
275 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
276 struct cper_mem_err_compact *cmem)
277 {
278 cmem->validation_bits = mem->validation_bits;
279 cmem->node = mem->node;
280 cmem->card = mem->card;
281 cmem->module = mem->module;
282 cmem->bank = mem->bank;
283 cmem->device = mem->device;
284 cmem->row = mem->row;
285 cmem->column = mem->column;
286 cmem->bit_pos = mem->bit_pos;
287 cmem->requestor_id = mem->requestor_id;
288 cmem->responder_id = mem->responder_id;
289 cmem->target_id = mem->target_id;
290 cmem->rank = mem->rank;
291 cmem->mem_array_handle = mem->mem_array_handle;
292 cmem->mem_dev_handle = mem->mem_dev_handle;
293 }
294
295 const char *cper_mem_err_unpack(struct trace_seq *p,
296 struct cper_mem_err_compact *cmem)
297 {
298 const char *ret = trace_seq_buffer_ptr(p);
299
300 if (cper_mem_err_location(cmem, rcd_decode_str))
301 trace_seq_printf(p, "%s", rcd_decode_str);
302 if (cper_dimm_err_location(cmem, rcd_decode_str))
303 trace_seq_printf(p, "%s", rcd_decode_str);
304 trace_seq_putc(p, '\0');
305
306 return ret;
307 }
308
309 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
310 int len)
311 {
312 struct cper_mem_err_compact cmem;
313
314 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
315 if (len == sizeof(struct cper_sec_mem_err_old) &&
316 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
317 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
318 return;
319 }
320 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
321 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
322 if (mem->validation_bits & CPER_MEM_VALID_PA)
323 printk("%s""physical_address: 0x%016llx\n",
324 pfx, mem->physical_addr);
325 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
326 printk("%s""physical_address_mask: 0x%016llx\n",
327 pfx, mem->physical_addr_mask);
328 cper_mem_err_pack(mem, &cmem);
329 if (cper_mem_err_location(&cmem, rcd_decode_str))
330 printk("%s%s\n", pfx, rcd_decode_str);
331 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
332 u8 etype = mem->error_type;
333 printk("%s""error_type: %d, %s\n", pfx, etype,
334 cper_mem_err_type_str(etype));
335 }
336 if (cper_dimm_err_location(&cmem, rcd_decode_str))
337 printk("%s%s\n", pfx, rcd_decode_str);
338 }
339
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
353
354 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
355 const struct acpi_hest_generic_data *gdata)
356 {
357 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
358 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
359 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
360 pcie_port_type_strs[pcie->port_type] : "unknown");
361 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
362 printk("%s""version: %d.%d\n", pfx,
363 pcie->version.major, pcie->version.minor);
364 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
365 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
366 pcie->command, pcie->status);
367 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
368 const __u8 *p;
369 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
370 pcie->device_id.segment, pcie->device_id.bus,
371 pcie->device_id.device, pcie->device_id.function);
372 printk("%s""slot: %d\n", pfx,
373 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
374 printk("%s""secondary_bus: 0x%02x\n", pfx,
375 pcie->device_id.secondary_bus);
376 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
377 pcie->device_id.vendor_id, pcie->device_id.device_id);
378 p = pcie->device_id.class_code;
379 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
380 }
381 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
382 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
383 pcie->serial_number.lower, pcie->serial_number.upper);
384 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
385 printk(
386 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
387 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
388 }
389
390 static void
391 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
392 int sec_no)
393 {
394 uuid_le *sec_type = (uuid_le *)gdata->section_type;
395 __u16 severity;
396 char newpfx[64];
397
398 severity = gdata->error_severity;
399 printk("%s""Error %d, type: %s\n", pfx, sec_no,
400 cper_severity_str(severity));
401 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
402 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
403 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
404 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
405
406 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
407 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
408 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
409
410 printk("%s""section_type: general processor error\n", newpfx);
411 if (gdata->error_data_length >= sizeof(*proc_err))
412 cper_print_proc_generic(newpfx, proc_err);
413 else
414 goto err_section_too_small;
415 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
416 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
417
418 printk("%s""section_type: memory error\n", newpfx);
419 if (gdata->error_data_length >=
420 sizeof(struct cper_sec_mem_err_old))
421 cper_print_mem(newpfx, mem_err,
422 gdata->error_data_length);
423 else
424 goto err_section_too_small;
425 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
426 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
427
428 printk("%s""section_type: PCIe error\n", newpfx);
429 if (gdata->error_data_length >= sizeof(*pcie))
430 cper_print_pcie(newpfx, pcie, gdata);
431 else
432 goto err_section_too_small;
433 } else
434 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
435
436 return;
437
438 err_section_too_small:
439 pr_err(FW_WARN "error section length is too small\n");
440 }
441
442 void cper_estatus_print(const char *pfx,
443 const struct acpi_hest_generic_status *estatus)
444 {
445 struct acpi_hest_generic_data *gdata;
446 unsigned int data_len;
447 int sec_no = 0;
448 char newpfx[64];
449 __u16 severity;
450
451 severity = estatus->error_severity;
452 if (severity == CPER_SEV_CORRECTED)
453 printk("%s%s\n", pfx,
454 "It has been corrected by h/w "
455 "and requires no further action");
456 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
457 data_len = estatus->data_length;
458 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
459 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
460
461 while (data_len >= acpi_hest_get_size(gdata)) {
462 cper_estatus_print_section(newpfx, gdata, sec_no);
463 data_len -= acpi_hest_get_record_size(gdata);
464 gdata = acpi_hest_get_next(gdata);
465 sec_no++;
466 }
467 }
468 EXPORT_SYMBOL_GPL(cper_estatus_print);
469
470 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
471 {
472 if (estatus->data_length &&
473 estatus->data_length < sizeof(struct acpi_hest_generic_data))
474 return -EINVAL;
475 if (estatus->raw_data_length &&
476 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
477 return -EINVAL;
478
479 return 0;
480 }
481 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
482
483 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
484 {
485 struct acpi_hest_generic_data *gdata;
486 unsigned int data_len, gedata_len;
487 int rc;
488
489 rc = cper_estatus_check_header(estatus);
490 if (rc)
491 return rc;
492 data_len = estatus->data_length;
493 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
494
495 while (data_len >= acpi_hest_get_size(gdata)) {
496 gedata_len = acpi_hest_get_error_length(gdata);
497 if (gedata_len > data_len - acpi_hest_get_size(gdata))
498 return -EINVAL;
499
500 data_len -= acpi_hest_get_record_size(gdata);
501 gdata = acpi_hest_get_next(gdata);
502 }
503 if (data_len)
504 return -EINVAL;
505
506 return 0;
507 }
508 EXPORT_SYMBOL_GPL(cper_estatus_check);