2 * UEFI Common Platform Error Record (CPER) support
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
7 * CPER is the format used to describe platform hardware error by
8 * various tables, such as ERST, BERT and HEST etc.
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 #include <acpi/ghes.h>
39 static char rcd_decode_str
[CPER_REC_LEN
];
42 * CPER record ID need to be unique even after reboot, because record
43 * ID is used as index for ERST storage, while CPER records from
44 * multiple boot may co-exist in ERST.
46 u64
cper_next_record_id(void)
48 static atomic64_t seq
;
50 if (!atomic64_read(&seq
))
51 atomic64_set(&seq
, ((u64
)get_seconds()) << 32);
53 return atomic64_inc_return(&seq
);
55 EXPORT_SYMBOL_GPL(cper_next_record_id
);
57 static const char * const severity_strs
[] = {
64 const char *cper_severity_str(unsigned int severity
)
66 return severity
< ARRAY_SIZE(severity_strs
) ?
67 severity_strs
[severity
] : "unknown";
69 EXPORT_SYMBOL_GPL(cper_severity_str
);
/**
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * If the output would be longer than 80 characters, multiple lines are
 * printed, with @pfx printed at the beginning of each line.
 */
82 void cper_print_bits(const char *pfx
, unsigned int bits
,
83 const char * const strs
[], unsigned int strs_size
)
89 for (i
= 0; i
< strs_size
; i
++) {
90 if (!(bits
& (1U << i
)))
95 if (len
&& len
+ strlen(str
) + 2 > 80) {
100 len
= snprintf(buf
, sizeof(buf
), "%s%s", pfx
, str
);
102 len
+= snprintf(buf
+len
, sizeof(buf
)-len
, ", %s", str
);
108 static const char * const proc_type_strs
[] = {
113 static const char * const proc_isa_strs
[] = {
119 static const char * const proc_error_type_strs
[] = {
123 "micro-architectural error",
126 static const char * const proc_op_strs
[] = {
127 "unknown or generic",
130 "instruction execution",
133 static const char * const proc_flag_strs
[] = {
140 static void cper_print_proc_generic(const char *pfx
,
141 const struct cper_sec_proc_generic
*proc
)
143 if (proc
->validation_bits
& CPER_PROC_VALID_TYPE
)
144 printk("%s""processor_type: %d, %s\n", pfx
, proc
->proc_type
,
145 proc
->proc_type
< ARRAY_SIZE(proc_type_strs
) ?
146 proc_type_strs
[proc
->proc_type
] : "unknown");
147 if (proc
->validation_bits
& CPER_PROC_VALID_ISA
)
148 printk("%s""processor_isa: %d, %s\n", pfx
, proc
->proc_isa
,
149 proc
->proc_isa
< ARRAY_SIZE(proc_isa_strs
) ?
150 proc_isa_strs
[proc
->proc_isa
] : "unknown");
151 if (proc
->validation_bits
& CPER_PROC_VALID_ERROR_TYPE
) {
152 printk("%s""error_type: 0x%02x\n", pfx
, proc
->proc_error_type
);
153 cper_print_bits(pfx
, proc
->proc_error_type
,
154 proc_error_type_strs
,
155 ARRAY_SIZE(proc_error_type_strs
));
157 if (proc
->validation_bits
& CPER_PROC_VALID_OPERATION
)
158 printk("%s""operation: %d, %s\n", pfx
, proc
->operation
,
159 proc
->operation
< ARRAY_SIZE(proc_op_strs
) ?
160 proc_op_strs
[proc
->operation
] : "unknown");
161 if (proc
->validation_bits
& CPER_PROC_VALID_FLAGS
) {
162 printk("%s""flags: 0x%02x\n", pfx
, proc
->flags
);
163 cper_print_bits(pfx
, proc
->flags
, proc_flag_strs
,
164 ARRAY_SIZE(proc_flag_strs
));
166 if (proc
->validation_bits
& CPER_PROC_VALID_LEVEL
)
167 printk("%s""level: %d\n", pfx
, proc
->level
);
168 if (proc
->validation_bits
& CPER_PROC_VALID_VERSION
)
169 printk("%s""version_info: 0x%016llx\n", pfx
, proc
->cpu_version
);
170 if (proc
->validation_bits
& CPER_PROC_VALID_ID
)
171 printk("%s""processor_id: 0x%016llx\n", pfx
, proc
->proc_id
);
172 if (proc
->validation_bits
& CPER_PROC_VALID_TARGET_ADDRESS
)
173 printk("%s""target_address: 0x%016llx\n",
174 pfx
, proc
->target_addr
);
175 if (proc
->validation_bits
& CPER_PROC_VALID_REQUESTOR_ID
)
176 printk("%s""requestor_id: 0x%016llx\n",
177 pfx
, proc
->requestor_id
);
178 if (proc
->validation_bits
& CPER_PROC_VALID_RESPONDER_ID
)
179 printk("%s""responder_id: 0x%016llx\n",
180 pfx
, proc
->responder_id
);
181 if (proc
->validation_bits
& CPER_PROC_VALID_IP
)
182 printk("%s""IP: 0x%016llx\n", pfx
, proc
->ip
);
185 static const char * const mem_err_type_strs
[] = {
190 "single-symbol chipkill ECC",
191 "multi-symbol chipkill ECC",
199 "scrub corrected error",
200 "scrub uncorrected error",
201 "physical memory map-out event",
204 const char *cper_mem_err_type_str(unsigned int etype
)
206 return etype
< ARRAY_SIZE(mem_err_type_strs
) ?
207 mem_err_type_strs
[etype
] : "unknown";
209 EXPORT_SYMBOL_GPL(cper_mem_err_type_str
);
211 static int cper_mem_err_location(struct cper_mem_err_compact
*mem
, char *msg
)
219 len
= CPER_REC_LEN
- 1;
220 if (mem
->validation_bits
& CPER_MEM_VALID_NODE
)
221 n
+= scnprintf(msg
+ n
, len
- n
, "node: %d ", mem
->node
);
222 if (mem
->validation_bits
& CPER_MEM_VALID_CARD
)
223 n
+= scnprintf(msg
+ n
, len
- n
, "card: %d ", mem
->card
);
224 if (mem
->validation_bits
& CPER_MEM_VALID_MODULE
)
225 n
+= scnprintf(msg
+ n
, len
- n
, "module: %d ", mem
->module
);
226 if (mem
->validation_bits
& CPER_MEM_VALID_RANK_NUMBER
)
227 n
+= scnprintf(msg
+ n
, len
- n
, "rank: %d ", mem
->rank
);
228 if (mem
->validation_bits
& CPER_MEM_VALID_BANK
)
229 n
+= scnprintf(msg
+ n
, len
- n
, "bank: %d ", mem
->bank
);
230 if (mem
->validation_bits
& CPER_MEM_VALID_DEVICE
)
231 n
+= scnprintf(msg
+ n
, len
- n
, "device: %d ", mem
->device
);
232 if (mem
->validation_bits
& CPER_MEM_VALID_ROW
)
233 n
+= scnprintf(msg
+ n
, len
- n
, "row: %d ", mem
->row
);
234 if (mem
->validation_bits
& CPER_MEM_VALID_COLUMN
)
235 n
+= scnprintf(msg
+ n
, len
- n
, "column: %d ", mem
->column
);
236 if (mem
->validation_bits
& CPER_MEM_VALID_BIT_POSITION
)
237 n
+= scnprintf(msg
+ n
, len
- n
, "bit_position: %d ",
239 if (mem
->validation_bits
& CPER_MEM_VALID_REQUESTOR_ID
)
240 n
+= scnprintf(msg
+ n
, len
- n
, "requestor_id: 0x%016llx ",
242 if (mem
->validation_bits
& CPER_MEM_VALID_RESPONDER_ID
)
243 n
+= scnprintf(msg
+ n
, len
- n
, "responder_id: 0x%016llx ",
245 if (mem
->validation_bits
& CPER_MEM_VALID_TARGET_ID
)
246 scnprintf(msg
+ n
, len
- n
, "target_id: 0x%016llx ",
253 static int cper_dimm_err_location(struct cper_mem_err_compact
*mem
, char *msg
)
256 const char *bank
= NULL
, *device
= NULL
;
258 if (!msg
|| !(mem
->validation_bits
& CPER_MEM_VALID_MODULE_HANDLE
))
262 len
= CPER_REC_LEN
- 1;
263 dmi_memdev_name(mem
->mem_dev_handle
, &bank
, &device
);
265 n
= snprintf(msg
, len
, "DIMM location: %s %s ", bank
, device
);
267 n
= snprintf(msg
, len
,
268 "DIMM location: not present. DMI handle: 0x%.4x ",
269 mem
->mem_dev_handle
);
275 void cper_mem_err_pack(const struct cper_sec_mem_err
*mem
,
276 struct cper_mem_err_compact
*cmem
)
278 cmem
->validation_bits
= mem
->validation_bits
;
279 cmem
->node
= mem
->node
;
280 cmem
->card
= mem
->card
;
281 cmem
->module
= mem
->module
;
282 cmem
->bank
= mem
->bank
;
283 cmem
->device
= mem
->device
;
284 cmem
->row
= mem
->row
;
285 cmem
->column
= mem
->column
;
286 cmem
->bit_pos
= mem
->bit_pos
;
287 cmem
->requestor_id
= mem
->requestor_id
;
288 cmem
->responder_id
= mem
->responder_id
;
289 cmem
->target_id
= mem
->target_id
;
290 cmem
->rank
= mem
->rank
;
291 cmem
->mem_array_handle
= mem
->mem_array_handle
;
292 cmem
->mem_dev_handle
= mem
->mem_dev_handle
;
295 const char *cper_mem_err_unpack(struct trace_seq
*p
,
296 struct cper_mem_err_compact
*cmem
)
298 const char *ret
= trace_seq_buffer_ptr(p
);
300 if (cper_mem_err_location(cmem
, rcd_decode_str
))
301 trace_seq_printf(p
, "%s", rcd_decode_str
);
302 if (cper_dimm_err_location(cmem
, rcd_decode_str
))
303 trace_seq_printf(p
, "%s", rcd_decode_str
);
304 trace_seq_putc(p
, '\0');
309 static void cper_print_mem(const char *pfx
, const struct cper_sec_mem_err
*mem
,
312 struct cper_mem_err_compact cmem
;
314 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
315 if (len
== sizeof(struct cper_sec_mem_err_old
) &&
316 (mem
->validation_bits
& ~(CPER_MEM_VALID_RANK_NUMBER
- 1))) {
317 pr_err(FW_WARN
"valid bits set for fields beyond structure\n");
320 if (mem
->validation_bits
& CPER_MEM_VALID_ERROR_STATUS
)
321 printk("%s""error_status: 0x%016llx\n", pfx
, mem
->error_status
);
322 if (mem
->validation_bits
& CPER_MEM_VALID_PA
)
323 printk("%s""physical_address: 0x%016llx\n",
324 pfx
, mem
->physical_addr
);
325 if (mem
->validation_bits
& CPER_MEM_VALID_PA_MASK
)
326 printk("%s""physical_address_mask: 0x%016llx\n",
327 pfx
, mem
->physical_addr_mask
);
328 cper_mem_err_pack(mem
, &cmem
);
329 if (cper_mem_err_location(&cmem
, rcd_decode_str
))
330 printk("%s%s\n", pfx
, rcd_decode_str
);
331 if (mem
->validation_bits
& CPER_MEM_VALID_ERROR_TYPE
) {
332 u8 etype
= mem
->error_type
;
333 printk("%s""error_type: %d, %s\n", pfx
, etype
,
334 cper_mem_err_type_str(etype
));
336 if (cper_dimm_err_location(&cmem
, rcd_decode_str
))
337 printk("%s%s\n", pfx
, rcd_decode_str
);
340 static const char * const pcie_port_type_strs
[] = {
342 "legacy PCI end point",
346 "upstream switch port",
347 "downstream switch port",
348 "PCIe to PCI/PCI-X bridge",
349 "PCI/PCI-X to PCIe bridge",
350 "root complex integrated endpoint device",
351 "root complex event collector",
354 static void cper_print_pcie(const char *pfx
, const struct cper_sec_pcie
*pcie
,
355 const struct acpi_hest_generic_data
*gdata
)
357 if (pcie
->validation_bits
& CPER_PCIE_VALID_PORT_TYPE
)
358 printk("%s""port_type: %d, %s\n", pfx
, pcie
->port_type
,
359 pcie
->port_type
< ARRAY_SIZE(pcie_port_type_strs
) ?
360 pcie_port_type_strs
[pcie
->port_type
] : "unknown");
361 if (pcie
->validation_bits
& CPER_PCIE_VALID_VERSION
)
362 printk("%s""version: %d.%d\n", pfx
,
363 pcie
->version
.major
, pcie
->version
.minor
);
364 if (pcie
->validation_bits
& CPER_PCIE_VALID_COMMAND_STATUS
)
365 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx
,
366 pcie
->command
, pcie
->status
);
367 if (pcie
->validation_bits
& CPER_PCIE_VALID_DEVICE_ID
) {
369 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx
,
370 pcie
->device_id
.segment
, pcie
->device_id
.bus
,
371 pcie
->device_id
.device
, pcie
->device_id
.function
);
372 printk("%s""slot: %d\n", pfx
,
373 pcie
->device_id
.slot
>> CPER_PCIE_SLOT_SHIFT
);
374 printk("%s""secondary_bus: 0x%02x\n", pfx
,
375 pcie
->device_id
.secondary_bus
);
376 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx
,
377 pcie
->device_id
.vendor_id
, pcie
->device_id
.device_id
);
378 p
= pcie
->device_id
.class_code
;
379 printk("%s""class_code: %02x%02x%02x\n", pfx
, p
[0], p
[1], p
[2]);
381 if (pcie
->validation_bits
& CPER_PCIE_VALID_SERIAL_NUMBER
)
382 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx
,
383 pcie
->serial_number
.lower
, pcie
->serial_number
.upper
);
384 if (pcie
->validation_bits
& CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS
)
386 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
387 pfx
, pcie
->bridge
.secondary_status
, pcie
->bridge
.control
);
391 cper_estatus_print_section(const char *pfx
, struct acpi_hest_generic_data
*gdata
,
394 uuid_le
*sec_type
= (uuid_le
*)gdata
->section_type
;
398 severity
= gdata
->error_severity
;
399 printk("%s""Error %d, type: %s\n", pfx
, sec_no
,
400 cper_severity_str(severity
));
401 if (gdata
->validation_bits
& CPER_SEC_VALID_FRU_ID
)
402 printk("%s""fru_id: %pUl\n", pfx
, (uuid_le
*)gdata
->fru_id
);
403 if (gdata
->validation_bits
& CPER_SEC_VALID_FRU_TEXT
)
404 printk("%s""fru_text: %.20s\n", pfx
, gdata
->fru_text
);
406 snprintf(newpfx
, sizeof(newpfx
), "%s%s", pfx
, INDENT_SP
);
407 if (!uuid_le_cmp(*sec_type
, CPER_SEC_PROC_GENERIC
)) {
408 struct cper_sec_proc_generic
*proc_err
= acpi_hest_get_payload(gdata
);
410 printk("%s""section_type: general processor error\n", newpfx
);
411 if (gdata
->error_data_length
>= sizeof(*proc_err
))
412 cper_print_proc_generic(newpfx
, proc_err
);
414 goto err_section_too_small
;
415 } else if (!uuid_le_cmp(*sec_type
, CPER_SEC_PLATFORM_MEM
)) {
416 struct cper_sec_mem_err
*mem_err
= acpi_hest_get_payload(gdata
);
418 printk("%s""section_type: memory error\n", newpfx
);
419 if (gdata
->error_data_length
>=
420 sizeof(struct cper_sec_mem_err_old
))
421 cper_print_mem(newpfx
, mem_err
,
422 gdata
->error_data_length
);
424 goto err_section_too_small
;
425 } else if (!uuid_le_cmp(*sec_type
, CPER_SEC_PCIE
)) {
426 struct cper_sec_pcie
*pcie
= acpi_hest_get_payload(gdata
);
428 printk("%s""section_type: PCIe error\n", newpfx
);
429 if (gdata
->error_data_length
>= sizeof(*pcie
))
430 cper_print_pcie(newpfx
, pcie
, gdata
);
432 goto err_section_too_small
;
434 printk("%s""section type: unknown, %pUl\n", newpfx
, sec_type
);
438 err_section_too_small
:
439 pr_err(FW_WARN
"error section length is too small\n");
442 void cper_estatus_print(const char *pfx
,
443 const struct acpi_hest_generic_status
*estatus
)
445 struct acpi_hest_generic_data
*gdata
;
446 unsigned int data_len
;
451 severity
= estatus
->error_severity
;
452 if (severity
== CPER_SEV_CORRECTED
)
453 printk("%s%s\n", pfx
,
454 "It has been corrected by h/w "
455 "and requires no further action");
456 printk("%s""event severity: %s\n", pfx
, cper_severity_str(severity
));
457 data_len
= estatus
->data_length
;
458 gdata
= (struct acpi_hest_generic_data
*)(estatus
+ 1);
459 snprintf(newpfx
, sizeof(newpfx
), "%s%s", pfx
, INDENT_SP
);
461 while (data_len
>= acpi_hest_get_size(gdata
)) {
462 cper_estatus_print_section(newpfx
, gdata
, sec_no
);
463 data_len
-= acpi_hest_get_record_size(gdata
);
464 gdata
= acpi_hest_get_next(gdata
);
468 EXPORT_SYMBOL_GPL(cper_estatus_print
);
470 int cper_estatus_check_header(const struct acpi_hest_generic_status
*estatus
)
472 if (estatus
->data_length
&&
473 estatus
->data_length
< sizeof(struct acpi_hest_generic_data
))
475 if (estatus
->raw_data_length
&&
476 estatus
->raw_data_offset
< sizeof(*estatus
) + estatus
->data_length
)
481 EXPORT_SYMBOL_GPL(cper_estatus_check_header
);
483 int cper_estatus_check(const struct acpi_hest_generic_status
*estatus
)
485 struct acpi_hest_generic_data
*gdata
;
486 unsigned int data_len
, gedata_len
;
489 rc
= cper_estatus_check_header(estatus
);
492 data_len
= estatus
->data_length
;
493 gdata
= (struct acpi_hest_generic_data
*)(estatus
+ 1);
495 while (data_len
>= acpi_hest_get_size(gdata
)) {
496 gedata_len
= acpi_hest_get_error_length(gdata
);
497 if (gedata_len
> data_len
- acpi_hest_get_size(gdata
))
500 data_len
-= acpi_hest_get_record_size(gdata
);
501 gdata
= acpi_hest_get_next(gdata
);
508 EXPORT_SYMBOL_GPL(cper_estatus_check
);