drivers/acpi/apei/ghes.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * the firmware first, then reported to Linux by the firmware. This
 * way, non-standard hardware error registers or non-standard hardware
 * links can be checked by the firmware to produce more hardware error
 * information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 */

#include <linux/arm_sdei.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>

#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>

#include "apei-internal.h"

#define GHES_PFX	"GHES: "

#define GHES_ESTATUS_MAX_SIZE		65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

#define GHES_ESTATUS_CACHES_SIZE	4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

/*
 * NMI-like notifications vary by architecture. Before the compiler can
 * prune unused static functions, it needs a value for these enums.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif

static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changes, not for traversal.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * Because the memory area used to transfer hardware error information
 * from the BIOS to Linux can be determined only in an NMI, IRQ or
 * timer handler, and a general ioremap cannot be used in atomic
 * context, the fixmap is used instead.
 *
 * This spinlock is used to prevent the fixmap entry from being used
 * simultaneously.
 */
static DEFINE_SPINLOCK(ghes_notify_lock_irq);

static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_panic_timeout __read_mostly = 30;

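/*
 * Map the page containing @pfn through the fixmap slot @fixmap_idx,
 * using the memory attributes returned by arch_apei_get_mem_attribute(),
 * so that error status blocks can be accessed from atomic context.
 */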
static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
{
	phys_addr_t paddr;
	pgprot_t prot;

	paddr = PFN_PHYS(pfn);
	prot = arch_apei_get_mem_attribute(paddr);
	__set_fixmap(fixmap_idx, paddr, prot);

	return (void __iomem *) __fix_to_virt(fixmap_idx);
}

static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
{
	int _idx = virt_to_fix((unsigned long)vaddr);

	WARN_ON_ONCE(fixmap_idx != _idx);
	clear_fixmap(fixmap_idx);
}

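/*
 * Pre-allocate the backing memory for ghes_estatus_pool: room for
 * GHES_ESTATUS_CACHE_ALLOCED_MAX average-sized cache entries plus
 * GHES_ESOURCE_PREALLOC_MAX_SIZE bytes per error source.
 */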
int ghes_estatus_pool_init(int num_ghes)
{
	unsigned long addr, len;
	int rc;

	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
	if (!ghes_estatus_pool)
		return -ENOMEM;

	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);

	ghes_estatus_pool_size_request = PAGE_ALIGN(len);
	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
	if (!addr)
		goto err_pool_alloc;

	rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
	if (rc)
		goto err_pool_add;

	return 0;

err_pool_add:
	vfree((void *)addr);

err_pool_alloc:
	gen_pool_destroy(ghes_estatus_pool);

	return -ENOMEM;
}

static int map_gen_v2(struct ghes *ghes)
{
	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}

static void unmap_gen_v2(struct ghes *ghes)
{
	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}

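/*
 * Acknowledge the error to the firmware through the GHESv2 Read Ack
 * register: keep the bits selected by read_ack_preserve and set the
 * bits selected by read_ack_write.
 */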
static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
	int rc;
	u64 val = 0;

	rc = apei_read(&val, &gv2->read_ack_register);
	if (rc)
		return;

	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
	val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;

	apei_write(val, &gv2->read_ack_register);
}

static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);

	ghes->generic = generic;
	if (is_hest_type_generic_v2(ghes)) {
		rc = map_gen_v2(ghes);
		if (rc)
			goto err_free;
	}

	rc = apei_map_generic_address(&generic->error_status_address);
	if (rc)
		goto err_unmap_read_ack_addr;
	error_block_length = generic->error_block_length;
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		pr_warn(FW_WARN GHES_PFX
			"Error status block length is too long: %u for "
			"generic hardware error source: %d.\n",
			error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap_status_addr;
	}

	return ghes;

err_unmap_status_addr:
	apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}

static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
}

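/* Map a CPER severity value to the corresponding GHES severity. */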
static inline int ghes_severity(int severity)
{
	switch (severity) {
	case CPER_SEV_INFORMATIONAL:
		return GHES_SEV_NO;
	case CPER_SEV_CORRECTED:
		return GHES_SEV_CORRECTED;
	case CPER_SEV_RECOVERABLE:
		return GHES_SEV_RECOVERABLE;
	case CPER_SEV_FATAL:
		return GHES_SEV_PANIC;
	default:
		/* Unknown, go panic */
		return GHES_SEV_PANIC;
	}
}

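/*
 * Copy @len bytes between @buffer and the physical range at @paddr,
 * one page at a time, mapping each page through the given fixmap slot.
 */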
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys,
				  enum fixed_addresses fixmap_idx)
{
	void __iomem *vaddr;
	u64 offset;
	u32 trunk;

	while (len > 0) {
		offset = paddr - (paddr & PAGE_MASK);
		vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		ghes_unmap(vaddr, fixmap_idx);
	}
}

/* Check that the top-level record header has an appropriate size. */
static int __ghes_check_estatus(struct ghes *ghes,
				struct acpi_hest_generic_status *estatus)
{
	u32 len = cper_estatus_len(estatus);

	if (len < sizeof(*estatus)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
		return -EIO;
	}

	if (len > ghes->generic->error_block_length) {
		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
		return -EIO;
	}

	if (cper_estatus_check_header(estatus)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
		return -EIO;
	}

	return 0;
}

/* Read the CPER block address, and its header into estatus. */
static int __ghes_peek_estatus(struct ghes *ghes,
			       struct acpi_hest_generic_status *estatus,
			       u64 *buf_paddr, enum fixed_addresses fixmap_idx)
{
	struct acpi_hest_generic *g = ghes->generic;
	int rc;

	rc = apei_read(buf_paddr, &g->error_status_address);
	if (rc) {
		*buf_paddr = 0;
		pr_warn_ratelimited(FW_WARN GHES_PFX
"Failed to read error status block address for hardware error source: %d.\n",
				    g->header.source_id);
		return -EIO;
	}
	if (!*buf_paddr)
		return -ENOENT;

	ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
			      fixmap_idx);
	if (!estatus->block_status) {
		*buf_paddr = 0;
		return -ENOENT;
	}

	return 0;
}

static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
			       u64 buf_paddr, enum fixed_addresses fixmap_idx,
			       size_t buf_len)
{
	ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
	if (cper_estatus_check(estatus)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
				    "Failed to read error status block!\n");
		return -EIO;
	}

	return 0;
}

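/*
 * Read and validate a complete error status block: peek at the header,
 * sanity-check its length, then copy the full record into @estatus.
 */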
static int ghes_read_estatus(struct ghes *ghes,
			     struct acpi_hest_generic_status *estatus,
			     u64 *buf_paddr, enum fixed_addresses fixmap_idx)
{
	int rc;

	rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
	if (rc)
		return rc;

	rc = __ghes_check_estatus(ghes, estatus);
	if (rc)
		return rc;

	return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
				   cper_estatus_len(estatus));
}

static void ghes_clear_estatus(struct ghes *ghes,
			       struct acpi_hest_generic_status *estatus,
			       u64 buf_paddr, enum fixed_addresses fixmap_idx)
{
	estatus->block_status = 0;

	if (!buf_paddr)
		return;

	ghes_copy_tofrom_phys(estatus, buf_paddr,
			      sizeof(estatus->block_status), 0,
			      fixmap_idx);

	/*
	 * GHESv2 type HEST entries introduce support for error acknowledgment,
	 * so only acknowledge the error if this support is present.
	 */
	if (is_hest_type_generic_v2(ghes))
		ghes_ack_error(ghes->generic_v2);
}

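/*
 * Hand platform memory errors to the memory_failure machinery:
 * soft-offline the page for corrected errors that exceeded the error
 * threshold, and queue recoverable errors for recovery.
 */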
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
		"Invalid address in generic error data: %#llx\n",
		mem_err->physical_addr);
		return;
	}

	/* Only the following two cases can be handled properly for now. */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags != -1)
		memory_failure_queue(pfn, flags);
#endif
}

/*
 * PCIe AER errors need to be sent to the AER driver for reporting and
 * recovery. The GHES severities map to the following AER severities and
 * require the following handling:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     These need to be reported by the AER driver but no recovery is
 *     necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     These both need to be reported and recovered from by the AER driver.
 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 * panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
		unsigned int devfn;
		int aer_severity;

		devfn = PCI_DEVFN(pcie_err->device_id.device,
				  pcie_err->device_id.function);
		aer_severity = cper_severity_to_aer(gdata->error_severity);

		/*
		 * If firmware reset the component to contain
		 * the error, we must reinitialize it before
		 * use, so treat it as a fatal AER error.
		 */
		if (gdata->flags & CPER_SEC_RESET)
			aer_severity = AER_FATAL;

		aer_recover_queue(pcie_err->device_id.segment,
				  pcie_err->device_id.bus,
				  devfn, aer_severity,
				  (struct aer_capability_regs *)
				  pcie_err->aer_info);
	}
#endif
}

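/*
 * Walk all sections of an error status block and dispatch each one to
 * the matching handler: platform memory, PCIe AER, ARM processor, or
 * the non-standard-event log.
 */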
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;
	guid_t *sec_type;
	const guid_t *fru_id = &guid_null;
	char *fru_text = "";

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_type = (guid_t *)gdata->section_type;
		sec_sev = ghes_severity(gdata->error_severity);
		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
			fru_id = (guid_t *)gdata->fru_id;

		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
			fru_text = gdata->fru_text;

		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

			ghes_edac_report_mem_error(sev, mem_err);

			arch_apei_report_mem_error(sev, mem_err);
			ghes_handle_memory_failure(gdata, sev);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			ghes_handle_aer(gdata);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

			log_arm_hw_error(err);
		} else {
			void *err = acpi_hest_get_payload(gdata);

			log_non_standard_event(sec_type, fru_id, fru_text,
					       sec_sev, err,
					       gdata->error_data_length);
		}
	}
}

static void __ghes_print_estatus(const char *pfx,
				 const struct acpi_hest_generic *generic,
				 const struct acpi_hest_generic_status *estatus)
{
	static atomic_t seqno;
	unsigned int curr_seqno;
	char pfx_seq[64];

	if (pfx == NULL) {
		if (ghes_severity(estatus->error_severity) <=
		    GHES_SEV_CORRECTED)
			pfx = KERN_WARNING;
		else
			pfx = KERN_ERR;
	}
	curr_seqno = atomic_inc_return(&seqno);
	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
	       pfx_seq, generic->header.source_id);
	cper_estatus_print(pfx_seq, estatus);
}

static int ghes_print_estatus(const char *pfx,
			      const struct acpi_hest_generic *generic,
			      const struct acpi_hest_generic_status *estatus)
{
	/* Not more than 2 messages every 5 seconds */
	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
	struct ratelimit_state *ratelimit;

	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
		ratelimit = &ratelimit_corrected;
	else
		ratelimit = &ratelimit_uncorrected;
	if (__ratelimit(ratelimit)) {
		__ghes_print_estatus(pfx, generic, estatus);
		return 1;
	}
	return 0;
}

/*
 * GHES error status reporting throttle, so that more kinds of errors
 * are reported, instead of just the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();
	return cached;
}

static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = cper_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}

static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
	u32 len;

	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
	len = GHES_ESTATUS_CACHE_LEN(len);
	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
	atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
	struct ghes_estatus_cache *cache;

	cache = container_of(head, struct ghes_estatus_cache, rcu);
	ghes_estatus_cache_free(cache);
}

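/*
 * Insert a new cache entry, preferring an empty slot, then an expired
 * entry, then the entry with the longest average interval between hits.
 * The slot is swapped in with cmpxchg() so that a racing update cannot
 * free the same old entry twice.
 */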
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}

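/*
 * Print the fatal error record, clear (and acknowledge) the error
 * status block, then panic. If panic_timeout is not already set, use
 * ghes_panic_timeout so the machine reboots and the error gets logged.
 */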
static void __ghes_panic(struct ghes *ghes,
			 struct acpi_hest_generic_status *estatus,
			 u64 buf_paddr, enum fixed_addresses fixmap_idx)
{
	__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);

	ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);

	/* reboot to log the error! */
	if (!panic_timeout)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}

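/*
 * Process one error source from IRQ-like context: read and validate
 * the error status block, panic on fatal errors, print the (throttled)
 * report, handle each section, then clear the block.
 */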
static int ghes_proc(struct ghes *ghes)
{
	struct acpi_hest_generic_status *estatus = ghes->estatus;
	u64 buf_paddr;
	int rc;

	rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
	if (rc)
		goto out;

	if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
		__ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);

	if (!ghes_estatus_cached(estatus)) {
		if (ghes_print_estatus(NULL, ghes->generic, estatus))
			ghes_estatus_cache_add(ghes->generic, estatus);
	}
	ghes_do_proc(ghes, estatus);

out:
	ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);

	return rc;
}

static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}

static void ghes_poll_func(struct timer_list *t)
{
	struct ghes *ghes = from_timer(ghes, t, timer);
	unsigned long flags;

	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
	ghes_proc(ghes);
	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}

static irqreturn_t ghes_irq_func(int irq, void *data)
{
	struct ghes *ghes = data;
	unsigned long flags;
	int rc;

	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
	rc = ghes_proc(ghes);
	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
	if (rc)
		return IRQ_NONE;

	return IRQ_HANDLED;
}

static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
			   void *data)
{
	struct ghes *ghes;
	unsigned long flags;
	int ret = NOTIFY_DONE;

	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
		if (!ghes_proc(ghes))
			ret = NOTIFY_OK;
	}
	rcu_read_unlock();
	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);

	return ret;
}

static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};

/*
 * Handlers for CPER records may not be NMI safe. For example,
 * memory_failure_queue() takes spinlocks and calls schedule_work_on().
 * In any NMI-like handler, memory from ghes_estatus_pool is used to save
 * the estatus, which is then added to ghes_estatus_llist. irq_work_queue()
 * causes ghes_proc_in_irq() to run in IRQ context, where each estatus on
 * ghes_estatus_llist is processed.
 *
 * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
 * to suppress frequent messages.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

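/*
 * irq_work callback: drain ghes_estatus_llist, processing, printing
 * (with throttling) and freeing each estatus node queued from an
 * NMI-like handler.
 */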
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The estatus entries are in reverse time order in the list,
	 * so restore the proper order first.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus_node->ghes, estatus);
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}

static void ghes_print_queued_estatus(void)
{
	struct llist_node *llnode;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The estatus entries are in reverse time order in the list,
	 * so restore the proper order first.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		generic = estatus_node->generic;
		ghes_print_estatus(NULL, generic, estatus);
		llnode = llnode->next;
	}
}

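/*
 * Read one error status block from an NMI-like context and queue it on
 * ghes_estatus_llist for later processing via irq_work. Fatal errors
 * cause an immediate panic. Requires an NMI-safe cmpxchg for the
 * gen_pool allocation.
 */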
static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
				       enum fixed_addresses fixmap_idx)
{
	struct acpi_hest_generic_status *estatus, tmp_header;
	struct ghes_estatus_node *estatus_node;
	u32 len, node_len;
	u64 buf_paddr;
	int sev, rc;

	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
		return -EOPNOTSUPP;

	rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
	if (rc) {
		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
		return rc;
	}

	rc = __ghes_check_estatus(ghes, &tmp_header);
	if (rc) {
		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
		return rc;
	}

	len = cper_estatus_len(&tmp_header);
	node_len = GHES_ESTATUS_NODE_LEN(len);
	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return -ENOMEM;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);

	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
		ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
		rc = -ENOENT;
		goto no_work;
	}

	sev = ghes_severity(estatus->error_severity);
	if (sev >= GHES_SEV_PANIC) {
		ghes_print_queued_estatus();
		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
	}

	ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);

	/* This error has been reported before, don't process it again. */
	if (ghes_estatus_cached(estatus))
		goto no_work;

	llist_add(&estatus_node->llnode, &ghes_estatus_llist);

	return rc;

no_work:
	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
		      node_len);

	return rc;
}

static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
				       enum fixed_addresses fixmap_idx)
{
	int ret = -ENOENT;
	struct ghes *ghes;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, rcu_list, list) {
		if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
			ret = 0;
	}
	rcu_read_unlock();

	if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
		irq_work_queue(&ghes_proc_irq_work);

	return ret;
}

#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);

/*
 * Return 0 only if one of the SEA error sources successfully reported an error
 * record sent from the firmware.
 */
int ghes_notify_sea(void)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
	int rv;

	raw_spin_lock(&ghes_notify_lock_sea);
	rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
	raw_spin_unlock(&ghes_notify_lock_sea);

	return rv;
}

static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);
	synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */

#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * An NMI may be triggered on any CPU, so ghes_in_nmi is used to allow
 * only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);

static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
	int ret = NMI_DONE;

	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	raw_spin_lock(&ghes_notify_lock_nmi);
	if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
		ret = NMI_HANDLED;
	raw_spin_unlock(&ghes_notify_lock_nmi);

	atomic_dec(&ghes_in_nmi);
	return ret;
}

static void ghes_nmi_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with the NMI handler, the ghes can only be
	 * freed after the NMI handler finishes.
	 */
	synchronize_rcu();
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */

static void ghes_nmi_init_cxt(void)
{
	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}

static int __ghes_sdei_callback(struct ghes *ghes,
				enum fixed_addresses fixmap_idx)
{
	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
		irq_work_queue(&ghes_proc_irq_work);

		return 0;
	}

	return -ENOENT;
}

static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
				     void *arg)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
	struct ghes *ghes = arg;
	int err;

	raw_spin_lock(&ghes_notify_lock_sdei_normal);
	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
	raw_spin_unlock(&ghes_notify_lock_sdei_normal);

	return err;
}

static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
				       void *arg)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
	struct ghes *ghes = arg;
	int err;

	raw_spin_lock(&ghes_notify_lock_sdei_critical);
	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
	raw_spin_unlock(&ghes_notify_lock_sdei_critical);

	return err;
}

static int apei_sdei_register_ghes(struct ghes *ghes)
{
	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
		return -EOPNOTSUPP;

	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
				  ghes_sdei_critical_callback);
}

static int apei_sdei_unregister_ghes(struct ghes *ghes)
{
	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
		return -EOPNOTSUPP;

	return sdei_unregister_ghes(ghes);
}

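/*
 * Probe one generic error source described by HEST: check that the
 * notification type is supported, allocate the struct ghes, wire up
 * the notification mechanism (polling timer, GSI interrupt, HED
 * notifier, SEA, NMI or SDEI), register with ghes_edac, and process
 * any error that is already pending.
 */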
static int ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	unsigned long flags;

	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		break;

	case ACPI_HEST_NOTIFY_SEA:
		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
				generic->header.source_id);
			rc = -ENOTSUPP;
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_NMI:
		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
				generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
				generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			generic->header.source_id);
		goto err;
	default:
		pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			generic->error_block_length, generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		ghes = NULL;
		goto err;
	}

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		timer_setup(&ghes->timer, ghes_poll_func, 0);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
		if (rc) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
				 "GHES IRQ", ghes);
		if (rc) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		break;

	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_hed))
			register_acpi_hed_notifier(&ghes_notifier_hed);
		list_add_rcu(&ghes->list, &ghes_hed);
		mutex_unlock(&ghes_list_mutex);
		break;

	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_add(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_add(ghes);
		break;
	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
		rc = apei_sdei_register_ghes(ghes);
		if (rc)
			goto err;
		break;
	default:
		BUG();
	}

	platform_set_drvdata(ghes_dev, ghes);

	ghes_edac_register(ghes, &ghes_dev->dev);

	/* Handle any pending errors right away */
	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
	ghes_proc(ghes);
	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);

	return 0;

err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}

static int ghes_remove(struct platform_device *ghes_dev)
{
	int rc;
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;

	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_hed))
			unregister_acpi_hed_notifier(&ghes_notifier_hed);
		mutex_unlock(&ghes_list_mutex);
		synchronize_rcu();
		break;

	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_remove(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_remove(ghes);
		break;
	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
		rc = apei_sdei_unregister_ghes(ghes);
		if (rc)
			return rc;
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);

	ghes_edac_unregister(ghes);

	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}

static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};

static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	switch (hest_disable) {
	case HEST_NOT_FOUND:
		return -ENODEV;
	case HEST_DISABLED:
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	default:
		break;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	ghes_nmi_init_cxt();

	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err;

	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;
err:
	return rc;
}
device_initcall(ghes_init);