]>
Commit | Line | Data |
---|---|---|
1802d0be | 1 | // SPDX-License-Identifier: GPL-2.0-only |
d334a491 HY |
2 | /* |
3 | * APEI Generic Hardware Error Source support | |
4 | * | |
5 | * Generic Hardware Error Source provides a way to report platform | |
6 | * hardware errors (such as that from chipset). It works in so called | |
7 | * "Firmware First" mode, that is, hardware errors are reported to | |
8 | * firmware firstly, then reported to Linux by firmware. This way, | |
9 | * some non-standard hardware error registers or non-standard hardware | |
10 | * link can be checked by firmware to produce more hardware error | |
11 | * information for Linux. | |
12 | * | |
13 | * For more information about Generic Hardware Error Source, please | |
14 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
15 | * | |
67eb2e99 | 16 | * Copyright 2010,2011 Intel Corp. |
d334a491 | 17 | * Author: Huang Ying <ying.huang@intel.com> |
d334a491 HY |
18 | */ |
19 | ||
f9f05395 | 20 | #include <linux/arm_sdei.h> |
d334a491 | 21 | #include <linux/kernel.h> |
020bf066 | 22 | #include <linux/moduleparam.h> |
d334a491 HY |
23 | #include <linux/init.h> |
24 | #include <linux/acpi.h> | |
25 | #include <linux/io.h> | |
26 | #include <linux/interrupt.h> | |
81e88fdc | 27 | #include <linux/timer.h> |
d334a491 | 28 | #include <linux/cper.h> |
7ad6e943 HY |
29 | #include <linux/platform_device.h> |
30 | #include <linux/mutex.h> | |
32c361f5 | 31 | #include <linux/ratelimit.h> |
81e88fdc | 32 | #include <linux/vmalloc.h> |
67eb2e99 HY |
33 | #include <linux/irq_work.h> |
34 | #include <linux/llist.h> | |
35 | #include <linux/genalloc.h> | |
a654e5ee | 36 | #include <linux/pci.h> |
b484079b | 37 | #include <linux/pfn.h> |
a654e5ee | 38 | #include <linux/aer.h> |
44a69f61 | 39 | #include <linux/nmi.h> |
e6017571 | 40 | #include <linux/sched/clock.h> |
297b64c7 TB |
41 | #include <linux/uuid.h> |
42 | #include <linux/ras.h> | |
40e06415 | 43 | |
42aa5604 | 44 | #include <acpi/actbl1.h> |
40e06415 | 45 | #include <acpi/ghes.h> |
9dae3d0d | 46 | #include <acpi/apei.h> |
4f89fa28 | 47 | #include <asm/fixmap.h> |
81e88fdc | 48 | #include <asm/tlbflush.h> |
297b64c7 | 49 | #include <ras/ras_event.h> |
d334a491 HY |
50 | |
51 | #include "apei-internal.h" | |
52 | ||
#define GHES_PFX	"GHES: "

#define GHES_ESTATUS_MAX_SIZE			65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE		65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER	3

/* Only an estimate, used when sizing the memory pool */
#define GHES_ESTATUS_CACHE_AVG_SIZE		512

#define GHES_ESTATUS_CACHES_SIZE		4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC		10000000000ULL
/* Keep RCU from letting too many cache entries be allocated at once */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX		(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/* A cache entry is a struct ghes_estatus_cache followed by the estatus */
#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/* A queued node is a struct ghes_estatus_node followed by the estatus */
#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
d334a491 | 80 | |
f9f05395 JM |
/*
 * NMI-like notifications differ between architectures. The compiler needs
 * some value for these enum names before it can discard the static
 * functions that go unused, so provide placeholders when SDEI is not
 * configured.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif
89 | ||
42aa5604 TB |
90 | static inline bool is_hest_type_generic_v2(struct ghes *ghes) |
91 | { | |
92 | return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; | |
93 | } | |
94 | ||
020bf066 PG |
95 | /* |
96 | * This driver isn't really modular, however for the time being, | |
97 | * continuing to use module_param is the easiest way to remain | |
98 | * compatible with existing boot arg use cases. | |
99 | */ | |
90ab5ee9 | 100 | bool ghes_disable; |
b6a95016 HY |
101 | module_param_named(disable, ghes_disable, bool, 0); |
102 | ||
d334a491 | 103 | /* |
7bf130e4 SJ |
104 | * All error sources notified with HED (Hardware Error Device) share a |
105 | * single notifier callback, so they need to be linked and checked one | |
106 | * by one. This holds true for NMI too. | |
d334a491 | 107 | * |
81e88fdc HY |
108 | * RCU is used for these lists, so ghes_list_mutex is only used for |
109 | * list changing, not for traversing. | |
d334a491 | 110 | */ |
7bf130e4 | 111 | static LIST_HEAD(ghes_hed); |
7ad6e943 | 112 | static DEFINE_MUTEX(ghes_list_mutex); |
d334a491 | 113 | |
81e88fdc HY |
114 | /* |
115 | * Because the memory area used to transfer hardware error information | |
116 | * from BIOS to Linux can be determined only in NMI, IRQ or timer | |
117 | * handler, but general ioremap can not be used in atomic context, so | |
4f89fa28 | 118 | * the fixmap is used instead. |
520e18a5 | 119 | * |
3b880cbe | 120 | * This spinlock is used to prevent the fixmap entry from being used |
4f89fa28 | 121 | * simultaneously. |
81e88fdc | 122 | */ |
3b880cbe | 123 | static DEFINE_SPINLOCK(ghes_notify_lock_irq); |
81e88fdc | 124 | |
67eb2e99 HY |
125 | static struct gen_pool *ghes_estatus_pool; |
126 | static unsigned long ghes_estatus_pool_size_request; | |
67eb2e99 | 127 | |
8f7c31f6 | 128 | static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; |
152cef40 HY |
129 | static atomic_t ghes_estatus_cache_alloced; |
130 | ||
2fb5853e JZZ |
131 | static int ghes_panic_timeout __read_mostly = 30; |
132 | ||
b484079b | 133 | static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) |
81e88fdc | 134 | { |
7edda088 TB |
135 | phys_addr_t paddr; |
136 | pgprot_t prot; | |
81e88fdc | 137 | |
b484079b | 138 | paddr = PFN_PHYS(pfn); |
7edda088 | 139 | prot = arch_apei_get_mem_attribute(paddr); |
b484079b | 140 | __set_fixmap(fixmap_idx, paddr, prot); |
81e88fdc | 141 | |
b484079b | 142 | return (void __iomem *) __fix_to_virt(fixmap_idx); |
81e88fdc HY |
143 | } |
144 | ||
b484079b | 145 | static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) |
81e88fdc | 146 | { |
b484079b | 147 | int _idx = virt_to_fix((unsigned long)vaddr); |
8ece249a | 148 | |
b484079b JM |
149 | WARN_ON_ONCE(fixmap_idx != _idx); |
150 | clear_fixmap(fixmap_idx); | |
81e88fdc HY |
151 | } |
152 | ||
fb7be08f | 153 | int ghes_estatus_pool_init(int num_ghes) |
67eb2e99 | 154 | { |
fb7be08f | 155 | unsigned long addr, len; |
6abc7622 | 156 | int rc; |
fb7be08f | 157 | |
67eb2e99 HY |
158 | ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); |
159 | if (!ghes_estatus_pool) | |
160 | return -ENOMEM; | |
67eb2e99 | 161 | |
fb7be08f JM |
162 | len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; |
163 | len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); | |
67eb2e99 | 164 | |
fb7be08f | 165 | ghes_estatus_pool_size_request = PAGE_ALIGN(len); |
0ac234be JM |
166 | addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); |
167 | if (!addr) | |
6abc7622 | 168 | goto err_pool_alloc; |
0ac234be | 169 | |
6abc7622 LZ |
170 | rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); |
171 | if (rc) | |
172 | goto err_pool_add; | |
173 | ||
174 | return 0; | |
175 | ||
176 | err_pool_add: | |
177 | vfree((void *)addr); | |
178 | ||
179 | err_pool_alloc: | |
180 | gen_pool_destroy(ghes_estatus_pool); | |
181 | ||
182 | return -ENOMEM; | |
67eb2e99 HY |
183 | } |
184 | ||
42aa5604 TB |
185 | static int map_gen_v2(struct ghes *ghes) |
186 | { | |
187 | return apei_map_generic_address(&ghes->generic_v2->read_ack_register); | |
188 | } | |
189 | ||
190 | static void unmap_gen_v2(struct ghes *ghes) | |
191 | { | |
192 | apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); | |
193 | } | |
194 | ||
06ddeadc JM |
195 | static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) |
196 | { | |
197 | int rc; | |
198 | u64 val = 0; | |
199 | ||
200 | rc = apei_read(&val, &gv2->read_ack_register); | |
201 | if (rc) | |
202 | return; | |
203 | ||
204 | val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; | |
205 | val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; | |
206 | ||
207 | apei_write(val, &gv2->read_ack_register); | |
208 | } | |
209 | ||
d334a491 HY |
210 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) |
211 | { | |
212 | struct ghes *ghes; | |
213 | unsigned int error_block_length; | |
214 | int rc; | |
215 | ||
216 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
217 | if (!ghes) | |
218 | return ERR_PTR(-ENOMEM); | |
42aa5604 | 219 | |
d334a491 | 220 | ghes->generic = generic; |
42aa5604 TB |
221 | if (is_hest_type_generic_v2(ghes)) { |
222 | rc = map_gen_v2(ghes); | |
223 | if (rc) | |
224 | goto err_free; | |
225 | } | |
226 | ||
34ddeb03 | 227 | rc = apei_map_generic_address(&generic->error_status_address); |
d334a491 | 228 | if (rc) |
42aa5604 | 229 | goto err_unmap_read_ack_addr; |
d334a491 HY |
230 | error_block_length = generic->error_block_length; |
231 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
933ca4e3 KW |
232 | pr_warn(FW_WARN GHES_PFX |
233 | "Error status block length is too long: %u for " | |
234 | "generic hardware error source: %d.\n", | |
235 | error_block_length, generic->header.source_id); | |
d334a491 HY |
236 | error_block_length = GHES_ESTATUS_MAX_SIZE; |
237 | } | |
238 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
239 | if (!ghes->estatus) { | |
240 | rc = -ENOMEM; | |
42aa5604 | 241 | goto err_unmap_status_addr; |
d334a491 HY |
242 | } |
243 | ||
244 | return ghes; | |
245 | ||
42aa5604 | 246 | err_unmap_status_addr: |
34ddeb03 | 247 | apei_unmap_generic_address(&generic->error_status_address); |
42aa5604 TB |
248 | err_unmap_read_ack_addr: |
249 | if (is_hest_type_generic_v2(ghes)) | |
250 | unmap_gen_v2(ghes); | |
d334a491 HY |
251 | err_free: |
252 | kfree(ghes); | |
253 | return ERR_PTR(rc); | |
254 | } | |
255 | ||
256 | static void ghes_fini(struct ghes *ghes) | |
257 | { | |
258 | kfree(ghes->estatus); | |
34ddeb03 | 259 | apei_unmap_generic_address(&ghes->generic->error_status_address); |
42aa5604 TB |
260 | if (is_hest_type_generic_v2(ghes)) |
261 | unmap_gen_v2(ghes); | |
d334a491 HY |
262 | } |
263 | ||
d334a491 HY |
264 | static inline int ghes_severity(int severity) |
265 | { | |
266 | switch (severity) { | |
ad4ecef2 HY |
267 | case CPER_SEV_INFORMATIONAL: |
268 | return GHES_SEV_NO; | |
269 | case CPER_SEV_CORRECTED: | |
270 | return GHES_SEV_CORRECTED; | |
271 | case CPER_SEV_RECOVERABLE: | |
272 | return GHES_SEV_RECOVERABLE; | |
273 | case CPER_SEV_FATAL: | |
274 | return GHES_SEV_PANIC; | |
d334a491 | 275 | default: |
25985edc | 276 | /* Unknown, go panic */ |
ad4ecef2 | 277 | return GHES_SEV_PANIC; |
d334a491 HY |
278 | } |
279 | } | |
280 | ||
81e88fdc | 281 | static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, |
b484079b JM |
282 | int from_phys, |
283 | enum fixed_addresses fixmap_idx) | |
d334a491 | 284 | { |
81e88fdc | 285 | void __iomem *vaddr; |
81e88fdc HY |
286 | u64 offset; |
287 | u32 trunk; | |
288 | ||
289 | while (len > 0) { | |
290 | offset = paddr - (paddr & PAGE_MASK); | |
b484079b | 291 | vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); |
81e88fdc HY |
292 | trunk = PAGE_SIZE - offset; |
293 | trunk = min(trunk, len); | |
294 | if (from_phys) | |
295 | memcpy_fromio(buffer, vaddr + offset, trunk); | |
296 | else | |
297 | memcpy_toio(vaddr + offset, buffer, trunk); | |
298 | len -= trunk; | |
299 | paddr += trunk; | |
300 | buffer += trunk; | |
b484079b | 301 | ghes_unmap(vaddr, fixmap_idx); |
81e88fdc | 302 | } |
d334a491 HY |
303 | } |
304 | ||
f2a681b9 JM |
305 | /* Check the top-level record header has an appropriate size. */ |
306 | static int __ghes_check_estatus(struct ghes *ghes, | |
307 | struct acpi_hest_generic_status *estatus) | |
308 | { | |
309 | u32 len = cper_estatus_len(estatus); | |
310 | ||
311 | if (len < sizeof(*estatus)) { | |
312 | pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); | |
313 | return -EIO; | |
314 | } | |
315 | ||
316 | if (len > ghes->generic->error_block_length) { | |
317 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); | |
318 | return -EIO; | |
319 | } | |
320 | ||
321 | if (cper_estatus_check_header(estatus)) { | |
322 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); | |
323 | return -EIO; | |
324 | } | |
325 | ||
326 | return 0; | |
327 | } | |
328 | ||
e00a6e33 JM |
329 | /* Read the CPER block, returning its address, and header in estatus. */ |
330 | static int __ghes_peek_estatus(struct ghes *ghes, | |
331 | struct acpi_hest_generic_status *estatus, | |
332 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 HY |
333 | { |
334 | struct acpi_hest_generic *g = ghes->generic; | |
d334a491 HY |
335 | int rc; |
336 | ||
eeb25557 | 337 | rc = apei_read(buf_paddr, &g->error_status_address); |
d334a491 | 338 | if (rc) { |
eeb25557 | 339 | *buf_paddr = 0; |
93066e9a | 340 | pr_warn_ratelimited(FW_WARN GHES_PFX |
d334a491 HY |
341 | "Failed to read error status block address for hardware error source: %d.\n", |
342 | g->header.source_id); | |
343 | return -EIO; | |
344 | } | |
eeb25557 | 345 | if (!*buf_paddr) |
d334a491 HY |
346 | return -ENOENT; |
347 | ||
f2a7e059 JM |
348 | ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, |
349 | fixmap_idx); | |
350 | if (!estatus->block_status) { | |
eeb25557 | 351 | *buf_paddr = 0; |
d334a491 | 352 | return -ENOENT; |
eeb25557 | 353 | } |
d334a491 | 354 | |
371b8689 | 355 | return 0; |
e00a6e33 | 356 | } |
f2a681b9 | 357 | |
e00a6e33 JM |
358 | static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, |
359 | u64 buf_paddr, enum fixed_addresses fixmap_idx, | |
360 | size_t buf_len) | |
361 | { | |
362 | ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); | |
f2a681b9 | 363 | if (cper_estatus_check(estatus)) { |
93066e9a JM |
364 | pr_warn_ratelimited(FW_WARN GHES_PFX |
365 | "Failed to read error status block!\n"); | |
f2a681b9 JM |
366 | return -EIO; |
367 | } | |
eeb25557 | 368 | |
f2a681b9 | 369 | return 0; |
d334a491 HY |
370 | } |
371 | ||
e00a6e33 JM |
372 | static int ghes_read_estatus(struct ghes *ghes, |
373 | struct acpi_hest_generic_status *estatus, | |
374 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
375 | { | |
376 | int rc; | |
377 | ||
378 | rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); | |
379 | if (rc) | |
380 | return rc; | |
381 | ||
382 | rc = __ghes_check_estatus(ghes, estatus); | |
383 | if (rc) | |
384 | return rc; | |
385 | ||
386 | return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, | |
387 | cper_estatus_len(estatus)); | |
388 | } | |
389 | ||
f2a7e059 JM |
390 | static void ghes_clear_estatus(struct ghes *ghes, |
391 | struct acpi_hest_generic_status *estatus, | |
392 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 | 393 | { |
f2a7e059 | 394 | estatus->block_status = 0; |
eeb25557 JM |
395 | |
396 | if (!buf_paddr) | |
397 | return; | |
398 | ||
f2a7e059 JM |
399 | ghes_copy_tofrom_phys(estatus, buf_paddr, |
400 | sizeof(estatus->block_status), 0, | |
b484079b | 401 | fixmap_idx); |
06ddeadc JM |
402 | |
403 | /* | |
404 | * GHESv2 type HEST entries introduce support for error acknowledgment, | |
405 | * so only acknowledge the error if this support is present. | |
406 | */ | |
407 | if (is_hest_type_generic_v2(ghes)) | |
408 | ghes_ack_error(ghes->generic_v2); | |
d334a491 HY |
409 | } |
410 | ||
/*
 * Queue a memory failure for the page named by a platform memory error
 * section. Does nothing unless CONFIG_ACPI_APEI_MEMORY_FAILURE is set.
 *
 * @gdata: the memory error section.
 * @sev:   GHES severity of the enclosing error status block.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
	int sec_sev = ghes_severity(gdata->error_severity);
	unsigned long pfn;
	int flags = -1;

	/* Without a valid physical address there is no page to act on. */
	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
		"Invalid address in generic error data: %#llx\n",
		mem_err->physical_addr);
		return;
	}

	/* Only the following two events can be handled properly for now. */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags == -1)
		return;

	memory_failure_queue(pfn, flags);
#endif
}
441 | ||
9852ce9a TB |
/*
 * PCIe AER errors are handed to the AER driver, which reports them and
 * performs recovery. GHES severities translate to AER severities, and to
 * the handling they need, as follows:
 *
 * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
 *     Reported by the AER driver; no recovery is needed.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     Both are reported and recovered from by the AER driver.
 * GHES_SEV_PANIC never reaches this code because the kernel must panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
	unsigned int devfn;
	int aer_severity;

	/* Both the device ID and the AER info must be marked valid. */
	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID) ||
	    !(pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
		return;

	devfn = PCI_DEVFN(pcie_err->device_id.device,
			  pcie_err->device_id.function);
	aer_severity = cper_severity_to_aer(gdata->error_severity);

	/*
	 * A component that firmware reset in order to contain the error has
	 * to be reinitialized before it is used again, so handle it as a
	 * fatal AER error.
	 */
	if (gdata->flags & CPER_SEC_RESET)
		aer_severity = AER_FATAL;

	aer_recover_queue(pcie_err->device_id.segment,
			  pcie_err->device_id.bus,
			  devfn, aer_severity,
			  (struct aer_capability_regs *)
			  pcie_err->aer_info);
#endif
}
486 | ||
21480547 | 487 | static void ghes_do_proc(struct ghes *ghes, |
0a00fd5e | 488 | const struct acpi_hest_generic_status *estatus) |
d334a491 | 489 | { |
ba61ca4a | 490 | int sev, sec_sev; |
0a00fd5e | 491 | struct acpi_hest_generic_data *gdata; |
5b53696a | 492 | guid_t *sec_type; |
bb100b64 | 493 | const guid_t *fru_id = &guid_null; |
297b64c7 | 494 | char *fru_text = ""; |
d334a491 | 495 | |
67eb2e99 HY |
496 | sev = ghes_severity(estatus->error_severity); |
497 | apei_estatus_for_each_section(estatus, gdata) { | |
5b53696a | 498 | sec_type = (guid_t *)gdata->section_type; |
ba61ca4a | 499 | sec_sev = ghes_severity(gdata->error_severity); |
297b64c7 TB |
500 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) |
501 | fru_id = (guid_t *)gdata->fru_id; | |
502 | ||
503 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) | |
504 | fru_text = gdata->fru_text; | |
505 | ||
5b53696a | 506 | if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { |
bbcc2e7b TB |
507 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); |
508 | ||
305d0e00 | 509 | ghes_edac_report_mem_error(sev, mem_err); |
21480547 | 510 | |
9dae3d0d | 511 | arch_apei_report_mem_error(sev, mem_err); |
cf870c70 | 512 | ghes_handle_memory_failure(gdata, sev); |
ba61ca4a | 513 | } |
5b53696a | 514 | else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { |
9852ce9a | 515 | ghes_handle_aer(gdata); |
a654e5ee | 516 | } |
e9279e83 TB |
517 | else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { |
518 | struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); | |
519 | ||
520 | log_arm_hw_error(err); | |
521 | } else { | |
297b64c7 TB |
522 | void *err = acpi_hest_get_payload(gdata); |
523 | ||
524 | log_non_standard_event(sec_type, fru_id, fru_text, | |
525 | sec_sev, err, | |
526 | gdata->error_data_length); | |
527 | } | |
d334a491 | 528 | } |
32c361f5 | 529 | } |
d334a491 | 530 | |
67eb2e99 HY |
531 | static void __ghes_print_estatus(const char *pfx, |
532 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 533 | const struct acpi_hest_generic_status *estatus) |
32c361f5 | 534 | { |
5ba82ab5 HY |
535 | static atomic_t seqno; |
536 | unsigned int curr_seqno; | |
537 | char pfx_seq[64]; | |
538 | ||
32c361f5 | 539 | if (pfx == NULL) { |
67eb2e99 | 540 | if (ghes_severity(estatus->error_severity) <= |
32c361f5 | 541 | GHES_SEV_CORRECTED) |
5ba82ab5 | 542 | pfx = KERN_WARNING; |
32c361f5 | 543 | else |
5ba82ab5 | 544 | pfx = KERN_ERR; |
32c361f5 | 545 | } |
5ba82ab5 HY |
546 | curr_seqno = atomic_inc_return(&seqno); |
547 | snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); | |
5588340d | 548 | printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", |
5ba82ab5 | 549 | pfx_seq, generic->header.source_id); |
88f074f4 | 550 | cper_estatus_print(pfx_seq, estatus); |
5588340d HY |
551 | } |
552 | ||
152cef40 HY |
553 | static int ghes_print_estatus(const char *pfx, |
554 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 555 | const struct acpi_hest_generic_status *estatus) |
5588340d HY |
556 | { |
557 | /* Not more than 2 messages every 5 seconds */ | |
67eb2e99 HY |
558 | static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); |
559 | static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); | |
560 | struct ratelimit_state *ratelimit; | |
5588340d | 561 | |
67eb2e99 HY |
562 | if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) |
563 | ratelimit = &ratelimit_corrected; | |
564 | else | |
565 | ratelimit = &ratelimit_uncorrected; | |
152cef40 | 566 | if (__ratelimit(ratelimit)) { |
67eb2e99 | 567 | __ghes_print_estatus(pfx, generic, estatus); |
152cef40 HY |
568 | return 1; |
569 | } | |
570 | return 0; | |
571 | } | |
572 | ||
573 | /* | |
574 | * GHES error status reporting throttle, to report more kinds of | |
575 | * errors, instead of just most frequently occurred errors. | |
576 | */ | |
0a00fd5e | 577 | static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) |
152cef40 HY |
578 | { |
579 | u32 len; | |
580 | int i, cached = 0; | |
581 | unsigned long long now; | |
582 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 583 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 | 584 | |
88f074f4 | 585 | len = cper_estatus_len(estatus); |
152cef40 HY |
586 | rcu_read_lock(); |
587 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
588 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
589 | if (cache == NULL) | |
590 | continue; | |
591 | if (len != cache->estatus_len) | |
592 | continue; | |
593 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
594 | if (memcmp(estatus, cache_estatus, len)) | |
595 | continue; | |
596 | atomic_inc(&cache->count); | |
597 | now = sched_clock(); | |
598 | if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) | |
599 | cached = 1; | |
600 | break; | |
601 | } | |
602 | rcu_read_unlock(); | |
603 | return cached; | |
604 | } | |
605 | ||
606 | static struct ghes_estatus_cache *ghes_estatus_cache_alloc( | |
607 | struct acpi_hest_generic *generic, | |
0a00fd5e | 608 | struct acpi_hest_generic_status *estatus) |
152cef40 HY |
609 | { |
610 | int alloced; | |
611 | u32 len, cache_len; | |
612 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 613 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 HY |
614 | |
615 | alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); | |
616 | if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { | |
617 | atomic_dec(&ghes_estatus_cache_alloced); | |
618 | return NULL; | |
619 | } | |
88f074f4 | 620 | len = cper_estatus_len(estatus); |
152cef40 HY |
621 | cache_len = GHES_ESTATUS_CACHE_LEN(len); |
622 | cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); | |
623 | if (!cache) { | |
624 | atomic_dec(&ghes_estatus_cache_alloced); | |
625 | return NULL; | |
626 | } | |
627 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
628 | memcpy(cache_estatus, estatus, len); | |
629 | cache->estatus_len = len; | |
630 | atomic_set(&cache->count, 0); | |
631 | cache->generic = generic; | |
632 | cache->time_in = sched_clock(); | |
633 | return cache; | |
634 | } | |
635 | ||
636 | static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) | |
637 | { | |
638 | u32 len; | |
639 | ||
88f074f4 | 640 | len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); |
152cef40 HY |
641 | len = GHES_ESTATUS_CACHE_LEN(len); |
642 | gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); | |
643 | atomic_dec(&ghes_estatus_cache_alloced); | |
644 | } | |
645 | ||
646 | static void ghes_estatus_cache_rcu_free(struct rcu_head *head) | |
647 | { | |
648 | struct ghes_estatus_cache *cache; | |
649 | ||
650 | cache = container_of(head, struct ghes_estatus_cache, rcu); | |
651 | ghes_estatus_cache_free(cache); | |
652 | } | |
653 | ||
654 | static void ghes_estatus_cache_add( | |
655 | struct acpi_hest_generic *generic, | |
0a00fd5e | 656 | struct acpi_hest_generic_status *estatus) |
152cef40 HY |
657 | { |
658 | int i, slot = -1, count; | |
659 | unsigned long long now, duration, period, max_period = 0; | |
660 | struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; | |
661 | ||
662 | new_cache = ghes_estatus_cache_alloc(generic, estatus); | |
663 | if (new_cache == NULL) | |
664 | return; | |
665 | rcu_read_lock(); | |
666 | now = sched_clock(); | |
667 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
668 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
669 | if (cache == NULL) { | |
670 | slot = i; | |
671 | slot_cache = NULL; | |
672 | break; | |
673 | } | |
674 | duration = now - cache->time_in; | |
675 | if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { | |
676 | slot = i; | |
677 | slot_cache = cache; | |
678 | break; | |
679 | } | |
680 | count = atomic_read(&cache->count); | |
70cb6e1d LB |
681 | period = duration; |
682 | do_div(period, (count + 1)); | |
152cef40 HY |
683 | if (period > max_period) { |
684 | max_period = period; | |
685 | slot = i; | |
686 | slot_cache = cache; | |
687 | } | |
688 | } | |
689 | /* new_cache must be put into array after its contents are written */ | |
690 | smp_wmb(); | |
691 | if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, | |
692 | slot_cache, new_cache) == slot_cache) { | |
693 | if (slot_cache) | |
694 | call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); | |
695 | } else | |
696 | ghes_estatus_cache_free(new_cache); | |
697 | rcu_read_unlock(); | |
d334a491 HY |
698 | } |
699 | ||
f2a7e059 JM |
700 | static void __ghes_panic(struct ghes *ghes, |
701 | struct acpi_hest_generic_status *estatus, | |
702 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
2fb5853e | 703 | { |
f2a7e059 | 704 | __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); |
2fb5853e | 705 | |
f2a7e059 | 706 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
98cff8b2 | 707 | |
2fb5853e JZZ |
708 | /* reboot to log the error! */ |
709 | if (!panic_timeout) | |
710 | panic_timeout = ghes_panic_timeout; | |
711 | panic("Fatal hardware error!"); | |
712 | } | |
713 | ||
d334a491 HY |
714 | static int ghes_proc(struct ghes *ghes) |
715 | { | |
f2a7e059 | 716 | struct acpi_hest_generic_status *estatus = ghes->estatus; |
eeb25557 | 717 | u64 buf_paddr; |
d334a491 HY |
718 | int rc; |
719 | ||
f2a7e059 | 720 | rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); |
d334a491 HY |
721 | if (rc) |
722 | goto out; | |
2fb5853e | 723 | |
f2a7e059 JM |
724 | if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) |
725 | __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); | |
2fb5853e | 726 | |
f2a7e059 JM |
727 | if (!ghes_estatus_cached(estatus)) { |
728 | if (ghes_print_estatus(NULL, ghes->generic, estatus)) | |
729 | ghes_estatus_cache_add(ghes->generic, estatus); | |
152cef40 | 730 | } |
f2a7e059 | 731 | ghes_do_proc(ghes, estatus); |
42aa5604 | 732 | |
aaf2c2fb | 733 | out: |
f2a7e059 | 734 | ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); |
aaf2c2fb | 735 | |
806487a8 | 736 | return rc; |
d334a491 HY |
737 | } |
738 | ||
81e88fdc HY |
739 | static void ghes_add_timer(struct ghes *ghes) |
740 | { | |
741 | struct acpi_hest_generic *g = ghes->generic; | |
742 | unsigned long expire; | |
743 | ||
744 | if (!g->notify.poll_interval) { | |
933ca4e3 KW |
745 | pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", |
746 | g->header.source_id); | |
81e88fdc HY |
747 | return; |
748 | } | |
749 | expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); | |
750 | ghes->timer.expires = round_jiffies_relative(expire); | |
751 | add_timer(&ghes->timer); | |
752 | } | |
753 | ||
d5272003 | 754 | static void ghes_poll_func(struct timer_list *t) |
81e88fdc | 755 | { |
d5272003 | 756 | struct ghes *ghes = from_timer(ghes, t, timer); |
3b880cbe | 757 | unsigned long flags; |
81e88fdc | 758 | |
3b880cbe | 759 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 760 | ghes_proc(ghes); |
3b880cbe | 761 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
762 | if (!(ghes->flags & GHES_EXITING)) |
763 | ghes_add_timer(ghes); | |
764 | } | |
765 | ||
766 | static irqreturn_t ghes_irq_func(int irq, void *data) | |
767 | { | |
768 | struct ghes *ghes = data; | |
3b880cbe | 769 | unsigned long flags; |
81e88fdc HY |
770 | int rc; |
771 | ||
3b880cbe | 772 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 773 | rc = ghes_proc(ghes); |
3b880cbe | 774 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
775 | if (rc) |
776 | return IRQ_NONE; | |
777 | ||
778 | return IRQ_HANDLED; | |
779 | } | |
780 | ||
7bf130e4 SJ |
781 | static int ghes_notify_hed(struct notifier_block *this, unsigned long event, |
782 | void *data) | |
d334a491 HY |
783 | { |
784 | struct ghes *ghes; | |
3b880cbe | 785 | unsigned long flags; |
d334a491 HY |
786 | int ret = NOTIFY_DONE; |
787 | ||
3b880cbe | 788 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
d334a491 | 789 | rcu_read_lock(); |
7bf130e4 | 790 | list_for_each_entry_rcu(ghes, &ghes_hed, list) { |
d334a491 HY |
791 | if (!ghes_proc(ghes)) |
792 | ret = NOTIFY_OK; | |
793 | } | |
794 | rcu_read_unlock(); | |
3b880cbe | 795 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
d334a491 HY |
796 | |
797 | return ret; | |
798 | } | |
799 | ||
7bf130e4 SJ |
800 | static struct notifier_block ghes_notifier_hed = { |
801 | .notifier_call = ghes_notify_hed, | |
44a69f61 TN |
802 | }; |
803 | ||
44a69f61 | 804 | /* |
9c9d0805 JM |
805 | * Handlers for CPER records may not be NMI safe. For example, |
806 | * memory_failure_queue() takes spinlocks and calls schedule_work_on(). | |
807 | * In any NMI-like handler, memory from ghes_estatus_pool is used to save | |
808 | * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes | |
809 | * ghes_proc_in_irq() to run in IRQ context where each estatus in | |
810 | * ghes_estatus_llist is processed. | |
811 | * | |
812 | * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache | |
813 | * to suppress frequent messages. | |
44a69f61 TN |
814 | */ |
815 | static struct llist_head ghes_estatus_llist; | |
816 | static struct irq_work ghes_proc_irq_work; | |
817 | ||
67eb2e99 HY |
818 | static void ghes_proc_in_irq(struct irq_work *irq_work) |
819 | { | |
46d12f0b | 820 | struct llist_node *llnode, *next; |
67eb2e99 | 821 | struct ghes_estatus_node *estatus_node; |
152cef40 | 822 | struct acpi_hest_generic *generic; |
0a00fd5e | 823 | struct acpi_hest_generic_status *estatus; |
67eb2e99 HY |
824 | u32 len, node_len; |
825 | ||
46d12f0b | 826 | llnode = llist_del_all(&ghes_estatus_llist); |
67eb2e99 HY |
827 | /* |
828 | * Because the time order of estatus in list is reversed, | |
829 | * revert it back to proper order. | |
830 | */ | |
8d21d4c9 | 831 | llnode = llist_reverse_order(llnode); |
67eb2e99 HY |
832 | while (llnode) { |
833 | next = llnode->next; | |
834 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
835 | llnode); | |
836 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
88f074f4 | 837 | len = cper_estatus_len(estatus); |
67eb2e99 | 838 | node_len = GHES_ESTATUS_NODE_LEN(len); |
21480547 | 839 | ghes_do_proc(estatus_node->ghes, estatus); |
152cef40 HY |
840 | if (!ghes_estatus_cached(estatus)) { |
841 | generic = estatus_node->generic; | |
842 | if (ghes_print_estatus(NULL, generic, estatus)) | |
843 | ghes_estatus_cache_add(generic, estatus); | |
844 | } | |
67eb2e99 HY |
845 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, |
846 | node_len); | |
847 | llnode = next; | |
848 | } | |
849 | } | |
850 | ||
46d12f0b HY |
851 | static void ghes_print_queued_estatus(void) |
852 | { | |
853 | struct llist_node *llnode; | |
854 | struct ghes_estatus_node *estatus_node; | |
855 | struct acpi_hest_generic *generic; | |
0a00fd5e | 856 | struct acpi_hest_generic_status *estatus; |
46d12f0b HY |
857 | |
858 | llnode = llist_del_all(&ghes_estatus_llist); | |
859 | /* | |
860 | * Because the time order of estatus in list is reversed, | |
861 | * revert it back to proper order. | |
862 | */ | |
8d21d4c9 | 863 | llnode = llist_reverse_order(llnode); |
46d12f0b HY |
864 | while (llnode) { |
865 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
866 | llnode); | |
867 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
46d12f0b HY |
868 | generic = estatus_node->generic; |
869 | ghes_print_estatus(NULL, generic, estatus); | |
870 | llnode = llnode->next; | |
871 | } | |
872 | } | |
873 | ||
d9f608dc JM |
874 | static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, |
875 | enum fixed_addresses fixmap_idx) | |
11568496 | 876 | { |
d9f608dc | 877 | struct acpi_hest_generic_status *estatus, tmp_header; |
11568496 | 878 | struct ghes_estatus_node *estatus_node; |
d9f608dc JM |
879 | u32 len, node_len; |
880 | u64 buf_paddr; | |
881 | int sev, rc; | |
11568496 | 882 | |
f2a7e059 | 883 | if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) |
d9f608dc | 884 | return -EOPNOTSUPP; |
11568496 | 885 | |
d9f608dc JM |
886 | rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); |
887 | if (rc) { | |
888 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
889 | return rc; | |
890 | } | |
f2a7e059 | 891 | |
d9f608dc JM |
892 | rc = __ghes_check_estatus(ghes, &tmp_header); |
893 | if (rc) { | |
894 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
895 | return rc; | |
896 | } | |
11568496 | 897 | |
d9f608dc JM |
898 | len = cper_estatus_len(&tmp_header); |
899 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
11568496 BP |
900 | estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); |
901 | if (!estatus_node) | |
d9f608dc | 902 | return -ENOMEM; |
11568496 BP |
903 | |
904 | estatus_node->ghes = ghes; | |
905 | estatus_node->generic = ghes->generic; | |
906 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
11568496 | 907 | |
d9f608dc | 908 | if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { |
f2a7e059 | 909 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
d9f608dc JM |
910 | rc = -ENOENT; |
911 | goto no_work; | |
ee2eb3d4 | 912 | } |
6fe9e7c2 | 913 | |
f2a7e059 | 914 | sev = ghes_severity(estatus->error_severity); |
ee2eb3d4 JM |
915 | if (sev >= GHES_SEV_PANIC) { |
916 | ghes_print_queued_estatus(); | |
f2a7e059 | 917 | __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); |
ee2eb3d4 | 918 | } |
6169ddf8 | 919 | |
d9f608dc | 920 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); |
6169ddf8 | 921 | |
d9f608dc JM |
922 | /* This error has been reported before, don't process it again. */ |
923 | if (ghes_estatus_cached(estatus)) | |
924 | goto no_work; | |
925 | ||
926 | llist_add(&estatus_node->llnode, &ghes_estatus_llist); | |
927 | ||
928 | return rc; | |
929 | ||
930 | no_work: | |
931 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, | |
932 | node_len); | |
933 | ||
934 | return rc; | |
ee2eb3d4 JM |
935 | } |
936 | ||
b484079b JM |
937 | static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, |
938 | enum fixed_addresses fixmap_idx) | |
ee2eb3d4 JM |
939 | { |
940 | int ret = -ENOENT; | |
941 | struct ghes *ghes; | |
942 | ||
943 | rcu_read_lock(); | |
944 | list_for_each_entry_rcu(ghes, rcu_list, list) { | |
b484079b | 945 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) |
ee2eb3d4 | 946 | ret = 0; |
81e88fdc | 947 | } |
ee2eb3d4 | 948 | rcu_read_unlock(); |
11568496 | 949 | |
ee2eb3d4 | 950 | if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) |
a545715d | 951 | irq_work_queue(&ghes_proc_irq_work); |
ee2eb3d4 JM |
952 | |
953 | return ret; | |
954 | } | |
9c9d0805 JM |
955 | |
956 | #ifdef CONFIG_ACPI_APEI_SEA | |
957 | static LIST_HEAD(ghes_sea); | |
958 | ||
959 | /* | |
960 | * Return 0 only if one of the SEA error sources successfully reported an error | |
961 | * record sent from the firmware. | |
962 | */ | |
963 | int ghes_notify_sea(void) | |
964 | { | |
3b880cbe JM |
965 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); |
966 | int rv; | |
967 | ||
968 | raw_spin_lock(&ghes_notify_lock_sea); | |
b972d2ea | 969 | rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); |
3b880cbe JM |
970 | raw_spin_unlock(&ghes_notify_lock_sea); |
971 | ||
972 | return rv; | |
9c9d0805 JM |
973 | } |
974 | ||
975 | static void ghes_sea_add(struct ghes *ghes) | |
976 | { | |
977 | mutex_lock(&ghes_list_mutex); | |
978 | list_add_rcu(&ghes->list, &ghes_sea); | |
979 | mutex_unlock(&ghes_list_mutex); | |
980 | } | |
981 | ||
982 | static void ghes_sea_remove(struct ghes *ghes) | |
983 | { | |
984 | mutex_lock(&ghes_list_mutex); | |
985 | list_del_rcu(&ghes->list); | |
986 | mutex_unlock(&ghes_list_mutex); | |
987 | synchronize_rcu(); | |
988 | } | |
989 | #else /* CONFIG_ACPI_APEI_SEA */ | |
/* No-op variants used when CONFIG_ACPI_APEI_SEA is not set. */
static inline void ghes_sea_add(struct ghes *ghes)
{
}

static inline void ghes_sea_remove(struct ghes *ghes)
{
}
992 | #endif /* CONFIG_ACPI_APEI_SEA */ | |
993 | ||
994 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | |
995 | /* | |
996 | * NMI may be triggered on any CPU, so ghes_in_nmi is used for | |
997 | * having only one concurrent reader. | |
998 | */ | |
999 | static atomic_t ghes_in_nmi = ATOMIC_INIT(0); | |
1000 | ||
1001 | static LIST_HEAD(ghes_nmi); | |
ee2eb3d4 JM |
1002 | |
1003 | static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) | |
1004 | { | |
3b880cbe | 1005 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); |
ee2eb3d4 JM |
1006 | int ret = NMI_DONE; |
1007 | ||
1008 | if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) | |
1009 | return ret; | |
1010 | ||
3b880cbe | 1011 | raw_spin_lock(&ghes_notify_lock_nmi); |
b484079b | 1012 | if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) |
ee2eb3d4 | 1013 | ret = NMI_HANDLED; |
3b880cbe | 1014 | raw_spin_unlock(&ghes_notify_lock_nmi); |
ee2eb3d4 | 1015 | |
6fe9e7c2 | 1016 | atomic_dec(&ghes_in_nmi); |
81e88fdc HY |
1017 | return ret; |
1018 | } | |
1019 | ||
44a69f61 TN |
1020 | static void ghes_nmi_add(struct ghes *ghes) |
1021 | { | |
44a69f61 TN |
1022 | mutex_lock(&ghes_list_mutex); |
1023 | if (list_empty(&ghes_nmi)) | |
1024 | register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); | |
1025 | list_add_rcu(&ghes->list, &ghes_nmi); | |
1026 | mutex_unlock(&ghes_list_mutex); | |
1027 | } | |
1028 | ||
1029 | static void ghes_nmi_remove(struct ghes *ghes) | |
1030 | { | |
44a69f61 TN |
1031 | mutex_lock(&ghes_list_mutex); |
1032 | list_del_rcu(&ghes->list); | |
1033 | if (list_empty(&ghes_nmi)) | |
1034 | unregister_nmi_handler(NMI_LOCAL, "ghes"); | |
1035 | mutex_unlock(&ghes_list_mutex); | |
1036 | /* | |
1037 | * To synchronize with NMI handler, ghes can only be | |
1038 | * freed after NMI handler finishes. | |
1039 | */ | |
1040 | synchronize_rcu(); | |
44a69f61 | 1041 | } |
255097c8 JM |
1042 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ |
/* No-op variants used when CONFIG_HAVE_ACPI_APEI_NMI is not set. */
static inline void ghes_nmi_add(struct ghes *ghes)
{
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
}
1045 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ | |
44a69f61 TN |
1046 | |
1047 | static void ghes_nmi_init_cxt(void) | |
1048 | { | |
1049 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | |
1050 | } | |
44a69f61 | 1051 | |
f9f05395 JM |
1052 | static int __ghes_sdei_callback(struct ghes *ghes, |
1053 | enum fixed_addresses fixmap_idx) | |
1054 | { | |
1055 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { | |
1056 | irq_work_queue(&ghes_proc_irq_work); | |
1057 | ||
1058 | return 0; | |
1059 | } | |
1060 | ||
1061 | return -ENOENT; | |
1062 | } | |
1063 | ||
1064 | static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, | |
1065 | void *arg) | |
1066 | { | |
1067 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); | |
1068 | struct ghes *ghes = arg; | |
1069 | int err; | |
1070 | ||
1071 | raw_spin_lock(&ghes_notify_lock_sdei_normal); | |
1072 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); | |
1073 | raw_spin_unlock(&ghes_notify_lock_sdei_normal); | |
1074 | ||
1075 | return err; | |
1076 | } | |
1077 | ||
1078 | static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, | |
1079 | void *arg) | |
1080 | { | |
1081 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); | |
1082 | struct ghes *ghes = arg; | |
1083 | int err; | |
1084 | ||
1085 | raw_spin_lock(&ghes_notify_lock_sdei_critical); | |
1086 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); | |
1087 | raw_spin_unlock(&ghes_notify_lock_sdei_critical); | |
1088 | ||
1089 | return err; | |
1090 | } | |
1091 | ||
1092 | static int apei_sdei_register_ghes(struct ghes *ghes) | |
1093 | { | |
1094 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1095 | return -EOPNOTSUPP; | |
1096 | ||
1097 | return sdei_register_ghes(ghes, ghes_sdei_normal_callback, | |
1098 | ghes_sdei_critical_callback); | |
1099 | } | |
1100 | ||
1101 | static int apei_sdei_unregister_ghes(struct ghes *ghes) | |
1102 | { | |
1103 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1104 | return -EOPNOTSUPP; | |
1105 | ||
1106 | return sdei_unregister_ghes(ghes); | |
1107 | } | |
1108 | ||
da095fd3 | 1109 | static int ghes_probe(struct platform_device *ghes_dev) |
d334a491 HY |
1110 | { |
1111 | struct acpi_hest_generic *generic; | |
1112 | struct ghes *ghes = NULL; | |
3b880cbe | 1113 | unsigned long flags; |
44a69f61 | 1114 | |
7ad6e943 | 1115 | int rc = -EINVAL; |
d334a491 | 1116 | |
1dd6b20e | 1117 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
d334a491 | 1118 | if (!generic->enabled) |
7ad6e943 | 1119 | return -ENODEV; |
d334a491 | 1120 | |
81e88fdc HY |
1121 | switch (generic->notify.type) { |
1122 | case ACPI_HEST_NOTIFY_POLLED: | |
1123 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1124 | case ACPI_HEST_NOTIFY_SCI: | |
7bf130e4 SJ |
1125 | case ACPI_HEST_NOTIFY_GSIV: |
1126 | case ACPI_HEST_NOTIFY_GPIO: | |
44a69f61 | 1127 | break; |
7bf130e4 | 1128 | |
7edda088 TB |
1129 | case ACPI_HEST_NOTIFY_SEA: |
1130 | if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { | |
1131 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", | |
1132 | generic->header.source_id); | |
1133 | rc = -ENOTSUPP; | |
1134 | goto err; | |
1135 | } | |
1136 | break; | |
81e88fdc | 1137 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 TN |
1138 | if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { |
1139 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", | |
1140 | generic->header.source_id); | |
1141 | goto err; | |
1142 | } | |
81e88fdc | 1143 | break; |
f9f05395 JM |
1144 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1145 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { | |
1146 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", | |
1147 | generic->header.source_id); | |
1148 | goto err; | |
1149 | } | |
1150 | break; | |
81e88fdc | 1151 | case ACPI_HEST_NOTIFY_LOCAL: |
933ca4e3 KW |
1152 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", |
1153 | generic->header.source_id); | |
d334a491 | 1154 | goto err; |
81e88fdc | 1155 | default: |
933ca4e3 KW |
1156 | pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", |
1157 | generic->notify.type, generic->header.source_id); | |
81e88fdc | 1158 | goto err; |
d334a491 | 1159 | } |
81e88fdc HY |
1160 | |
1161 | rc = -EIO; | |
1162 | if (generic->error_block_length < | |
0a00fd5e | 1163 | sizeof(struct acpi_hest_generic_status)) { |
933ca4e3 KW |
1164 | pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", |
1165 | generic->error_block_length, generic->header.source_id); | |
d334a491 HY |
1166 | goto err; |
1167 | } | |
1168 | ghes = ghes_new(generic); | |
1169 | if (IS_ERR(ghes)) { | |
1170 | rc = PTR_ERR(ghes); | |
1171 | ghes = NULL; | |
1172 | goto err; | |
1173 | } | |
21480547 | 1174 | |
81e88fdc HY |
1175 | switch (generic->notify.type) { |
1176 | case ACPI_HEST_NOTIFY_POLLED: | |
cea79e7e | 1177 | timer_setup(&ghes->timer, ghes_poll_func, 0); |
81e88fdc HY |
1178 | ghes_add_timer(ghes); |
1179 | break; | |
1180 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1181 | /* External interrupt vector is GSI */ | |
a98d4f64 WY |
1182 | rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); |
1183 | if (rc) { | |
81e88fdc HY |
1184 | pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", |
1185 | generic->header.source_id); | |
cc7f3f13 | 1186 | goto err; |
81e88fdc | 1187 | } |
bdb9458a LH |
1188 | rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, |
1189 | "GHES IRQ", ghes); | |
a98d4f64 | 1190 | if (rc) { |
81e88fdc HY |
1191 | pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", |
1192 | generic->header.source_id); | |
cc7f3f13 | 1193 | goto err; |
81e88fdc HY |
1194 | } |
1195 | break; | |
7bf130e4 | 1196 | |
81e88fdc | 1197 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1198 | case ACPI_HEST_NOTIFY_GSIV: |
1199 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 | 1200 | mutex_lock(&ghes_list_mutex); |
7bf130e4 SJ |
1201 | if (list_empty(&ghes_hed)) |
1202 | register_acpi_hed_notifier(&ghes_notifier_hed); | |
1203 | list_add_rcu(&ghes->list, &ghes_hed); | |
7ad6e943 | 1204 | mutex_unlock(&ghes_list_mutex); |
81e88fdc | 1205 | break; |
7bf130e4 | 1206 | |
7edda088 TB |
1207 | case ACPI_HEST_NOTIFY_SEA: |
1208 | ghes_sea_add(ghes); | |
1209 | break; | |
81e88fdc | 1210 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1211 | ghes_nmi_add(ghes); |
81e88fdc | 1212 | break; |
f9f05395 JM |
1213 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1214 | rc = apei_sdei_register_ghes(ghes); | |
1215 | if (rc) | |
1216 | goto err; | |
1217 | break; | |
81e88fdc HY |
1218 | default: |
1219 | BUG(); | |
d334a491 | 1220 | } |
cc7f3f13 | 1221 | |
7ad6e943 | 1222 | platform_set_drvdata(ghes_dev, ghes); |
d334a491 | 1223 | |
cc7f3f13 BP |
1224 | ghes_edac_register(ghes, &ghes_dev->dev); |
1225 | ||
77b246b3 | 1226 | /* Handle any pending errors right away */ |
3b880cbe | 1227 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
77b246b3 | 1228 | ghes_proc(ghes); |
3b880cbe | 1229 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
77b246b3 | 1230 | |
d334a491 | 1231 | return 0; |
cc7f3f13 | 1232 | |
d334a491 | 1233 | err: |
7ad6e943 | 1234 | if (ghes) { |
d334a491 | 1235 | ghes_fini(ghes); |
7ad6e943 HY |
1236 | kfree(ghes); |
1237 | } | |
d334a491 HY |
1238 | return rc; |
1239 | } | |
1240 | ||
b59bc2fb | 1241 | static int ghes_remove(struct platform_device *ghes_dev) |
d334a491 | 1242 | { |
f9f05395 | 1243 | int rc; |
7ad6e943 HY |
1244 | struct ghes *ghes; |
1245 | struct acpi_hest_generic *generic; | |
d334a491 | 1246 | |
7ad6e943 HY |
1247 | ghes = platform_get_drvdata(ghes_dev); |
1248 | generic = ghes->generic; | |
1249 | ||
81e88fdc | 1250 | ghes->flags |= GHES_EXITING; |
7ad6e943 | 1251 | switch (generic->notify.type) { |
81e88fdc HY |
1252 | case ACPI_HEST_NOTIFY_POLLED: |
1253 | del_timer_sync(&ghes->timer); | |
1254 | break; | |
1255 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1256 | free_irq(ghes->irq, ghes); | |
1257 | break; | |
7bf130e4 | 1258 | |
7ad6e943 | 1259 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1260 | case ACPI_HEST_NOTIFY_GSIV: |
1261 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 HY |
1262 | mutex_lock(&ghes_list_mutex); |
1263 | list_del_rcu(&ghes->list); | |
7bf130e4 SJ |
1264 | if (list_empty(&ghes_hed)) |
1265 | unregister_acpi_hed_notifier(&ghes_notifier_hed); | |
7ad6e943 | 1266 | mutex_unlock(&ghes_list_mutex); |
7d64f82c | 1267 | synchronize_rcu(); |
7ad6e943 | 1268 | break; |
7bf130e4 | 1269 | |
7edda088 TB |
1270 | case ACPI_HEST_NOTIFY_SEA: |
1271 | ghes_sea_remove(ghes); | |
1272 | break; | |
81e88fdc | 1273 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1274 | ghes_nmi_remove(ghes); |
81e88fdc | 1275 | break; |
f9f05395 JM |
1276 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1277 | rc = apei_sdei_unregister_ghes(ghes); | |
1278 | if (rc) | |
1279 | return rc; | |
1280 | break; | |
7ad6e943 HY |
1281 | default: |
1282 | BUG(); | |
1283 | break; | |
1284 | } | |
d334a491 | 1285 | |
7ad6e943 | 1286 | ghes_fini(ghes); |
21480547 MCC |
1287 | |
1288 | ghes_edac_unregister(ghes); | |
1289 | ||
7ad6e943 | 1290 | kfree(ghes); |
d334a491 | 1291 | |
7ad6e943 HY |
1292 | platform_set_drvdata(ghes_dev, NULL); |
1293 | ||
1294 | return 0; | |
d334a491 HY |
1295 | } |
1296 | ||
7ad6e943 HY |
1297 | static struct platform_driver ghes_platform_driver = { |
1298 | .driver = { | |
1299 | .name = "GHES", | |
7ad6e943 HY |
1300 | }, |
1301 | .probe = ghes_probe, | |
1302 | .remove = ghes_remove, | |
1303 | }; | |
1304 | ||
d334a491 HY |
1305 | static int __init ghes_init(void) |
1306 | { | |
81e88fdc HY |
1307 | int rc; |
1308 | ||
d334a491 HY |
1309 | if (acpi_disabled) |
1310 | return -ENODEV; | |
1311 | ||
e931d0da PA |
1312 | switch (hest_disable) { |
1313 | case HEST_NOT_FOUND: | |
1314 | return -ENODEV; | |
1315 | case HEST_DISABLED: | |
d334a491 HY |
1316 | pr_info(GHES_PFX "HEST is not enabled!\n"); |
1317 | return -EINVAL; | |
e931d0da PA |
1318 | default: |
1319 | break; | |
d334a491 HY |
1320 | } |
1321 | ||
b6a95016 HY |
1322 | if (ghes_disable) { |
1323 | pr_info(GHES_PFX "GHES is not enabled!\n"); | |
1324 | return -EINVAL; | |
1325 | } | |
1326 | ||
44a69f61 | 1327 | ghes_nmi_init_cxt(); |
67eb2e99 | 1328 | |
67eb2e99 HY |
1329 | rc = platform_driver_register(&ghes_platform_driver); |
1330 | if (rc) | |
e147133a | 1331 | goto err; |
67eb2e99 | 1332 | |
9fb0bfe1 HY |
1333 | rc = apei_osc_setup(); |
1334 | if (rc == 0 && osc_sb_apei_support_acked) | |
1335 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); | |
1336 | else if (rc == 0 && !osc_sb_apei_support_acked) | |
1337 | pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); | |
1338 | else if (rc && osc_sb_apei_support_acked) | |
1339 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); | |
1340 | else | |
1341 | pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); | |
1342 | ||
81e88fdc | 1343 | return 0; |
81e88fdc HY |
1344 | err: |
1345 | return rc; | |
d334a491 | 1346 | } |
020bf066 | 1347 | device_initcall(ghes_init); |