]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * APEI Generic Hardware Error Source support | |
3 | * | |
4 | * Generic Hardware Error Source provides a way to report platform | |
5 | * hardware errors (such as that from chipset). It works in so called | |
6 | * "Firmware First" mode, that is, hardware errors are reported to | |
7 | * firmware firstly, then reported to Linux by firmware. This way, | |
8 | * some non-standard hardware error registers or non-standard hardware | |
9 | * link can be checked by firmware to produce more hardware error | |
10 | * information for Linux. | |
11 | * | |
12 | * For more information about Generic Hardware Error Source, please | |
13 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
14 | * | |
15 | * Copyright 2010,2011 Intel Corp. | |
16 | * Author: Huang Ying <ying.huang@intel.com> | |
17 | * | |
18 | * This program is free software; you can redistribute it and/or | |
19 | * modify it under the terms of the GNU General Public License version | |
20 | * 2 as published by the Free Software Foundation; | |
21 | * | |
22 | * This program is distributed in the hope that it will be useful, | |
23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | * GNU General Public License for more details. | |
26 | */ | |
27 | ||
28 | #include <linux/kernel.h> | |
29 | #include <linux/moduleparam.h> | |
30 | #include <linux/init.h> | |
31 | #include <linux/acpi.h> | |
32 | #include <linux/io.h> | |
33 | #include <linux/interrupt.h> | |
34 | #include <linux/timer.h> | |
35 | #include <linux/cper.h> | |
36 | #include <linux/kdebug.h> | |
37 | #include <linux/platform_device.h> | |
38 | #include <linux/mutex.h> | |
39 | #include <linux/ratelimit.h> | |
40 | #include <linux/vmalloc.h> | |
41 | #include <linux/irq_work.h> | |
42 | #include <linux/llist.h> | |
43 | #include <linux/genalloc.h> | |
44 | #include <linux/pci.h> | |
45 | #include <linux/aer.h> | |
46 | #include <linux/nmi.h> | |
47 | #include <linux/sched/clock.h> | |
48 | #include <linux/uuid.h> | |
49 | #include <linux/ras.h> | |
50 | ||
51 | #include <acpi/actbl1.h> | |
52 | #include <acpi/ghes.h> | |
53 | #include <acpi/apei.h> | |
54 | #include <asm/fixmap.h> | |
55 | #include <asm/tlbflush.h> | |
56 | #include <ras/ras_event.h> | |
57 | ||
58 | #include "apei-internal.h" | |
59 | ||
/* Prefix used by the log messages emitted from this file. */
#define GHES_PFX "GHES: "

/* Cap applied to the size of the per-source estatus buffer in ghes_new(). */
#define GHES_ESTATUS_MAX_SIZE 65536
/* Upper bound used by ghes_esource_prealloc_size(). */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536

/* min_alloc_order handed to gen_pool_create() for ghes_estatus_pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimate used for memory pool allocation. */
#define GHES_ESTATUS_CACHE_AVG_SIZE 512

/* Number of slots in the ghes_estatus_caches[] array. */
#define GHES_ESTATUS_CACHES_SIZE 4

/* Age, compared against sched_clock() deltas, after which a cache entry is stale. */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
/* Prevent too many caches from being allocated because of RCU. */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/* Size of a cache entry header followed by estatus_len bytes of estatus. */
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
 (sizeof(struct ghes_estatus_cache) + (estatus_len))
/* The estatus copy is stored directly behind the cache entry header. */
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
 ((struct acpi_hest_generic_status *) \
 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/* Size of a queued node header followed by estatus_len bytes of estatus. */
#define GHES_ESTATUS_NODE_LEN(estatus_len) \
 (sizeof(struct ghes_estatus_node) + (estatus_len))
/* The estatus copy is stored directly behind the node header. */
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
 ((struct acpi_hest_generic_status *) \
 ((struct ghes_estatus_node *)(estatus_node) + 1))
87 | ||
88 | static inline bool is_hest_type_generic_v2(struct ghes *ghes) | |
89 | { | |
90 | return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; | |
91 | } | |
92 | ||
/*
 * This driver isn't really modular; however, for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * changing the lists, not for traversing them.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * The memory area used to transfer hardware error information from
 * BIOS to Linux can be determined only in an NMI, IRQ or timer
 * handler, and general ioremap cannot be used in atomic context, so
 * the fixmap is used instead.
 *
 * These 2 spinlocks are used to prevent the fixmap entries from being used
 * simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

/* Pool backing estatus cache entries and queued estatus nodes. */
static struct gen_pool *ghes_estatus_pool;
/* Running total of the page-aligned sizes requested via ghes_estatus_pool_expand(). */
static unsigned long ghes_estatus_pool_size_request;

/* RCU-managed slots holding recently reported estatus blocks. */
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
/* Count of cache entries currently allocated from the pool. */
static atomic_t ghes_estatus_cache_alloced;

/* Value copied into panic_timeout by __ghes_panic() when none is set. */
static int ghes_panic_timeout __read_mostly = 30;
131 | ||
132 | static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) | |
133 | { | |
134 | phys_addr_t paddr; | |
135 | pgprot_t prot; | |
136 | ||
137 | paddr = pfn << PAGE_SHIFT; | |
138 | prot = arch_apei_get_mem_attribute(paddr); | |
139 | __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot); | |
140 | ||
141 | return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI); | |
142 | } | |
143 | ||
144 | static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) | |
145 | { | |
146 | phys_addr_t paddr; | |
147 | pgprot_t prot; | |
148 | ||
149 | paddr = pfn << PAGE_SHIFT; | |
150 | prot = arch_apei_get_mem_attribute(paddr); | |
151 | __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot); | |
152 | ||
153 | return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ); | |
154 | } | |
155 | ||
156 | static void ghes_iounmap_nmi(void) | |
157 | { | |
158 | clear_fixmap(FIX_APEI_GHES_NMI); | |
159 | } | |
160 | ||
161 | static void ghes_iounmap_irq(void) | |
162 | { | |
163 | clear_fixmap(FIX_APEI_GHES_IRQ); | |
164 | } | |
165 | ||
166 | static int ghes_estatus_pool_init(void) | |
167 | { | |
168 | ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); | |
169 | if (!ghes_estatus_pool) | |
170 | return -ENOMEM; | |
171 | return 0; | |
172 | } | |
173 | ||
174 | static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, | |
175 | struct gen_pool_chunk *chunk, | |
176 | void *data) | |
177 | { | |
178 | free_page(chunk->start_addr); | |
179 | } | |
180 | ||
181 | static void ghes_estatus_pool_exit(void) | |
182 | { | |
183 | gen_pool_for_each_chunk(ghes_estatus_pool, | |
184 | ghes_estatus_pool_free_chunk_page, NULL); | |
185 | gen_pool_destroy(ghes_estatus_pool); | |
186 | } | |
187 | ||
188 | static int ghes_estatus_pool_expand(unsigned long len) | |
189 | { | |
190 | unsigned long i, pages, size, addr; | |
191 | int ret; | |
192 | ||
193 | ghes_estatus_pool_size_request += PAGE_ALIGN(len); | |
194 | size = gen_pool_size(ghes_estatus_pool); | |
195 | if (size >= ghes_estatus_pool_size_request) | |
196 | return 0; | |
197 | pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; | |
198 | for (i = 0; i < pages; i++) { | |
199 | addr = __get_free_page(GFP_KERNEL); | |
200 | if (!addr) | |
201 | return -ENOMEM; | |
202 | ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); | |
203 | if (ret) | |
204 | return ret; | |
205 | } | |
206 | ||
207 | return 0; | |
208 | } | |
209 | ||
210 | static int map_gen_v2(struct ghes *ghes) | |
211 | { | |
212 | return apei_map_generic_address(&ghes->generic_v2->read_ack_register); | |
213 | } | |
214 | ||
215 | static void unmap_gen_v2(struct ghes *ghes) | |
216 | { | |
217 | apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); | |
218 | } | |
219 | ||
220 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) | |
221 | { | |
222 | struct ghes *ghes; | |
223 | unsigned int error_block_length; | |
224 | int rc; | |
225 | ||
226 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
227 | if (!ghes) | |
228 | return ERR_PTR(-ENOMEM); | |
229 | ||
230 | ghes->generic = generic; | |
231 | if (is_hest_type_generic_v2(ghes)) { | |
232 | rc = map_gen_v2(ghes); | |
233 | if (rc) | |
234 | goto err_free; | |
235 | } | |
236 | ||
237 | rc = apei_map_generic_address(&generic->error_status_address); | |
238 | if (rc) | |
239 | goto err_unmap_read_ack_addr; | |
240 | error_block_length = generic->error_block_length; | |
241 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
242 | pr_warning(FW_WARN GHES_PFX | |
243 | "Error status block length is too long: %u for " | |
244 | "generic hardware error source: %d.\n", | |
245 | error_block_length, generic->header.source_id); | |
246 | error_block_length = GHES_ESTATUS_MAX_SIZE; | |
247 | } | |
248 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
249 | if (!ghes->estatus) { | |
250 | rc = -ENOMEM; | |
251 | goto err_unmap_status_addr; | |
252 | } | |
253 | ||
254 | return ghes; | |
255 | ||
256 | err_unmap_status_addr: | |
257 | apei_unmap_generic_address(&generic->error_status_address); | |
258 | err_unmap_read_ack_addr: | |
259 | if (is_hest_type_generic_v2(ghes)) | |
260 | unmap_gen_v2(ghes); | |
261 | err_free: | |
262 | kfree(ghes); | |
263 | return ERR_PTR(rc); | |
264 | } | |
265 | ||
266 | static void ghes_fini(struct ghes *ghes) | |
267 | { | |
268 | kfree(ghes->estatus); | |
269 | apei_unmap_generic_address(&ghes->generic->error_status_address); | |
270 | if (is_hest_type_generic_v2(ghes)) | |
271 | unmap_gen_v2(ghes); | |
272 | } | |
273 | ||
274 | static inline int ghes_severity(int severity) | |
275 | { | |
276 | switch (severity) { | |
277 | case CPER_SEV_INFORMATIONAL: | |
278 | return GHES_SEV_NO; | |
279 | case CPER_SEV_CORRECTED: | |
280 | return GHES_SEV_CORRECTED; | |
281 | case CPER_SEV_RECOVERABLE: | |
282 | return GHES_SEV_RECOVERABLE; | |
283 | case CPER_SEV_FATAL: | |
284 | return GHES_SEV_PANIC; | |
285 | default: | |
286 | /* Unknown, go panic */ | |
287 | return GHES_SEV_PANIC; | |
288 | } | |
289 | } | |
290 | ||
291 | static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, | |
292 | int from_phys) | |
293 | { | |
294 | void __iomem *vaddr; | |
295 | unsigned long flags = 0; | |
296 | int in_nmi = in_nmi(); | |
297 | u64 offset; | |
298 | u32 trunk; | |
299 | ||
300 | while (len > 0) { | |
301 | offset = paddr - (paddr & PAGE_MASK); | |
302 | if (in_nmi) { | |
303 | raw_spin_lock(&ghes_ioremap_lock_nmi); | |
304 | vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); | |
305 | } else { | |
306 | spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); | |
307 | vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); | |
308 | } | |
309 | trunk = PAGE_SIZE - offset; | |
310 | trunk = min(trunk, len); | |
311 | if (from_phys) | |
312 | memcpy_fromio(buffer, vaddr + offset, trunk); | |
313 | else | |
314 | memcpy_toio(vaddr + offset, buffer, trunk); | |
315 | len -= trunk; | |
316 | paddr += trunk; | |
317 | buffer += trunk; | |
318 | if (in_nmi) { | |
319 | ghes_iounmap_nmi(); | |
320 | raw_spin_unlock(&ghes_ioremap_lock_nmi); | |
321 | } else { | |
322 | ghes_iounmap_irq(); | |
323 | spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); | |
324 | } | |
325 | } | |
326 | } | |
327 | ||
328 | static int ghes_read_estatus(struct ghes *ghes, int silent) | |
329 | { | |
330 | struct acpi_hest_generic *g = ghes->generic; | |
331 | u64 buf_paddr; | |
332 | u32 len; | |
333 | int rc; | |
334 | ||
335 | rc = apei_read(&buf_paddr, &g->error_status_address); | |
336 | if (rc) { | |
337 | if (!silent && printk_ratelimit()) | |
338 | pr_warning(FW_WARN GHES_PFX | |
339 | "Failed to read error status block address for hardware error source: %d.\n", | |
340 | g->header.source_id); | |
341 | return -EIO; | |
342 | } | |
343 | if (!buf_paddr) | |
344 | return -ENOENT; | |
345 | ||
346 | ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, | |
347 | sizeof(*ghes->estatus), 1); | |
348 | if (!ghes->estatus->block_status) | |
349 | return -ENOENT; | |
350 | ||
351 | ghes->buffer_paddr = buf_paddr; | |
352 | ghes->flags |= GHES_TO_CLEAR; | |
353 | ||
354 | rc = -EIO; | |
355 | len = cper_estatus_len(ghes->estatus); | |
356 | if (len < sizeof(*ghes->estatus)) | |
357 | goto err_read_block; | |
358 | if (len > ghes->generic->error_block_length) | |
359 | goto err_read_block; | |
360 | if (cper_estatus_check_header(ghes->estatus)) | |
361 | goto err_read_block; | |
362 | ghes_copy_tofrom_phys(ghes->estatus + 1, | |
363 | buf_paddr + sizeof(*ghes->estatus), | |
364 | len - sizeof(*ghes->estatus), 1); | |
365 | if (cper_estatus_check(ghes->estatus)) | |
366 | goto err_read_block; | |
367 | rc = 0; | |
368 | ||
369 | err_read_block: | |
370 | if (rc && !silent && printk_ratelimit()) | |
371 | pr_warning(FW_WARN GHES_PFX | |
372 | "Failed to read error status block!\n"); | |
373 | return rc; | |
374 | } | |
375 | ||
376 | static void ghes_clear_estatus(struct ghes *ghes) | |
377 | { | |
378 | ghes->estatus->block_status = 0; | |
379 | if (!(ghes->flags & GHES_TO_CLEAR)) | |
380 | return; | |
381 | ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, | |
382 | sizeof(ghes->estatus->block_status), 0); | |
383 | ghes->flags &= ~GHES_TO_CLEAR; | |
384 | } | |
385 | ||
/*
 * Queue memory-failure handling for the memory error section @gdata.
 *
 * @sev is the severity of the enclosing status block. Nothing is done
 * unless the section carries a valid physical address whose pfn is valid.
 * A corrected section with the threshold-exceeded flag is queued with
 * MF_SOFT_OFFLINE; a recoverable section inside a recoverable block is
 * queued with no flags. Compiles to nothing without
 * CONFIG_ACPI_APEI_MEMORY_FAILURE.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int mf_flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

	if ((mem_err->validation_bits & CPER_MEM_VALID_PA) == 0)
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
		"Invalid address in generic error data: %#llx\n",
		mem_err->physical_addr);
		return;
	}

	/* Only these two kinds of event can be handled properly for now. */
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		mf_flags = 0;
	else if (sec_sev == GHES_SEV_CORRECTED &&
		 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		mf_flags = MF_SOFT_OFFLINE;

	if (mf_flags == -1)
		return;

	memory_failure_queue(pfn, 0, mf_flags);
#endif
}
416 | ||
417 | static void ghes_do_proc(struct ghes *ghes, | |
418 | const struct acpi_hest_generic_status *estatus) | |
419 | { | |
420 | int sev, sec_sev; | |
421 | struct acpi_hest_generic_data *gdata; | |
422 | guid_t *sec_type; | |
423 | guid_t *fru_id = &NULL_UUID_LE; | |
424 | char *fru_text = ""; | |
425 | ||
426 | sev = ghes_severity(estatus->error_severity); | |
427 | apei_estatus_for_each_section(estatus, gdata) { | |
428 | sec_type = (guid_t *)gdata->section_type; | |
429 | sec_sev = ghes_severity(gdata->error_severity); | |
430 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) | |
431 | fru_id = (guid_t *)gdata->fru_id; | |
432 | ||
433 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) | |
434 | fru_text = gdata->fru_text; | |
435 | ||
436 | if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { | |
437 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); | |
438 | ||
439 | ghes_edac_report_mem_error(ghes, sev, mem_err); | |
440 | ||
441 | arch_apei_report_mem_error(sev, mem_err); | |
442 | ghes_handle_memory_failure(gdata, sev); | |
443 | } | |
444 | #ifdef CONFIG_ACPI_APEI_PCIEAER | |
445 | else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { | |
446 | struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); | |
447 | ||
448 | if (sev == GHES_SEV_RECOVERABLE && | |
449 | sec_sev == GHES_SEV_RECOVERABLE && | |
450 | pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && | |
451 | pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { | |
452 | unsigned int devfn; | |
453 | int aer_severity; | |
454 | ||
455 | devfn = PCI_DEVFN(pcie_err->device_id.device, | |
456 | pcie_err->device_id.function); | |
457 | aer_severity = cper_severity_to_aer(gdata->error_severity); | |
458 | ||
459 | /* | |
460 | * If firmware reset the component to contain | |
461 | * the error, we must reinitialize it before | |
462 | * use, so treat it as a fatal AER error. | |
463 | */ | |
464 | if (gdata->flags & CPER_SEC_RESET) | |
465 | aer_severity = AER_FATAL; | |
466 | ||
467 | aer_recover_queue(pcie_err->device_id.segment, | |
468 | pcie_err->device_id.bus, | |
469 | devfn, aer_severity, | |
470 | (struct aer_capability_regs *) | |
471 | pcie_err->aer_info); | |
472 | } | |
473 | ||
474 | } | |
475 | #endif | |
476 | else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { | |
477 | struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); | |
478 | ||
479 | log_arm_hw_error(err); | |
480 | } else { | |
481 | void *err = acpi_hest_get_payload(gdata); | |
482 | ||
483 | log_non_standard_event(sec_type, fru_id, fru_text, | |
484 | sec_sev, err, | |
485 | gdata->error_data_length); | |
486 | } | |
487 | } | |
488 | } | |
489 | ||
490 | static void __ghes_print_estatus(const char *pfx, | |
491 | const struct acpi_hest_generic *generic, | |
492 | const struct acpi_hest_generic_status *estatus) | |
493 | { | |
494 | static atomic_t seqno; | |
495 | unsigned int curr_seqno; | |
496 | char pfx_seq[64]; | |
497 | ||
498 | if (pfx == NULL) { | |
499 | if (ghes_severity(estatus->error_severity) <= | |
500 | GHES_SEV_CORRECTED) | |
501 | pfx = KERN_WARNING; | |
502 | else | |
503 | pfx = KERN_ERR; | |
504 | } | |
505 | curr_seqno = atomic_inc_return(&seqno); | |
506 | snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); | |
507 | printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", | |
508 | pfx_seq, generic->header.source_id); | |
509 | cper_estatus_print(pfx_seq, estatus); | |
510 | } | |
511 | ||
512 | static int ghes_print_estatus(const char *pfx, | |
513 | const struct acpi_hest_generic *generic, | |
514 | const struct acpi_hest_generic_status *estatus) | |
515 | { | |
516 | /* Not more than 2 messages every 5 seconds */ | |
517 | static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); | |
518 | static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); | |
519 | struct ratelimit_state *ratelimit; | |
520 | ||
521 | if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) | |
522 | ratelimit = &ratelimit_corrected; | |
523 | else | |
524 | ratelimit = &ratelimit_uncorrected; | |
525 | if (__ratelimit(ratelimit)) { | |
526 | __ghes_print_estatus(pfx, generic, estatus); | |
527 | return 1; | |
528 | } | |
529 | return 0; | |
530 | } | |
531 | ||
532 | /* | |
533 | * GHES error status reporting throttle, to report more kinds of | |
534 | * errors, instead of just most frequently occurred errors. | |
535 | */ | |
536 | static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) | |
537 | { | |
538 | u32 len; | |
539 | int i, cached = 0; | |
540 | unsigned long long now; | |
541 | struct ghes_estatus_cache *cache; | |
542 | struct acpi_hest_generic_status *cache_estatus; | |
543 | ||
544 | len = cper_estatus_len(estatus); | |
545 | rcu_read_lock(); | |
546 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
547 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
548 | if (cache == NULL) | |
549 | continue; | |
550 | if (len != cache->estatus_len) | |
551 | continue; | |
552 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
553 | if (memcmp(estatus, cache_estatus, len)) | |
554 | continue; | |
555 | atomic_inc(&cache->count); | |
556 | now = sched_clock(); | |
557 | if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) | |
558 | cached = 1; | |
559 | break; | |
560 | } | |
561 | rcu_read_unlock(); | |
562 | return cached; | |
563 | } | |
564 | ||
565 | static struct ghes_estatus_cache *ghes_estatus_cache_alloc( | |
566 | struct acpi_hest_generic *generic, | |
567 | struct acpi_hest_generic_status *estatus) | |
568 | { | |
569 | int alloced; | |
570 | u32 len, cache_len; | |
571 | struct ghes_estatus_cache *cache; | |
572 | struct acpi_hest_generic_status *cache_estatus; | |
573 | ||
574 | alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); | |
575 | if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { | |
576 | atomic_dec(&ghes_estatus_cache_alloced); | |
577 | return NULL; | |
578 | } | |
579 | len = cper_estatus_len(estatus); | |
580 | cache_len = GHES_ESTATUS_CACHE_LEN(len); | |
581 | cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); | |
582 | if (!cache) { | |
583 | atomic_dec(&ghes_estatus_cache_alloced); | |
584 | return NULL; | |
585 | } | |
586 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
587 | memcpy(cache_estatus, estatus, len); | |
588 | cache->estatus_len = len; | |
589 | atomic_set(&cache->count, 0); | |
590 | cache->generic = generic; | |
591 | cache->time_in = sched_clock(); | |
592 | return cache; | |
593 | } | |
594 | ||
595 | static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) | |
596 | { | |
597 | u32 len; | |
598 | ||
599 | len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); | |
600 | len = GHES_ESTATUS_CACHE_LEN(len); | |
601 | gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); | |
602 | atomic_dec(&ghes_estatus_cache_alloced); | |
603 | } | |
604 | ||
605 | static void ghes_estatus_cache_rcu_free(struct rcu_head *head) | |
606 | { | |
607 | struct ghes_estatus_cache *cache; | |
608 | ||
609 | cache = container_of(head, struct ghes_estatus_cache, rcu); | |
610 | ghes_estatus_cache_free(cache); | |
611 | } | |
612 | ||
613 | static void ghes_estatus_cache_add( | |
614 | struct acpi_hest_generic *generic, | |
615 | struct acpi_hest_generic_status *estatus) | |
616 | { | |
617 | int i, slot = -1, count; | |
618 | unsigned long long now, duration, period, max_period = 0; | |
619 | struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; | |
620 | ||
621 | new_cache = ghes_estatus_cache_alloc(generic, estatus); | |
622 | if (new_cache == NULL) | |
623 | return; | |
624 | rcu_read_lock(); | |
625 | now = sched_clock(); | |
626 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
627 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
628 | if (cache == NULL) { | |
629 | slot = i; | |
630 | slot_cache = NULL; | |
631 | break; | |
632 | } | |
633 | duration = now - cache->time_in; | |
634 | if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { | |
635 | slot = i; | |
636 | slot_cache = cache; | |
637 | break; | |
638 | } | |
639 | count = atomic_read(&cache->count); | |
640 | period = duration; | |
641 | do_div(period, (count + 1)); | |
642 | if (period > max_period) { | |
643 | max_period = period; | |
644 | slot = i; | |
645 | slot_cache = cache; | |
646 | } | |
647 | } | |
648 | /* new_cache must be put into array after its contents are written */ | |
649 | smp_wmb(); | |
650 | if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, | |
651 | slot_cache, new_cache) == slot_cache) { | |
652 | if (slot_cache) | |
653 | call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); | |
654 | } else | |
655 | ghes_estatus_cache_free(new_cache); | |
656 | rcu_read_unlock(); | |
657 | } | |
658 | ||
659 | static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) | |
660 | { | |
661 | int rc; | |
662 | u64 val = 0; | |
663 | ||
664 | rc = apei_read(&val, &gv2->read_ack_register); | |
665 | if (rc) | |
666 | return rc; | |
667 | ||
668 | val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; | |
669 | val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; | |
670 | ||
671 | return apei_write(val, &gv2->read_ack_register); | |
672 | } | |
673 | ||
674 | static void __ghes_panic(struct ghes *ghes) | |
675 | { | |
676 | __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); | |
677 | ||
678 | /* reboot to log the error! */ | |
679 | if (!panic_timeout) | |
680 | panic_timeout = ghes_panic_timeout; | |
681 | panic("Fatal hardware error!"); | |
682 | } | |
683 | ||
684 | static int ghes_proc(struct ghes *ghes) | |
685 | { | |
686 | int rc; | |
687 | ||
688 | rc = ghes_read_estatus(ghes, 0); | |
689 | if (rc) | |
690 | goto out; | |
691 | ||
692 | if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { | |
693 | __ghes_panic(ghes); | |
694 | } | |
695 | ||
696 | if (!ghes_estatus_cached(ghes->estatus)) { | |
697 | if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) | |
698 | ghes_estatus_cache_add(ghes->generic, ghes->estatus); | |
699 | } | |
700 | ghes_do_proc(ghes, ghes->estatus); | |
701 | ||
702 | out: | |
703 | ghes_clear_estatus(ghes); | |
704 | ||
705 | if (rc == -ENOENT) | |
706 | return rc; | |
707 | ||
708 | /* | |
709 | * GHESv2 type HEST entries introduce support for error acknowledgment, | |
710 | * so only acknowledge the error if this support is present. | |
711 | */ | |
712 | if (is_hest_type_generic_v2(ghes)) | |
713 | return ghes_ack_error(ghes->generic_v2); | |
714 | ||
715 | return rc; | |
716 | } | |
717 | ||
718 | static void ghes_add_timer(struct ghes *ghes) | |
719 | { | |
720 | struct acpi_hest_generic *g = ghes->generic; | |
721 | unsigned long expire; | |
722 | ||
723 | if (!g->notify.poll_interval) { | |
724 | pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", | |
725 | g->header.source_id); | |
726 | return; | |
727 | } | |
728 | expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); | |
729 | ghes->timer.expires = round_jiffies_relative(expire); | |
730 | add_timer(&ghes->timer); | |
731 | } | |
732 | ||
733 | static void ghes_poll_func(unsigned long data) | |
734 | { | |
735 | struct ghes *ghes = (void *)data; | |
736 | ||
737 | ghes_proc(ghes); | |
738 | if (!(ghes->flags & GHES_EXITING)) | |
739 | ghes_add_timer(ghes); | |
740 | } | |
741 | ||
742 | static irqreturn_t ghes_irq_func(int irq, void *data) | |
743 | { | |
744 | struct ghes *ghes = data; | |
745 | int rc; | |
746 | ||
747 | rc = ghes_proc(ghes); | |
748 | if (rc) | |
749 | return IRQ_NONE; | |
750 | ||
751 | return IRQ_HANDLED; | |
752 | } | |
753 | ||
754 | static int ghes_notify_hed(struct notifier_block *this, unsigned long event, | |
755 | void *data) | |
756 | { | |
757 | struct ghes *ghes; | |
758 | int ret = NOTIFY_DONE; | |
759 | ||
760 | rcu_read_lock(); | |
761 | list_for_each_entry_rcu(ghes, &ghes_hed, list) { | |
762 | if (!ghes_proc(ghes)) | |
763 | ret = NOTIFY_OK; | |
764 | } | |
765 | rcu_read_unlock(); | |
766 | ||
767 | return ret; | |
768 | } | |
769 | ||
/* Notifier block whose callback is ghes_notify_hed(). */
static struct notifier_block ghes_notifier_hed = {
 .notifier_call = ghes_notify_hed,
};
773 | ||
774 | #ifdef CONFIG_ACPI_APEI_SEA | |
775 | static LIST_HEAD(ghes_sea); | |
776 | ||
777 | /* | |
778 | * Return 0 only if one of the SEA error sources successfully reported an error | |
779 | * record sent from the firmware. | |
780 | */ | |
781 | int ghes_notify_sea(void) | |
782 | { | |
783 | struct ghes *ghes; | |
784 | int ret = -ENOENT; | |
785 | ||
786 | rcu_read_lock(); | |
787 | list_for_each_entry_rcu(ghes, &ghes_sea, list) { | |
788 | if (!ghes_proc(ghes)) | |
789 | ret = 0; | |
790 | } | |
791 | rcu_read_unlock(); | |
792 | return ret; | |
793 | } | |
794 | ||
795 | static void ghes_sea_add(struct ghes *ghes) | |
796 | { | |
797 | mutex_lock(&ghes_list_mutex); | |
798 | list_add_rcu(&ghes->list, &ghes_sea); | |
799 | mutex_unlock(&ghes_list_mutex); | |
800 | } | |
801 | ||
802 | static void ghes_sea_remove(struct ghes *ghes) | |
803 | { | |
804 | mutex_lock(&ghes_list_mutex); | |
805 | list_del_rcu(&ghes->list); | |
806 | mutex_unlock(&ghes_list_mutex); | |
807 | synchronize_rcu(); | |
808 | } | |
809 | #else /* CONFIG_ACPI_APEI_SEA */ | |
/* No-op stand-ins used when CONFIG_ACPI_APEI_SEA is not set. */
static inline void ghes_sea_add(struct ghes *ghes)
{
}

static inline void ghes_sea_remove(struct ghes *ghes)
{
}
812 | #endif /* CONFIG_ACPI_APEI_SEA */ | |
813 | ||
814 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | |
/*
 * printk is not safe in NMI context. So in the NMI handler, we allocate
 * the required memory from a lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it on a lock-less
 * list (ghes_estatus_llist), then defer the printk to IRQ context via
 * irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request records
 * the pool size requested so far.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * An NMI may be triggered on any CPU, so ghes_in_nmi is used to allow
 * only one reader at a time.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/* Sources walked by ghes_notify_nmi(). */
static LIST_HEAD(ghes_nmi);
833 | ||
834 | static void ghes_proc_in_irq(struct irq_work *irq_work) | |
835 | { | |
836 | struct llist_node *llnode, *next; | |
837 | struct ghes_estatus_node *estatus_node; | |
838 | struct acpi_hest_generic *generic; | |
839 | struct acpi_hest_generic_status *estatus; | |
840 | u32 len, node_len; | |
841 | ||
842 | llnode = llist_del_all(&ghes_estatus_llist); | |
843 | /* | |
844 | * Because the time order of estatus in list is reversed, | |
845 | * revert it back to proper order. | |
846 | */ | |
847 | llnode = llist_reverse_order(llnode); | |
848 | while (llnode) { | |
849 | next = llnode->next; | |
850 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
851 | llnode); | |
852 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
853 | len = cper_estatus_len(estatus); | |
854 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
855 | ghes_do_proc(estatus_node->ghes, estatus); | |
856 | if (!ghes_estatus_cached(estatus)) { | |
857 | generic = estatus_node->generic; | |
858 | if (ghes_print_estatus(NULL, generic, estatus)) | |
859 | ghes_estatus_cache_add(generic, estatus); | |
860 | } | |
861 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, | |
862 | node_len); | |
863 | llnode = next; | |
864 | } | |
865 | } | |
866 | ||
867 | static void ghes_print_queued_estatus(void) | |
868 | { | |
869 | struct llist_node *llnode; | |
870 | struct ghes_estatus_node *estatus_node; | |
871 | struct acpi_hest_generic *generic; | |
872 | struct acpi_hest_generic_status *estatus; | |
873 | u32 len, node_len; | |
874 | ||
875 | llnode = llist_del_all(&ghes_estatus_llist); | |
876 | /* | |
877 | * Because the time order of estatus in list is reversed, | |
878 | * revert it back to proper order. | |
879 | */ | |
880 | llnode = llist_reverse_order(llnode); | |
881 | while (llnode) { | |
882 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
883 | llnode); | |
884 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
885 | len = cper_estatus_len(estatus); | |
886 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
887 | generic = estatus_node->generic; | |
888 | ghes_print_estatus(NULL, generic, estatus); | |
889 | llnode = llnode->next; | |
890 | } | |
891 | } | |
892 | ||
/*
 * Save the estatus of @ghes for further processing in IRQ context.
 *
 * Unless an identical estatus was reported recently, a node is taken from
 * ghes_estatus_pool, filled with a copy of the estatus and pushed onto
 * ghes_estatus_llist. The error is silently dropped if the pool has no
 * room. Does nothing without CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
 */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	struct ghes_estatus_node *node;
	u32 estatus_len, node_len;

	if (ghes_estatus_cached(ghes->estatus))
		return;

	estatus_len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(estatus_len);

	node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!node)
		return;

	node->ghes = ghes;
	node->generic = ghes->generic;
	memcpy(GHES_ESTATUS_FROM_NODE(node), ghes->estatus, estatus_len);
	llist_add(&node->llnode, &ghes_estatus_llist);
#endif
}
918 | ||
919 | static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) | |
920 | { | |
921 | struct ghes *ghes; | |
922 | int sev, ret = NMI_DONE; | |
923 | ||
924 | if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) | |
925 | return ret; | |
926 | ||
927 | list_for_each_entry_rcu(ghes, &ghes_nmi, list) { | |
928 | if (ghes_read_estatus(ghes, 1)) { | |
929 | ghes_clear_estatus(ghes); | |
930 | continue; | |
931 | } else { | |
932 | ret = NMI_HANDLED; | |
933 | } | |
934 | ||
935 | sev = ghes_severity(ghes->estatus->error_severity); | |
936 | if (sev >= GHES_SEV_PANIC) { | |
937 | oops_begin(); | |
938 | ghes_print_queued_estatus(); | |
939 | __ghes_panic(ghes); | |
940 | } | |
941 | ||
942 | if (!(ghes->flags & GHES_TO_CLEAR)) | |
943 | continue; | |
944 | ||
945 | __process_error(ghes); | |
946 | ghes_clear_estatus(ghes); | |
947 | } | |
948 | ||
949 | #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | |
950 | if (ret == NMI_HANDLED) | |
951 | irq_work_queue(&ghes_proc_irq_work); | |
952 | #endif | |
953 | atomic_dec(&ghes_in_nmi); | |
954 | return ret; | |
955 | } | |
956 | ||
957 | static unsigned long ghes_esource_prealloc_size( | |
958 | const struct acpi_hest_generic *generic) | |
959 | { | |
960 | unsigned long block_length, prealloc_records, prealloc_size; | |
961 | ||
962 | block_length = min_t(unsigned long, generic->error_block_length, | |
963 | GHES_ESTATUS_MAX_SIZE); | |
964 | prealloc_records = max_t(unsigned long, | |
965 | generic->records_to_preallocate, 1); | |
966 | prealloc_size = min_t(unsigned long, block_length * prealloc_records, | |
967 | GHES_ESOURCE_PREALLOC_MAX_SIZE); | |
968 | ||
969 | return prealloc_size; | |
970 | } | |
971 | ||
972 | static void ghes_estatus_pool_shrink(unsigned long len) | |
973 | { | |
974 | ghes_estatus_pool_size_request -= PAGE_ALIGN(len); | |
975 | } | |
976 | ||
977 | static void ghes_nmi_add(struct ghes *ghes) | |
978 | { | |
979 | unsigned long len; | |
980 | ||
981 | len = ghes_esource_prealloc_size(ghes->generic); | |
982 | ghes_estatus_pool_expand(len); | |
983 | mutex_lock(&ghes_list_mutex); | |
984 | if (list_empty(&ghes_nmi)) | |
985 | register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); | |
986 | list_add_rcu(&ghes->list, &ghes_nmi); | |
987 | mutex_unlock(&ghes_list_mutex); | |
988 | } | |
989 | ||
990 | static void ghes_nmi_remove(struct ghes *ghes) | |
991 | { | |
992 | unsigned long len; | |
993 | ||
994 | mutex_lock(&ghes_list_mutex); | |
995 | list_del_rcu(&ghes->list); | |
996 | if (list_empty(&ghes_nmi)) | |
997 | unregister_nmi_handler(NMI_LOCAL, "ghes"); | |
998 | mutex_unlock(&ghes_list_mutex); | |
999 | /* | |
1000 | * To synchronize with NMI handler, ghes can only be | |
1001 | * freed after NMI handler finishes. | |
1002 | */ | |
1003 | synchronize_rcu(); | |
1004 | len = ghes_esource_prealloc_size(ghes->generic); | |
1005 | ghes_estatus_pool_shrink(len); | |
1006 | } | |
1007 | ||
1008 | static void ghes_nmi_init_cxt(void) | |
1009 | { | |
1010 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | |
1011 | } | |
1012 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ | |
/* Without CONFIG_HAVE_ACPI_APEI_NMI the NMI hooks are empty stubs. */
static inline void ghes_nmi_add(struct ghes *ghes)
{
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
}

static inline void ghes_nmi_init_cxt(void)
{
}
1016 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ | |
1017 | ||
1018 | static int ghes_probe(struct platform_device *ghes_dev) | |
1019 | { | |
1020 | struct acpi_hest_generic *generic; | |
1021 | struct ghes *ghes = NULL; | |
1022 | ||
1023 | int rc = -EINVAL; | |
1024 | ||
1025 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; | |
1026 | if (!generic->enabled) | |
1027 | return -ENODEV; | |
1028 | ||
1029 | switch (generic->notify.type) { | |
1030 | case ACPI_HEST_NOTIFY_POLLED: | |
1031 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1032 | case ACPI_HEST_NOTIFY_SCI: | |
1033 | case ACPI_HEST_NOTIFY_GSIV: | |
1034 | case ACPI_HEST_NOTIFY_GPIO: | |
1035 | break; | |
1036 | ||
1037 | case ACPI_HEST_NOTIFY_SEA: | |
1038 | if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { | |
1039 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", | |
1040 | generic->header.source_id); | |
1041 | rc = -ENOTSUPP; | |
1042 | goto err; | |
1043 | } | |
1044 | break; | |
1045 | case ACPI_HEST_NOTIFY_NMI: | |
1046 | if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { | |
1047 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", | |
1048 | generic->header.source_id); | |
1049 | goto err; | |
1050 | } | |
1051 | break; | |
1052 | case ACPI_HEST_NOTIFY_LOCAL: | |
1053 | pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", | |
1054 | generic->header.source_id); | |
1055 | goto err; | |
1056 | default: | |
1057 | pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", | |
1058 | generic->notify.type, generic->header.source_id); | |
1059 | goto err; | |
1060 | } | |
1061 | ||
1062 | rc = -EIO; | |
1063 | if (generic->error_block_length < | |
1064 | sizeof(struct acpi_hest_generic_status)) { | |
1065 | pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", | |
1066 | generic->error_block_length, | |
1067 | generic->header.source_id); | |
1068 | goto err; | |
1069 | } | |
1070 | ghes = ghes_new(generic); | |
1071 | if (IS_ERR(ghes)) { | |
1072 | rc = PTR_ERR(ghes); | |
1073 | ghes = NULL; | |
1074 | goto err; | |
1075 | } | |
1076 | ||
1077 | rc = ghes_edac_register(ghes, &ghes_dev->dev); | |
1078 | if (rc < 0) | |
1079 | goto err; | |
1080 | ||
1081 | switch (generic->notify.type) { | |
1082 | case ACPI_HEST_NOTIFY_POLLED: | |
1083 | setup_deferrable_timer(&ghes->timer, ghes_poll_func, | |
1084 | (unsigned long)ghes); | |
1085 | ghes_add_timer(ghes); | |
1086 | break; | |
1087 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1088 | /* External interrupt vector is GSI */ | |
1089 | rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); | |
1090 | if (rc) { | |
1091 | pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", | |
1092 | generic->header.source_id); | |
1093 | goto err_edac_unreg; | |
1094 | } | |
1095 | rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, | |
1096 | "GHES IRQ", ghes); | |
1097 | if (rc) { | |
1098 | pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", | |
1099 | generic->header.source_id); | |
1100 | goto err_edac_unreg; | |
1101 | } | |
1102 | break; | |
1103 | ||
1104 | case ACPI_HEST_NOTIFY_SCI: | |
1105 | case ACPI_HEST_NOTIFY_GSIV: | |
1106 | case ACPI_HEST_NOTIFY_GPIO: | |
1107 | mutex_lock(&ghes_list_mutex); | |
1108 | if (list_empty(&ghes_hed)) | |
1109 | register_acpi_hed_notifier(&ghes_notifier_hed); | |
1110 | list_add_rcu(&ghes->list, &ghes_hed); | |
1111 | mutex_unlock(&ghes_list_mutex); | |
1112 | break; | |
1113 | ||
1114 | case ACPI_HEST_NOTIFY_SEA: | |
1115 | ghes_sea_add(ghes); | |
1116 | break; | |
1117 | case ACPI_HEST_NOTIFY_NMI: | |
1118 | ghes_nmi_add(ghes); | |
1119 | break; | |
1120 | default: | |
1121 | BUG(); | |
1122 | } | |
1123 | platform_set_drvdata(ghes_dev, ghes); | |
1124 | ||
1125 | /* Handle any pending errors right away */ | |
1126 | ghes_proc(ghes); | |
1127 | ||
1128 | return 0; | |
1129 | err_edac_unreg: | |
1130 | ghes_edac_unregister(ghes); | |
1131 | err: | |
1132 | if (ghes) { | |
1133 | ghes_fini(ghes); | |
1134 | kfree(ghes); | |
1135 | } | |
1136 | return rc; | |
1137 | } | |
1138 | ||
1139 | static int ghes_remove(struct platform_device *ghes_dev) | |
1140 | { | |
1141 | struct ghes *ghes; | |
1142 | struct acpi_hest_generic *generic; | |
1143 | ||
1144 | ghes = platform_get_drvdata(ghes_dev); | |
1145 | generic = ghes->generic; | |
1146 | ||
1147 | ghes->flags |= GHES_EXITING; | |
1148 | switch (generic->notify.type) { | |
1149 | case ACPI_HEST_NOTIFY_POLLED: | |
1150 | del_timer_sync(&ghes->timer); | |
1151 | break; | |
1152 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1153 | free_irq(ghes->irq, ghes); | |
1154 | break; | |
1155 | ||
1156 | case ACPI_HEST_NOTIFY_SCI: | |
1157 | case ACPI_HEST_NOTIFY_GSIV: | |
1158 | case ACPI_HEST_NOTIFY_GPIO: | |
1159 | mutex_lock(&ghes_list_mutex); | |
1160 | list_del_rcu(&ghes->list); | |
1161 | if (list_empty(&ghes_hed)) | |
1162 | unregister_acpi_hed_notifier(&ghes_notifier_hed); | |
1163 | mutex_unlock(&ghes_list_mutex); | |
1164 | synchronize_rcu(); | |
1165 | break; | |
1166 | ||
1167 | case ACPI_HEST_NOTIFY_SEA: | |
1168 | ghes_sea_remove(ghes); | |
1169 | break; | |
1170 | case ACPI_HEST_NOTIFY_NMI: | |
1171 | ghes_nmi_remove(ghes); | |
1172 | break; | |
1173 | default: | |
1174 | BUG(); | |
1175 | break; | |
1176 | } | |
1177 | ||
1178 | ghes_fini(ghes); | |
1179 | ||
1180 | ghes_edac_unregister(ghes); | |
1181 | ||
1182 | kfree(ghes); | |
1183 | ||
1184 | platform_set_drvdata(ghes_dev, NULL); | |
1185 | ||
1186 | return 0; | |
1187 | } | |
1188 | ||
1189 | static struct platform_driver ghes_platform_driver = { | |
1190 | .driver = { | |
1191 | .name = "GHES", | |
1192 | }, | |
1193 | .probe = ghes_probe, | |
1194 | .remove = ghes_remove, | |
1195 | }; | |
1196 | ||
1197 | static int __init ghes_init(void) | |
1198 | { | |
1199 | int rc; | |
1200 | ||
1201 | if (acpi_disabled) | |
1202 | return -ENODEV; | |
1203 | ||
1204 | switch (hest_disable) { | |
1205 | case HEST_NOT_FOUND: | |
1206 | return -ENODEV; | |
1207 | case HEST_DISABLED: | |
1208 | pr_info(GHES_PFX "HEST is not enabled!\n"); | |
1209 | return -EINVAL; | |
1210 | default: | |
1211 | break; | |
1212 | } | |
1213 | ||
1214 | if (ghes_disable) { | |
1215 | pr_info(GHES_PFX "GHES is not enabled!\n"); | |
1216 | return -EINVAL; | |
1217 | } | |
1218 | ||
1219 | ghes_nmi_init_cxt(); | |
1220 | ||
1221 | rc = ghes_estatus_pool_init(); | |
1222 | if (rc) | |
1223 | goto err; | |
1224 | ||
1225 | rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * | |
1226 | GHES_ESTATUS_CACHE_ALLOCED_MAX); | |
1227 | if (rc) | |
1228 | goto err_pool_exit; | |
1229 | ||
1230 | rc = platform_driver_register(&ghes_platform_driver); | |
1231 | if (rc) | |
1232 | goto err_pool_exit; | |
1233 | ||
1234 | rc = apei_osc_setup(); | |
1235 | if (rc == 0 && osc_sb_apei_support_acked) | |
1236 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); | |
1237 | else if (rc == 0 && !osc_sb_apei_support_acked) | |
1238 | pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); | |
1239 | else if (rc && osc_sb_apei_support_acked) | |
1240 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); | |
1241 | else | |
1242 | pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); | |
1243 | ||
1244 | return 0; | |
1245 | err_pool_exit: | |
1246 | ghes_estatus_pool_exit(); | |
1247 | err: | |
1248 | return rc; | |
1249 | } | |
1250 | device_initcall(ghes_init); |