]>
Commit | Line | Data |
---|---|---|
d334a491 HY |
1 | /* |
2 | * APEI Generic Hardware Error Source support | |
3 | * | |
4 | * Generic Hardware Error Source provides a way to report platform | |
5 | * hardware errors (such as that from chipset). It works in so called | |
6 | * "Firmware First" mode, that is, hardware errors are reported to | |
7 | * firmware firstly, then reported to Linux by firmware. This way, | |
8 | * some non-standard hardware error registers or non-standard hardware | |
9 | * link can be checked by firmware to produce more hardware error | |
10 | * information for Linux. | |
11 | * | |
12 | * For more information about Generic Hardware Error Source, please | |
13 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
14 | * | |
67eb2e99 | 15 | * Copyright 2010,2011 Intel Corp. |
d334a491 HY |
16 | * Author: Huang Ying <ying.huang@intel.com> |
17 | * | |
18 | * This program is free software; you can redistribute it and/or | |
19 | * modify it under the terms of the GNU General Public License version | |
20 | * 2 as published by the Free Software Foundation; | |
21 | * | |
22 | * This program is distributed in the hope that it will be useful, | |
23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | * GNU General Public License for more details. | |
26 | * | |
27 | * You should have received a copy of the GNU General Public License | |
28 | * along with this program; if not, write to the Free Software | |
29 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 | */ | |
31 | ||
32 | #include <linux/kernel.h> | |
33 | #include <linux/module.h> | |
34 | #include <linux/init.h> | |
35 | #include <linux/acpi.h> | |
36 | #include <linux/io.h> | |
37 | #include <linux/interrupt.h> | |
81e88fdc | 38 | #include <linux/timer.h> |
d334a491 HY |
39 | #include <linux/cper.h> |
40 | #include <linux/kdebug.h> | |
7ad6e943 HY |
41 | #include <linux/platform_device.h> |
42 | #include <linux/mutex.h> | |
32c361f5 | 43 | #include <linux/ratelimit.h> |
81e88fdc | 44 | #include <linux/vmalloc.h> |
67eb2e99 HY |
45 | #include <linux/irq_work.h> |
46 | #include <linux/llist.h> | |
47 | #include <linux/genalloc.h> | |
d334a491 HY |
48 | #include <acpi/apei.h> |
49 | #include <acpi/atomicio.h> | |
50 | #include <acpi/hed.h> | |
51 | #include <asm/mce.h> | |
81e88fdc | 52 | #include <asm/tlbflush.h> |
d334a491 HY |
53 | |
54 | #include "apei-internal.h" | |
55 | ||
/* Prefix put in front of every log message emitted by this driver. */
#define GHES_PFX	"GHES: "

/* Largest error status block buffer, in bytes, kept per error source. */
#define GHES_ESTATUS_MAX_SIZE			65536
/* Largest amount of pool space, in bytes, requested for one error source. */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE		65536

/* Minimum allocation order handed to gen_pool_create() for the estatus pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER	3

/* Size of a ghes_estatus_node immediately followed by estatus_len bytes. */
#define GHES_ESTATUS_NODE_LEN(estatus_len)				\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
/* Error status block stored directly behind a ghes_estatus_node. */
#define GHES_ESTATUS_FROM_NODE(estatus_node)				\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
d334a491 HY |
68 | |
69 | /* | |
81e88fdc | 70 | * One struct ghes is created for each generic hardware error source. |
d334a491 | 71 | * It provides the context for APEI hardware error timer/IRQ/SCI/NMI |
81e88fdc | 72 | * handler. |
d334a491 HY |
73 | * |
74 | * estatus: memory buffer for error status block, allocated during | |
75 | * HEST parsing. | |
76 | */ | |
77 | #define GHES_TO_CLEAR 0x0001 | |
81e88fdc | 78 | #define GHES_EXITING 0x0002 |
d334a491 HY |
79 | |
80 | struct ghes { | |
81 | struct acpi_hest_generic *generic; | |
82 | struct acpi_hest_generic_status *estatus; | |
d334a491 HY |
83 | u64 buffer_paddr; |
84 | unsigned long flags; | |
81e88fdc HY |
85 | union { |
86 | struct list_head list; | |
87 | struct timer_list timer; | |
88 | unsigned int irq; | |
89 | }; | |
d334a491 HY |
90 | }; |
91 | ||
67eb2e99 HY |
92 | struct ghes_estatus_node { |
93 | struct llist_node llnode; | |
94 | struct acpi_hest_generic *generic; | |
95 | }; | |
96 | ||
b6a95016 HY |
97 | int ghes_disable; |
98 | module_param_named(disable, ghes_disable, bool, 0); | |
99 | ||
81e88fdc HY |
100 | static int ghes_panic_timeout __read_mostly = 30; |
101 | ||
/*
 * All error sources notified via SCI share a single notifier function,
 * so they are linked on one list and checked one after another. The
 * same is done for the NMI sources.
 *
 * The lists are traversed under RCU; ghes_list_mutex is taken only
 * when a list is modified, never for traversal.
 */
static LIST_HEAD(ghes_sci);
static LIST_HEAD(ghes_nmi);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * An NMI may be raised on any CPU, so ghes_nmi_lock provides mutual
 * exclusion inside the NMI handler.
 */
static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
119 | ||
/*
 * The memory area used to pass hardware error information from the
 * BIOS to Linux is only known once the NMI, IRQ or timer handler runs,
 * and the general ioremap cannot be used in atomic context. A special
 * atomic variant of ioremap is therefore implemented here.
 */

/*
 * Two virtual pages are reserved: the first for NMI context, the
 * second for IRQ/process context.
 */
#define GHES_IOREMAP_PAGES		2
#define GHES_IOREMAP_NMI_PAGE(base)	(base)
#define GHES_IOREMAP_IRQ_PAGE(base)	((base) + PAGE_SIZE)

/* Virtual memory area backing the atomic ioremap. */
static struct vm_struct *ghes_ioremap_area;
/*
 * These two spinlocks keep each page of the atomic ioremap area from
 * being mapped by more than one user at a time.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
143 | ||
67eb2e99 HY |
144 | /* |
145 | * printk is not safe in NMI context. So in NMI handler, we allocate | |
146 | * required memory from lock-less memory allocator | |
147 | * (ghes_estatus_pool), save estatus into it, put them into lock-less | |
148 | * list (ghes_estatus_llist), then delay printk into IRQ context via | |
149 | * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record | |
150 | * required pool size by all NMI error source. | |
151 | */ | |
152 | static struct gen_pool *ghes_estatus_pool; | |
153 | static unsigned long ghes_estatus_pool_size_request; | |
154 | static struct llist_head ghes_estatus_llist; | |
155 | static struct irq_work ghes_proc_irq_work; | |
156 | ||
81e88fdc HY |
157 | static int ghes_ioremap_init(void) |
158 | { | |
159 | ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, | |
160 | VM_IOREMAP, VMALLOC_START, VMALLOC_END); | |
161 | if (!ghes_ioremap_area) { | |
162 | pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n"); | |
163 | return -ENOMEM; | |
164 | } | |
165 | ||
166 | return 0; | |
167 | } | |
168 | ||
169 | static void ghes_ioremap_exit(void) | |
170 | { | |
171 | free_vm_area(ghes_ioremap_area); | |
172 | } | |
173 | ||
174 | static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) | |
175 | { | |
176 | unsigned long vaddr; | |
177 | ||
178 | vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); | |
179 | ioremap_page_range(vaddr, vaddr + PAGE_SIZE, | |
180 | pfn << PAGE_SHIFT, PAGE_KERNEL); | |
181 | ||
182 | return (void __iomem *)vaddr; | |
183 | } | |
184 | ||
185 | static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) | |
186 | { | |
187 | unsigned long vaddr; | |
188 | ||
189 | vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); | |
190 | ioremap_page_range(vaddr, vaddr + PAGE_SIZE, | |
191 | pfn << PAGE_SHIFT, PAGE_KERNEL); | |
192 | ||
193 | return (void __iomem *)vaddr; | |
194 | } | |
195 | ||
196 | static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) | |
197 | { | |
198 | unsigned long vaddr = (unsigned long __force)vaddr_ptr; | |
199 | void *base = ghes_ioremap_area->addr; | |
200 | ||
201 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); | |
202 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | |
203 | __flush_tlb_one(vaddr); | |
204 | } | |
205 | ||
206 | static void ghes_iounmap_irq(void __iomem *vaddr_ptr) | |
207 | { | |
208 | unsigned long vaddr = (unsigned long __force)vaddr_ptr; | |
209 | void *base = ghes_ioremap_area->addr; | |
210 | ||
211 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); | |
212 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | |
213 | __flush_tlb_one(vaddr); | |
214 | } | |
215 | ||
67eb2e99 HY |
216 | static int ghes_estatus_pool_init(void) |
217 | { | |
218 | ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); | |
219 | if (!ghes_estatus_pool) | |
220 | return -ENOMEM; | |
221 | return 0; | |
222 | } | |
223 | ||
224 | static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, | |
225 | struct gen_pool_chunk *chunk, | |
226 | void *data) | |
227 | { | |
228 | free_page(chunk->start_addr); | |
229 | } | |
230 | ||
231 | static void ghes_estatus_pool_exit(void) | |
232 | { | |
233 | gen_pool_for_each_chunk(ghes_estatus_pool, | |
234 | ghes_estatus_pool_free_chunk_page, NULL); | |
235 | gen_pool_destroy(ghes_estatus_pool); | |
236 | } | |
237 | ||
238 | static int ghes_estatus_pool_expand(unsigned long len) | |
239 | { | |
240 | unsigned long i, pages, size, addr; | |
241 | int ret; | |
242 | ||
243 | ghes_estatus_pool_size_request += PAGE_ALIGN(len); | |
244 | size = gen_pool_size(ghes_estatus_pool); | |
245 | if (size >= ghes_estatus_pool_size_request) | |
246 | return 0; | |
247 | pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; | |
248 | for (i = 0; i < pages; i++) { | |
249 | addr = __get_free_page(GFP_KERNEL); | |
250 | if (!addr) | |
251 | return -ENOMEM; | |
252 | ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); | |
253 | if (ret) | |
254 | return ret; | |
255 | } | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
260 | static void ghes_estatus_pool_shrink(unsigned long len) | |
261 | { | |
262 | ghes_estatus_pool_size_request -= PAGE_ALIGN(len); | |
263 | } | |
264 | ||
d334a491 HY |
265 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) |
266 | { | |
267 | struct ghes *ghes; | |
268 | unsigned int error_block_length; | |
269 | int rc; | |
270 | ||
271 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
272 | if (!ghes) | |
273 | return ERR_PTR(-ENOMEM); | |
274 | ghes->generic = generic; | |
d334a491 HY |
275 | rc = acpi_pre_map_gar(&generic->error_status_address); |
276 | if (rc) | |
277 | goto err_free; | |
278 | error_block_length = generic->error_block_length; | |
279 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
280 | pr_warning(FW_WARN GHES_PFX | |
281 | "Error status block length is too long: %u for " | |
282 | "generic hardware error source: %d.\n", | |
283 | error_block_length, generic->header.source_id); | |
284 | error_block_length = GHES_ESTATUS_MAX_SIZE; | |
285 | } | |
286 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
287 | if (!ghes->estatus) { | |
288 | rc = -ENOMEM; | |
289 | goto err_unmap; | |
290 | } | |
291 | ||
292 | return ghes; | |
293 | ||
294 | err_unmap: | |
295 | acpi_post_unmap_gar(&generic->error_status_address); | |
296 | err_free: | |
297 | kfree(ghes); | |
298 | return ERR_PTR(rc); | |
299 | } | |
300 | ||
301 | static void ghes_fini(struct ghes *ghes) | |
302 | { | |
303 | kfree(ghes->estatus); | |
304 | acpi_post_unmap_gar(&ghes->generic->error_status_address); | |
305 | } | |
306 | ||
/*
 * Driver-internal severity levels in increasing order of seriousness,
 * so that they can be compared with relational operators.
 */
enum {
	GHES_SEV_NO		= 0x0,	/* informational only */
	GHES_SEV_CORRECTED	= 0x1,
	GHES_SEV_RECOVERABLE	= 0x2,
	GHES_SEV_PANIC		= 0x3,	/* fatal or unknown */
};
313 | ||
314 | static inline int ghes_severity(int severity) | |
315 | { | |
316 | switch (severity) { | |
ad4ecef2 HY |
317 | case CPER_SEV_INFORMATIONAL: |
318 | return GHES_SEV_NO; | |
319 | case CPER_SEV_CORRECTED: | |
320 | return GHES_SEV_CORRECTED; | |
321 | case CPER_SEV_RECOVERABLE: | |
322 | return GHES_SEV_RECOVERABLE; | |
323 | case CPER_SEV_FATAL: | |
324 | return GHES_SEV_PANIC; | |
d334a491 | 325 | default: |
25985edc | 326 | /* Unknown, go panic */ |
ad4ecef2 | 327 | return GHES_SEV_PANIC; |
d334a491 HY |
328 | } |
329 | } | |
330 | ||
81e88fdc HY |
331 | static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, |
332 | int from_phys) | |
d334a491 | 333 | { |
81e88fdc HY |
334 | void __iomem *vaddr; |
335 | unsigned long flags = 0; | |
336 | int in_nmi = in_nmi(); | |
337 | u64 offset; | |
338 | u32 trunk; | |
339 | ||
340 | while (len > 0) { | |
341 | offset = paddr - (paddr & PAGE_MASK); | |
342 | if (in_nmi) { | |
343 | raw_spin_lock(&ghes_ioremap_lock_nmi); | |
344 | vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); | |
345 | } else { | |
346 | spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); | |
347 | vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); | |
348 | } | |
349 | trunk = PAGE_SIZE - offset; | |
350 | trunk = min(trunk, len); | |
351 | if (from_phys) | |
352 | memcpy_fromio(buffer, vaddr + offset, trunk); | |
353 | else | |
354 | memcpy_toio(vaddr + offset, buffer, trunk); | |
355 | len -= trunk; | |
356 | paddr += trunk; | |
357 | buffer += trunk; | |
358 | if (in_nmi) { | |
359 | ghes_iounmap_nmi(vaddr); | |
360 | raw_spin_unlock(&ghes_ioremap_lock_nmi); | |
361 | } else { | |
362 | ghes_iounmap_irq(vaddr); | |
363 | spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); | |
364 | } | |
365 | } | |
d334a491 HY |
366 | } |
367 | ||
368 | static int ghes_read_estatus(struct ghes *ghes, int silent) | |
369 | { | |
370 | struct acpi_hest_generic *g = ghes->generic; | |
371 | u64 buf_paddr; | |
372 | u32 len; | |
373 | int rc; | |
374 | ||
375 | rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); | |
376 | if (rc) { | |
377 | if (!silent && printk_ratelimit()) | |
378 | pr_warning(FW_WARN GHES_PFX | |
379 | "Failed to read error status block address for hardware error source: %d.\n", | |
380 | g->header.source_id); | |
381 | return -EIO; | |
382 | } | |
383 | if (!buf_paddr) | |
384 | return -ENOENT; | |
385 | ||
81e88fdc HY |
386 | ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, |
387 | sizeof(*ghes->estatus), 1); | |
d334a491 HY |
388 | if (!ghes->estatus->block_status) |
389 | return -ENOENT; | |
390 | ||
391 | ghes->buffer_paddr = buf_paddr; | |
392 | ghes->flags |= GHES_TO_CLEAR; | |
393 | ||
394 | rc = -EIO; | |
395 | len = apei_estatus_len(ghes->estatus); | |
396 | if (len < sizeof(*ghes->estatus)) | |
397 | goto err_read_block; | |
398 | if (len > ghes->generic->error_block_length) | |
399 | goto err_read_block; | |
400 | if (apei_estatus_check_header(ghes->estatus)) | |
401 | goto err_read_block; | |
81e88fdc HY |
402 | ghes_copy_tofrom_phys(ghes->estatus + 1, |
403 | buf_paddr + sizeof(*ghes->estatus), | |
404 | len - sizeof(*ghes->estatus), 1); | |
d334a491 HY |
405 | if (apei_estatus_check(ghes->estatus)) |
406 | goto err_read_block; | |
407 | rc = 0; | |
408 | ||
409 | err_read_block: | |
81e88fdc | 410 | if (rc && !silent && printk_ratelimit()) |
d334a491 HY |
411 | pr_warning(FW_WARN GHES_PFX |
412 | "Failed to read error status block!\n"); | |
413 | return rc; | |
414 | } | |
415 | ||
416 | static void ghes_clear_estatus(struct ghes *ghes) | |
417 | { | |
418 | ghes->estatus->block_status = 0; | |
419 | if (!(ghes->flags & GHES_TO_CLEAR)) | |
420 | return; | |
421 | ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, | |
422 | sizeof(ghes->estatus->block_status), 0); | |
423 | ghes->flags &= ~GHES_TO_CLEAR; | |
424 | } | |
425 | ||
67eb2e99 | 426 | static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) |
d334a491 | 427 | { |
ad4ecef2 | 428 | int sev, processed = 0; |
d334a491 HY |
429 | struct acpi_hest_generic_data *gdata; |
430 | ||
67eb2e99 HY |
431 | sev = ghes_severity(estatus->error_severity); |
432 | apei_estatus_for_each_section(estatus, gdata) { | |
d334a491 HY |
433 | #ifdef CONFIG_X86_MCE |
434 | if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | |
435 | CPER_SEC_PLATFORM_MEM)) { | |
436 | apei_mce_report_mem_error( | |
ad4ecef2 | 437 | sev == GHES_SEV_CORRECTED, |
d334a491 HY |
438 | (struct cper_sec_mem_err *)(gdata+1)); |
439 | processed = 1; | |
440 | } | |
441 | #endif | |
442 | } | |
32c361f5 | 443 | } |
d334a491 | 444 | |
67eb2e99 HY |
445 | static void __ghes_print_estatus(const char *pfx, |
446 | const struct acpi_hest_generic *generic, | |
447 | const struct acpi_hest_generic_status *estatus) | |
32c361f5 | 448 | { |
32c361f5 | 449 | if (pfx == NULL) { |
67eb2e99 | 450 | if (ghes_severity(estatus->error_severity) <= |
32c361f5 HY |
451 | GHES_SEV_CORRECTED) |
452 | pfx = KERN_WARNING HW_ERR; | |
453 | else | |
454 | pfx = KERN_ERR HW_ERR; | |
455 | } | |
5588340d | 456 | printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", |
67eb2e99 HY |
457 | pfx, generic->header.source_id); |
458 | apei_estatus_print(pfx, estatus); | |
5588340d HY |
459 | } |
460 | ||
67eb2e99 HY |
461 | static void ghes_print_estatus(const char *pfx, |
462 | const struct acpi_hest_generic *generic, | |
463 | const struct acpi_hest_generic_status *estatus) | |
5588340d HY |
464 | { |
465 | /* Not more than 2 messages every 5 seconds */ | |
67eb2e99 HY |
466 | static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); |
467 | static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); | |
468 | struct ratelimit_state *ratelimit; | |
5588340d | 469 | |
67eb2e99 HY |
470 | if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) |
471 | ratelimit = &ratelimit_corrected; | |
472 | else | |
473 | ratelimit = &ratelimit_uncorrected; | |
474 | if (__ratelimit(ratelimit)) | |
475 | __ghes_print_estatus(pfx, generic, estatus); | |
d334a491 HY |
476 | } |
477 | ||
478 | static int ghes_proc(struct ghes *ghes) | |
479 | { | |
480 | int rc; | |
481 | ||
482 | rc = ghes_read_estatus(ghes, 0); | |
483 | if (rc) | |
484 | goto out; | |
67eb2e99 HY |
485 | ghes_print_estatus(NULL, ghes->generic, ghes->estatus); |
486 | ghes_do_proc(ghes->estatus); | |
d334a491 HY |
487 | |
488 | out: | |
489 | ghes_clear_estatus(ghes); | |
490 | return 0; | |
491 | } | |
492 | ||
81e88fdc HY |
493 | static void ghes_add_timer(struct ghes *ghes) |
494 | { | |
495 | struct acpi_hest_generic *g = ghes->generic; | |
496 | unsigned long expire; | |
497 | ||
498 | if (!g->notify.poll_interval) { | |
499 | pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", | |
500 | g->header.source_id); | |
501 | return; | |
502 | } | |
503 | expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); | |
504 | ghes->timer.expires = round_jiffies_relative(expire); | |
505 | add_timer(&ghes->timer); | |
506 | } | |
507 | ||
508 | static void ghes_poll_func(unsigned long data) | |
509 | { | |
510 | struct ghes *ghes = (void *)data; | |
511 | ||
512 | ghes_proc(ghes); | |
513 | if (!(ghes->flags & GHES_EXITING)) | |
514 | ghes_add_timer(ghes); | |
515 | } | |
516 | ||
517 | static irqreturn_t ghes_irq_func(int irq, void *data) | |
518 | { | |
519 | struct ghes *ghes = data; | |
520 | int rc; | |
521 | ||
522 | rc = ghes_proc(ghes); | |
523 | if (rc) | |
524 | return IRQ_NONE; | |
525 | ||
526 | return IRQ_HANDLED; | |
527 | } | |
528 | ||
d334a491 HY |
529 | static int ghes_notify_sci(struct notifier_block *this, |
530 | unsigned long event, void *data) | |
531 | { | |
532 | struct ghes *ghes; | |
533 | int ret = NOTIFY_DONE; | |
534 | ||
535 | rcu_read_lock(); | |
536 | list_for_each_entry_rcu(ghes, &ghes_sci, list) { | |
537 | if (!ghes_proc(ghes)) | |
538 | ret = NOTIFY_OK; | |
539 | } | |
540 | rcu_read_unlock(); | |
541 | ||
542 | return ret; | |
543 | } | |
544 | ||
67eb2e99 HY |
545 | static void ghes_proc_in_irq(struct irq_work *irq_work) |
546 | { | |
547 | struct llist_node *llnode, *next, *tail = NULL; | |
548 | struct ghes_estatus_node *estatus_node; | |
549 | struct acpi_hest_generic_status *estatus; | |
550 | u32 len, node_len; | |
551 | ||
552 | /* | |
553 | * Because the time order of estatus in list is reversed, | |
554 | * revert it back to proper order. | |
555 | */ | |
556 | llnode = llist_del_all(&ghes_estatus_llist); | |
557 | while (llnode) { | |
558 | next = llnode->next; | |
559 | llnode->next = tail; | |
560 | tail = llnode; | |
561 | llnode = next; | |
562 | } | |
563 | llnode = tail; | |
564 | while (llnode) { | |
565 | next = llnode->next; | |
566 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
567 | llnode); | |
568 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
569 | len = apei_estatus_len(estatus); | |
570 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
571 | ghes_do_proc(estatus); | |
572 | ghes_print_estatus(NULL, estatus_node->generic, estatus); | |
573 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, | |
574 | node_len); | |
575 | llnode = next; | |
576 | } | |
577 | } | |
578 | ||
81e88fdc HY |
579 | static int ghes_notify_nmi(struct notifier_block *this, |
580 | unsigned long cmd, void *data) | |
581 | { | |
582 | struct ghes *ghes, *ghes_global = NULL; | |
583 | int sev, sev_global = -1; | |
584 | int ret = NOTIFY_DONE; | |
585 | ||
586 | if (cmd != DIE_NMI) | |
587 | return ret; | |
588 | ||
589 | raw_spin_lock(&ghes_nmi_lock); | |
590 | list_for_each_entry_rcu(ghes, &ghes_nmi, list) { | |
591 | if (ghes_read_estatus(ghes, 1)) { | |
592 | ghes_clear_estatus(ghes); | |
593 | continue; | |
594 | } | |
595 | sev = ghes_severity(ghes->estatus->error_severity); | |
596 | if (sev > sev_global) { | |
597 | sev_global = sev; | |
598 | ghes_global = ghes; | |
599 | } | |
600 | ret = NOTIFY_STOP; | |
601 | } | |
602 | ||
603 | if (ret == NOTIFY_DONE) | |
604 | goto out; | |
605 | ||
606 | if (sev_global >= GHES_SEV_PANIC) { | |
607 | oops_begin(); | |
67eb2e99 HY |
608 | __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, |
609 | ghes_global->estatus); | |
81e88fdc HY |
610 | /* reboot to log the error! */ |
611 | if (panic_timeout == 0) | |
612 | panic_timeout = ghes_panic_timeout; | |
613 | panic("Fatal hardware error!"); | |
614 | } | |
615 | ||
616 | list_for_each_entry_rcu(ghes, &ghes_nmi, list) { | |
67eb2e99 HY |
617 | #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG |
618 | u32 len, node_len; | |
619 | struct ghes_estatus_node *estatus_node; | |
620 | struct acpi_hest_generic_status *estatus; | |
621 | #endif | |
81e88fdc HY |
622 | if (!(ghes->flags & GHES_TO_CLEAR)) |
623 | continue; | |
67eb2e99 HY |
624 | #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG |
625 | /* Save estatus for further processing in IRQ context */ | |
626 | len = apei_estatus_len(ghes->estatus); | |
627 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
628 | estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, | |
629 | node_len); | |
630 | if (estatus_node) { | |
631 | estatus_node->generic = ghes->generic; | |
632 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
633 | memcpy(estatus, ghes->estatus, len); | |
634 | llist_add(&estatus_node->llnode, &ghes_estatus_llist); | |
635 | } | |
636 | #endif | |
81e88fdc HY |
637 | ghes_clear_estatus(ghes); |
638 | } | |
67eb2e99 HY |
639 | #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG |
640 | irq_work_queue(&ghes_proc_irq_work); | |
641 | #endif | |
81e88fdc HY |
642 | |
643 | out: | |
644 | raw_spin_unlock(&ghes_nmi_lock); | |
645 | return ret; | |
646 | } | |
647 | ||
d334a491 HY |
648 | static struct notifier_block ghes_notifier_sci = { |
649 | .notifier_call = ghes_notify_sci, | |
650 | }; | |
651 | ||
81e88fdc HY |
652 | static struct notifier_block ghes_notifier_nmi = { |
653 | .notifier_call = ghes_notify_nmi, | |
654 | }; | |
655 | ||
67eb2e99 HY |
656 | static unsigned long ghes_esource_prealloc_size( |
657 | const struct acpi_hest_generic *generic) | |
658 | { | |
659 | unsigned long block_length, prealloc_records, prealloc_size; | |
660 | ||
661 | block_length = min_t(unsigned long, generic->error_block_length, | |
662 | GHES_ESTATUS_MAX_SIZE); | |
663 | prealloc_records = max_t(unsigned long, | |
664 | generic->records_to_preallocate, 1); | |
665 | prealloc_size = min_t(unsigned long, block_length * prealloc_records, | |
666 | GHES_ESOURCE_PREALLOC_MAX_SIZE); | |
667 | ||
668 | return prealloc_size; | |
669 | } | |
670 | ||
7ad6e943 | 671 | static int __devinit ghes_probe(struct platform_device *ghes_dev) |
d334a491 HY |
672 | { |
673 | struct acpi_hest_generic *generic; | |
674 | struct ghes *ghes = NULL; | |
67eb2e99 | 675 | unsigned long len; |
7ad6e943 | 676 | int rc = -EINVAL; |
d334a491 | 677 | |
1dd6b20e | 678 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
d334a491 | 679 | if (!generic->enabled) |
7ad6e943 | 680 | return -ENODEV; |
d334a491 | 681 | |
81e88fdc HY |
682 | switch (generic->notify.type) { |
683 | case ACPI_HEST_NOTIFY_POLLED: | |
684 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
685 | case ACPI_HEST_NOTIFY_SCI: | |
686 | case ACPI_HEST_NOTIFY_NMI: | |
687 | break; | |
688 | case ACPI_HEST_NOTIFY_LOCAL: | |
689 | pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", | |
d334a491 HY |
690 | generic->header.source_id); |
691 | goto err; | |
81e88fdc HY |
692 | default: |
693 | pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", | |
694 | generic->notify.type, generic->header.source_id); | |
695 | goto err; | |
d334a491 | 696 | } |
81e88fdc HY |
697 | |
698 | rc = -EIO; | |
699 | if (generic->error_block_length < | |
700 | sizeof(struct acpi_hest_generic_status)) { | |
701 | pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", | |
702 | generic->error_block_length, | |
d334a491 HY |
703 | generic->header.source_id); |
704 | goto err; | |
705 | } | |
706 | ghes = ghes_new(generic); | |
707 | if (IS_ERR(ghes)) { | |
708 | rc = PTR_ERR(ghes); | |
709 | ghes = NULL; | |
710 | goto err; | |
711 | } | |
81e88fdc HY |
712 | switch (generic->notify.type) { |
713 | case ACPI_HEST_NOTIFY_POLLED: | |
714 | ghes->timer.function = ghes_poll_func; | |
715 | ghes->timer.data = (unsigned long)ghes; | |
716 | init_timer_deferrable(&ghes->timer); | |
717 | ghes_add_timer(ghes); | |
718 | break; | |
719 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
720 | /* External interrupt vector is GSI */ | |
721 | if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { | |
722 | pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", | |
723 | generic->header.source_id); | |
724 | goto err; | |
725 | } | |
726 | if (request_irq(ghes->irq, ghes_irq_func, | |
727 | 0, "GHES IRQ", ghes)) { | |
728 | pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", | |
729 | generic->header.source_id); | |
730 | goto err; | |
731 | } | |
732 | break; | |
733 | case ACPI_HEST_NOTIFY_SCI: | |
7ad6e943 | 734 | mutex_lock(&ghes_list_mutex); |
d334a491 HY |
735 | if (list_empty(&ghes_sci)) |
736 | register_acpi_hed_notifier(&ghes_notifier_sci); | |
737 | list_add_rcu(&ghes->list, &ghes_sci); | |
7ad6e943 | 738 | mutex_unlock(&ghes_list_mutex); |
81e88fdc HY |
739 | break; |
740 | case ACPI_HEST_NOTIFY_NMI: | |
67eb2e99 HY |
741 | len = ghes_esource_prealloc_size(generic); |
742 | ghes_estatus_pool_expand(len); | |
81e88fdc HY |
743 | mutex_lock(&ghes_list_mutex); |
744 | if (list_empty(&ghes_nmi)) | |
745 | register_die_notifier(&ghes_notifier_nmi); | |
746 | list_add_rcu(&ghes->list, &ghes_nmi); | |
747 | mutex_unlock(&ghes_list_mutex); | |
748 | break; | |
749 | default: | |
750 | BUG(); | |
d334a491 | 751 | } |
7ad6e943 | 752 | platform_set_drvdata(ghes_dev, ghes); |
d334a491 HY |
753 | |
754 | return 0; | |
755 | err: | |
7ad6e943 | 756 | if (ghes) { |
d334a491 | 757 | ghes_fini(ghes); |
7ad6e943 HY |
758 | kfree(ghes); |
759 | } | |
d334a491 HY |
760 | return rc; |
761 | } | |
762 | ||
7ad6e943 | 763 | static int __devexit ghes_remove(struct platform_device *ghes_dev) |
d334a491 | 764 | { |
7ad6e943 HY |
765 | struct ghes *ghes; |
766 | struct acpi_hest_generic *generic; | |
67eb2e99 | 767 | unsigned long len; |
d334a491 | 768 | |
7ad6e943 HY |
769 | ghes = platform_get_drvdata(ghes_dev); |
770 | generic = ghes->generic; | |
771 | ||
81e88fdc | 772 | ghes->flags |= GHES_EXITING; |
7ad6e943 | 773 | switch (generic->notify.type) { |
81e88fdc HY |
774 | case ACPI_HEST_NOTIFY_POLLED: |
775 | del_timer_sync(&ghes->timer); | |
776 | break; | |
777 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
778 | free_irq(ghes->irq, ghes); | |
779 | break; | |
7ad6e943 HY |
780 | case ACPI_HEST_NOTIFY_SCI: |
781 | mutex_lock(&ghes_list_mutex); | |
782 | list_del_rcu(&ghes->list); | |
783 | if (list_empty(&ghes_sci)) | |
784 | unregister_acpi_hed_notifier(&ghes_notifier_sci); | |
785 | mutex_unlock(&ghes_list_mutex); | |
786 | break; | |
81e88fdc HY |
787 | case ACPI_HEST_NOTIFY_NMI: |
788 | mutex_lock(&ghes_list_mutex); | |
789 | list_del_rcu(&ghes->list); | |
790 | if (list_empty(&ghes_nmi)) | |
791 | unregister_die_notifier(&ghes_notifier_nmi); | |
792 | mutex_unlock(&ghes_list_mutex); | |
793 | /* | |
794 | * To synchronize with NMI handler, ghes can only be | |
795 | * freed after NMI handler finishes. | |
796 | */ | |
797 | synchronize_rcu(); | |
67eb2e99 HY |
798 | len = ghes_esource_prealloc_size(generic); |
799 | ghes_estatus_pool_shrink(len); | |
81e88fdc | 800 | break; |
7ad6e943 HY |
801 | default: |
802 | BUG(); | |
803 | break; | |
804 | } | |
d334a491 | 805 | |
7ad6e943 HY |
806 | ghes_fini(ghes); |
807 | kfree(ghes); | |
d334a491 | 808 | |
7ad6e943 HY |
809 | platform_set_drvdata(ghes_dev, NULL); |
810 | ||
811 | return 0; | |
d334a491 HY |
812 | } |
813 | ||
7ad6e943 HY |
814 | static struct platform_driver ghes_platform_driver = { |
815 | .driver = { | |
816 | .name = "GHES", | |
817 | .owner = THIS_MODULE, | |
818 | }, | |
819 | .probe = ghes_probe, | |
820 | .remove = ghes_remove, | |
821 | }; | |
822 | ||
d334a491 HY |
823 | static int __init ghes_init(void) |
824 | { | |
81e88fdc HY |
825 | int rc; |
826 | ||
d334a491 HY |
827 | if (acpi_disabled) |
828 | return -ENODEV; | |
829 | ||
830 | if (hest_disable) { | |
831 | pr_info(GHES_PFX "HEST is not enabled!\n"); | |
832 | return -EINVAL; | |
833 | } | |
834 | ||
b6a95016 HY |
835 | if (ghes_disable) { |
836 | pr_info(GHES_PFX "GHES is not enabled!\n"); | |
837 | return -EINVAL; | |
838 | } | |
839 | ||
67eb2e99 HY |
840 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); |
841 | ||
81e88fdc HY |
842 | rc = ghes_ioremap_init(); |
843 | if (rc) | |
844 | goto err; | |
845 | ||
67eb2e99 | 846 | rc = ghes_estatus_pool_init(); |
81e88fdc HY |
847 | if (rc) |
848 | goto err_ioremap_exit; | |
849 | ||
67eb2e99 HY |
850 | rc = platform_driver_register(&ghes_platform_driver); |
851 | if (rc) | |
852 | goto err_pool_exit; | |
853 | ||
9fb0bfe1 HY |
854 | rc = apei_osc_setup(); |
855 | if (rc == 0 && osc_sb_apei_support_acked) | |
856 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); | |
857 | else if (rc == 0 && !osc_sb_apei_support_acked) | |
858 | pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); | |
859 | else if (rc && osc_sb_apei_support_acked) | |
860 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); | |
861 | else | |
862 | pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); | |
863 | ||
81e88fdc | 864 | return 0; |
67eb2e99 HY |
865 | err_pool_exit: |
866 | ghes_estatus_pool_exit(); | |
81e88fdc HY |
867 | err_ioremap_exit: |
868 | ghes_ioremap_exit(); | |
869 | err: | |
870 | return rc; | |
d334a491 HY |
871 | } |
872 | ||
873 | static void __exit ghes_exit(void) | |
874 | { | |
7ad6e943 | 875 | platform_driver_unregister(&ghes_platform_driver); |
67eb2e99 | 876 | ghes_estatus_pool_exit(); |
81e88fdc | 877 | ghes_ioremap_exit(); |
d334a491 HY |
878 | } |
879 | ||
880 | module_init(ghes_init); | |
881 | module_exit(ghes_exit); | |
882 | ||
883 | MODULE_AUTHOR("Huang Ying"); | |
884 | MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); | |
885 | MODULE_LICENSE("GPL"); | |
7ad6e943 | 886 | MODULE_ALIAS("platform:GHES"); |