2 * PowerNV OPAL high level interfaces
4 * Copyright 2011 IBM Corp.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
14 #include <linux/types.h>
16 #include <linux/of_fdt.h>
17 #include <linux/of_platform.h>
18 #include <linux/interrupt.h>
19 #include <linux/notifier.h>
20 #include <linux/slab.h>
21 #include <linux/sched.h>
22 #include <linux/kobject.h>
23 #include <linux/delay.h>
24 #include <linux/memblock.h>
26 #include <asm/machdep.h>
28 #include <asm/firmware.h>
33 /* /sys/firmware/opal */
34 struct kobject
*opal_kobj
;
42 struct mcheck_recoverable_range
{
48 static struct mcheck_recoverable_range
*mc_recoverable_range
;
49 static int mc_recoverable_range_len
;
51 struct device_node
*opal_node
;
52 static DEFINE_SPINLOCK(opal_write_lock
);
53 extern u64 opal_mc_secondary_handler
[];
54 static unsigned int *opal_irqs
;
55 static unsigned int opal_irq_count
;
56 static ATOMIC_NOTIFIER_HEAD(opal_notifier_head
);
57 static struct atomic_notifier_head opal_msg_notifier_head
[OPAL_MSG_TYPE_MAX
];
58 static DEFINE_SPINLOCK(opal_notifier_lock
);
59 static uint64_t last_notified_mask
= 0x0ul
;
60 static atomic_t opal_notifier_hold
= ATOMIC_INIT(0);
62 static void opal_reinit_cores(void)
64 /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
66 * It will preserve non volatile GPRs and HSPRG0/1. It will
67 * also restore HIDs and other SPRs to their original value
68 * but it might clobber a bunch.
71 opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE
);
73 opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE
);
77 int __init
early_init_dt_scan_opal(unsigned long node
,
78 const char *uname
, int depth
, void *data
)
80 const void *basep
, *entryp
, *sizep
;
81 int basesz
, entrysz
, runtimesz
;
83 if (depth
!= 1 || strcmp(uname
, "ibm,opal") != 0)
86 basep
= of_get_flat_dt_prop(node
, "opal-base-address", &basesz
);
87 entryp
= of_get_flat_dt_prop(node
, "opal-entry-address", &entrysz
);
88 sizep
= of_get_flat_dt_prop(node
, "opal-runtime-size", &runtimesz
);
90 if (!basep
|| !entryp
|| !sizep
)
93 opal
.base
= of_read_number(basep
, basesz
/4);
94 opal
.entry
= of_read_number(entryp
, entrysz
/4);
95 opal
.size
= of_read_number(sizep
, runtimesz
/4);
97 pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
98 opal
.base
, basep
, basesz
);
99 pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
100 opal
.entry
, entryp
, entrysz
);
101 pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
102 opal
.size
, sizep
, runtimesz
);
104 powerpc_firmware_features
|= FW_FEATURE_OPAL
;
105 if (of_flat_dt_is_compatible(node
, "ibm,opal-v3")) {
106 powerpc_firmware_features
|= FW_FEATURE_OPALv2
;
107 powerpc_firmware_features
|= FW_FEATURE_OPALv3
;
108 printk("OPAL V3 detected !\n");
109 } else if (of_flat_dt_is_compatible(node
, "ibm,opal-v2")) {
110 powerpc_firmware_features
|= FW_FEATURE_OPALv2
;
111 printk("OPAL V2 detected !\n");
113 printk("OPAL V1 detected !\n");
116 /* Reinit all cores with the right endian */
119 /* Restore some bits */
120 if (cur_cpu_spec
->cpu_restore
)
121 cur_cpu_spec
->cpu_restore();
126 int __init
early_init_dt_scan_recoverable_ranges(unsigned long node
,
127 const char *uname
, int depth
, void *data
)
132 if (depth
!= 1 || strcmp(uname
, "ibm,opal") != 0)
135 prop
= of_get_flat_dt_prop(node
, "mcheck-recoverable-ranges", &psize
);
140 pr_debug("Found machine check recoverable ranges.\n");
143 * Calculate number of available entries.
145 * Each recoverable address range entry is (start address, len,
146 * recovery address), 2 cells each for start and recovery address,
147 * 1 cell for len, totalling 5 cells per entry.
149 mc_recoverable_range_len
= psize
/ (sizeof(*prop
) * 5);
152 if (!mc_recoverable_range_len
)
155 /* Size required to hold all the entries. */
156 size
= mc_recoverable_range_len
*
157 sizeof(struct mcheck_recoverable_range
);
160 * Allocate a buffer to hold the MC recoverable ranges. We would be
161 * accessing them in real mode, hence it needs to be within
164 mc_recoverable_range
=__va(memblock_alloc_base(size
, __alignof__(u64
),
166 memset(mc_recoverable_range
, 0, size
);
168 for (i
= 0; i
< mc_recoverable_range_len
; i
++) {
169 mc_recoverable_range
[i
].start_addr
=
170 of_read_number(prop
+ (i
* 5) + 0, 2);
171 mc_recoverable_range
[i
].end_addr
=
172 mc_recoverable_range
[i
].start_addr
+
173 of_read_number(prop
+ (i
* 5) + 2, 1);
174 mc_recoverable_range
[i
].recover_addr
=
175 of_read_number(prop
+ (i
* 5) + 3, 2);
177 pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
178 mc_recoverable_range
[i
].start_addr
,
179 mc_recoverable_range
[i
].end_addr
,
180 mc_recoverable_range
[i
].recover_addr
);
185 static int __init
opal_register_exception_handlers(void)
187 #ifdef __BIG_ENDIAN__
190 if (!(powerpc_firmware_features
& FW_FEATURE_OPAL
))
193 /* Hookup some exception handlers except machine check. We use the
194 * fwnmi area at 0x7000 to provide the glue space to OPAL
197 opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER
,
200 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER
, 0, glue
);
205 machine_early_initcall(powernv
, opal_register_exception_handlers
);
207 int opal_notifier_register(struct notifier_block
*nb
)
210 pr_warning("%s: Invalid argument (%p)\n",
215 atomic_notifier_chain_register(&opal_notifier_head
, nb
);
218 EXPORT_SYMBOL_GPL(opal_notifier_register
);
220 int opal_notifier_unregister(struct notifier_block
*nb
)
223 pr_warning("%s: Invalid argument (%p)\n",
228 atomic_notifier_chain_unregister(&opal_notifier_head
, nb
);
231 EXPORT_SYMBOL_GPL(opal_notifier_unregister
);
233 static void opal_do_notifier(uint64_t events
)
236 uint64_t changed_mask
;
238 if (atomic_read(&opal_notifier_hold
))
241 spin_lock_irqsave(&opal_notifier_lock
, flags
);
242 changed_mask
= last_notified_mask
^ events
;
243 last_notified_mask
= events
;
244 spin_unlock_irqrestore(&opal_notifier_lock
, flags
);
247 * We feed with the event bits and changed bits for
248 * enough information to the callback.
250 atomic_notifier_call_chain(&opal_notifier_head
,
251 events
, (void *)changed_mask
);
254 void opal_notifier_update_evt(uint64_t evt_mask
,
259 spin_lock_irqsave(&opal_notifier_lock
, flags
);
260 last_notified_mask
&= ~evt_mask
;
261 last_notified_mask
|= evt_val
;
262 spin_unlock_irqrestore(&opal_notifier_lock
, flags
);
265 void opal_notifier_enable(void)
270 atomic_set(&opal_notifier_hold
, 0);
272 /* Process pending events */
273 rc
= opal_poll_events(&evt
);
274 if (rc
== OPAL_SUCCESS
&& evt
)
275 opal_do_notifier(be64_to_cpu(evt
));
278 void opal_notifier_disable(void)
280 atomic_set(&opal_notifier_hold
, 1);
284 * Opal message notifier based on message type. Allow subscribers to get
285 * notified for specific messgae type.
287 int opal_message_notifier_register(enum OpalMessageType msg_type
,
288 struct notifier_block
*nb
)
291 pr_warning("%s: Invalid argument (%p)\n",
295 if (msg_type
> OPAL_MSG_TYPE_MAX
) {
296 pr_warning("%s: Invalid message type argument (%d)\n",
300 return atomic_notifier_chain_register(
301 &opal_msg_notifier_head
[msg_type
], nb
);
304 static void opal_message_do_notify(uint32_t msg_type
, void *msg
)
306 /* notify subscribers */
307 atomic_notifier_call_chain(&opal_msg_notifier_head
[msg_type
],
311 static void opal_handle_message(void)
315 * TODO: pre-allocate a message buffer depending on opal-msg-size
316 * value in /proc/device-tree.
318 static struct opal_msg msg
;
321 ret
= opal_get_msg(__pa(&msg
), sizeof(msg
));
322 /* No opal message pending. */
323 if (ret
== OPAL_RESOURCE
)
326 /* check for errors. */
328 pr_warning("%s: Failed to retrive opal message, err=%lld\n",
333 type
= be32_to_cpu(msg
.msg_type
);
336 if (type
> OPAL_MSG_TYPE_MAX
) {
337 pr_warning("%s: Unknown message type: %u\n", __func__
, type
);
340 opal_message_do_notify(type
, (void *)&msg
);
343 static int opal_message_notify(struct notifier_block
*nb
,
344 unsigned long events
, void *change
)
346 if (events
& OPAL_EVENT_MSG_PENDING
)
347 opal_handle_message();
351 static struct notifier_block opal_message_nb
= {
352 .notifier_call
= opal_message_notify
,
357 static int __init
opal_message_init(void)
361 for (i
= 0; i
< OPAL_MSG_TYPE_MAX
; i
++)
362 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head
[i
]);
364 ret
= opal_notifier_register(&opal_message_nb
);
366 pr_err("%s: Can't register OPAL event notifier (%d)\n",
372 machine_early_initcall(powernv
, opal_message_init
);
374 int opal_get_chars(uint32_t vtermno
, char *buf
, int count
)
381 opal_poll_events(&evt
);
382 if ((be64_to_cpu(evt
) & OPAL_EVENT_CONSOLE_INPUT
) == 0)
384 len
= cpu_to_be64(count
);
385 rc
= opal_console_read(vtermno
, &len
, buf
);
386 if (rc
== OPAL_SUCCESS
)
387 return be64_to_cpu(len
);
391 int opal_put_chars(uint32_t vtermno
, const char *data
, int total_len
)
402 /* We want put_chars to be atomic to avoid mangling of hvsi
403 * packets. To do that, we first test for room and return
404 * -EAGAIN if there isn't enough.
406 * Unfortunately, opal_console_write_buffer_space() doesn't
407 * appear to work on opal v1, so we just assume there is
408 * enough room and be done with it
410 spin_lock_irqsave(&opal_write_lock
, flags
);
411 if (firmware_has_feature(FW_FEATURE_OPALv2
)) {
412 rc
= opal_console_write_buffer_space(vtermno
, &olen
);
413 len
= be64_to_cpu(olen
);
414 if (rc
|| len
< total_len
) {
415 spin_unlock_irqrestore(&opal_write_lock
, flags
);
416 /* Closed -> drop characters */
419 opal_poll_events(NULL
);
424 /* We still try to handle partial completions, though they
425 * should no longer happen.
428 while(total_len
> 0 && (rc
== OPAL_BUSY
||
429 rc
== OPAL_BUSY_EVENT
|| rc
== OPAL_SUCCESS
)) {
430 olen
= cpu_to_be64(total_len
);
431 rc
= opal_console_write(vtermno
, &olen
, data
);
432 len
= be64_to_cpu(olen
);
434 /* Closed or other error drop */
435 if (rc
!= OPAL_SUCCESS
&& rc
!= OPAL_BUSY
&&
436 rc
!= OPAL_BUSY_EVENT
) {
440 if (rc
== OPAL_SUCCESS
) {
445 /* This is a bit nasty but we need that for the console to
446 * flush when there aren't any interrupts. We will clean
447 * things a bit later to limit that to synchronous path
448 * such as the kernel console and xmon/udbg
451 opal_poll_events(&evt
);
452 while(rc
== OPAL_SUCCESS
&&
453 (be64_to_cpu(evt
) & OPAL_EVENT_CONSOLE_OUTPUT
));
455 spin_unlock_irqrestore(&opal_write_lock
, flags
);
459 static int opal_recover_mce(struct pt_regs
*regs
,
460 struct machine_check_event
*evt
)
463 uint64_t ea
= get_mce_fault_addr(evt
);
465 if (!(regs
->msr
& MSR_RI
)) {
466 /* If MSR_RI isn't set, we cannot recover */
468 } else if (evt
->disposition
== MCE_DISPOSITION_RECOVERED
) {
469 /* Platform corrected itself */
471 } else if (ea
&& !is_kernel_addr(ea
)) {
473 * Faulting address is not in kernel text. We should be fine.
474 * We need to find which process uses this address.
475 * For now, kill the task if we have received exception when
478 * TODO: Queue up this address for hwpoisioning later.
480 if (user_mode(regs
) && !is_global_init(current
)) {
481 _exception(SIGBUS
, regs
, BUS_MCEERR_AR
, regs
->nip
);
485 } else if (user_mode(regs
) && !is_global_init(current
) &&
486 evt
->severity
== MCE_SEV_ERROR_SYNC
) {
488 * If we have received a synchronous error when in userspace
491 _exception(SIGBUS
, regs
, BUS_MCEERR_AR
, regs
->nip
);
497 int opal_machine_check(struct pt_regs
*regs
)
499 struct machine_check_event evt
;
501 if (!get_mce_event(&evt
, MCE_EVENT_RELEASE
))
504 /* Print things out */
505 if (evt
.version
!= MCE_V1
) {
506 pr_err("Machine Check Exception, Unknown event version %d !\n",
510 machine_check_print_event_info(&evt
);
512 if (opal_recover_mce(regs
, &evt
))
/*
 * Early hmi handler called in real mode.
 * Currently a stub: @regs is not examined and 0 is always returned.
 */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	/* TODO: Call opal hmi handler. */
	return 0;
}
/*
 * HMI exception handler called in virtual mode during check_irq_replay.
 * Currently a stub: @regs is not examined and 0 is always returned.
 */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	/* TODO: Retrieve and print HMI event from OPAL. */
	return 0;
}
531 static uint64_t find_recovery_address(uint64_t nip
)
535 for (i
= 0; i
< mc_recoverable_range_len
; i
++)
536 if ((nip
>= mc_recoverable_range
[i
].start_addr
) &&
537 (nip
< mc_recoverable_range
[i
].end_addr
))
538 return mc_recoverable_range
[i
].recover_addr
;
542 bool opal_mce_check_early_recovery(struct pt_regs
*regs
)
544 uint64_t recover_addr
= 0;
546 if (!opal
.base
|| !opal
.size
)
549 if ((regs
->nip
>= opal
.base
) &&
550 (regs
->nip
<= (opal
.base
+ opal
.size
)))
551 recover_addr
= find_recovery_address(regs
->nip
);
554 * Setup regs->nip to rfi into fixup address.
557 regs
->nip
= recover_addr
;
560 return !!recover_addr
;
563 static irqreturn_t
opal_interrupt(int irq
, void *data
)
567 opal_handle_interrupt(virq_to_hw(irq
), &events
);
569 opal_do_notifier(be64_to_cpu(events
));
574 static int opal_sysfs_init(void)
576 opal_kobj
= kobject_create_and_add("opal", firmware_kobj
);
578 pr_warn("kobject_create_and_add opal failed\n");
585 static int __init
opal_init(void)
587 struct device_node
*np
, *consoles
;
591 opal_node
= of_find_node_by_path("/ibm,opal");
593 pr_warn("opal: Node not found\n");
597 /* Register OPAL consoles if any ports */
598 if (firmware_has_feature(FW_FEATURE_OPALv2
))
599 consoles
= of_find_node_by_path("/ibm,opal/consoles");
601 consoles
= of_node_get(opal_node
);
603 for_each_child_of_node(consoles
, np
) {
604 if (strcmp(np
->name
, "serial"))
606 of_platform_device_create(np
, NULL
, NULL
);
608 of_node_put(consoles
);
611 /* Find all OPAL interrupts and request them */
612 irqs
= of_get_property(opal_node
, "opal-interrupts", &irqlen
);
613 pr_debug("opal: Found %d interrupts reserved for OPAL\n",
614 irqs
? (irqlen
/ 4) : 0);
615 opal_irq_count
= irqlen
/ 4;
616 opal_irqs
= kzalloc(opal_irq_count
* sizeof(unsigned int), GFP_KERNEL
);
617 for (i
= 0; irqs
&& i
< (irqlen
/ 4); i
++, irqs
++) {
618 unsigned int hwirq
= be32_to_cpup(irqs
);
619 unsigned int irq
= irq_create_mapping(NULL
, hwirq
);
621 pr_warning("opal: Failed to map irq 0x%x\n", hwirq
);
624 rc
= request_irq(irq
, opal_interrupt
, 0, "opal", NULL
);
626 pr_warning("opal: Error %d requesting irq %d"
627 " (0x%x)\n", rc
, irq
, hwirq
);
631 /* Create "opal" kobject under /sys/firmware */
632 rc
= opal_sysfs_init();
634 /* Setup error log interface */
635 rc
= opal_elog_init();
636 /* Setup code update interface */
638 /* Setup platform dump extract interface */
639 opal_platform_dump_init();
640 /* Setup system parameters interface */
641 opal_sys_param_init();
642 /* Setup message log interface. */
648 machine_subsys_initcall(powernv
, opal_init
);
650 void opal_shutdown(void)
655 /* First free interrupts, which will also mask them */
656 for (i
= 0; i
< opal_irq_count
; i
++) {
658 free_irq(opal_irqs
[i
], NULL
);
663 * Then sync with OPAL which ensure anything that can
664 * potentially write to our memory has completed such
665 * as an ongoing dump retrieval
667 while (rc
== OPAL_BUSY
|| rc
== OPAL_BUSY_EVENT
) {
668 rc
= opal_sync_host_reboot();
670 opal_poll_events(NULL
);
/* Exported so that test modules can call it. */
EXPORT_SYMBOL_GPL(opal_invalid_call);
679 /* Convert a region of vmalloc memory to an opal sg list */
680 struct opal_sg_list
*opal_vmalloc_to_sg_list(void *vmalloc_addr
,
681 unsigned long vmalloc_size
)
683 struct opal_sg_list
*sg
, *first
= NULL
;
686 sg
= kzalloc(PAGE_SIZE
, GFP_KERNEL
);
692 while (vmalloc_size
> 0) {
693 uint64_t data
= vmalloc_to_pfn(vmalloc_addr
) << PAGE_SHIFT
;
694 uint64_t length
= min(vmalloc_size
, PAGE_SIZE
);
696 sg
->entry
[i
].data
= cpu_to_be64(data
);
697 sg
->entry
[i
].length
= cpu_to_be64(length
);
700 if (i
>= SG_ENTRIES_PER_NODE
) {
701 struct opal_sg_list
*next
;
703 next
= kzalloc(PAGE_SIZE
, GFP_KERNEL
);
707 sg
->length
= cpu_to_be64(
708 i
* sizeof(struct opal_sg_entry
) + 16);
710 sg
->next
= cpu_to_be64(__pa(next
));
714 vmalloc_addr
+= length
;
715 vmalloc_size
-= length
;
718 sg
->length
= cpu_to_be64(i
* sizeof(struct opal_sg_entry
) + 16);
723 pr_err("%s : Failed to allocate memory\n", __func__
);
724 opal_free_sg_list(first
);
728 void opal_free_sg_list(struct opal_sg_list
*sg
)
731 uint64_t next
= be64_to_cpu(sg
->next
);