]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - arch/powerpc/platforms/powernv/opal.c
powerpc/powernv: powernv platform is not constrained by RMA
[mirror_ubuntu-bionic-kernel.git] / arch / powerpc / platforms / powernv / opal.c
1 /*
2 * PowerNV OPAL high level interfaces
3 *
4 * Copyright 2011 IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12 #define pr_fmt(fmt) "opal: " fmt
13
14 #include <linux/printk.h>
15 #include <linux/types.h>
16 #include <linux/of.h>
17 #include <linux/of_fdt.h>
18 #include <linux/of_platform.h>
19 #include <linux/of_address.h>
20 #include <linux/interrupt.h>
21 #include <linux/notifier.h>
22 #include <linux/slab.h>
23 #include <linux/sched.h>
24 #include <linux/kobject.h>
25 #include <linux/delay.h>
26 #include <linux/memblock.h>
27 #include <linux/kthread.h>
28 #include <linux/freezer.h>
29
30 #include <asm/machdep.h>
31 #include <asm/opal.h>
32 #include <asm/firmware.h>
33 #include <asm/mce.h>
34 #include <asm/imc-pmu.h>
35
36 #include "powernv.h"
37
38 /* /sys/firmware/opal */
39 struct kobject *opal_kobj;
40
41 struct opal {
42 u64 base;
43 u64 entry;
44 u64 size;
45 } opal;
46
47 struct mcheck_recoverable_range {
48 u64 start_addr;
49 u64 end_addr;
50 u64 recover_addr;
51 };
52
53 static struct mcheck_recoverable_range *mc_recoverable_range;
54 static int mc_recoverable_range_len;
55
56 struct device_node *opal_node;
57 static DEFINE_SPINLOCK(opal_write_lock);
58 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
59 static uint32_t opal_heartbeat;
60 static struct task_struct *kopald_tsk;
61
/*
 * Ask OPAL to re-initialise the CPUs: select the interrupt endianness
 * (HILE) matching this kernel's build, and on ARCH_300 (POWER9) the MMU
 * mode(s); then restore CPU-setup SPRs via cpu_restore().
 */
void opal_configure_cores(void)
{
	u64 reinit_flags = 0;

	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
#else
	reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
#endif

	/*
	 * POWER9 always support running hash:
	 *  ie. Host hash  supports  hash guests
	 *      Host radix supports  hash/radix guests
	 */
	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
		reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
		if (early_radix_enabled())
			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
	}

	opal_reinit_cpus(reinit_flags);

	/* Restore some bits clobbered by the reinit above */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();
}
95
96 int __init early_init_dt_scan_opal(unsigned long node,
97 const char *uname, int depth, void *data)
98 {
99 const void *basep, *entryp, *sizep;
100 int basesz, entrysz, runtimesz;
101
102 if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
103 return 0;
104
105 basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
106 entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
107 sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
108
109 if (!basep || !entryp || !sizep)
110 return 1;
111
112 opal.base = of_read_number(basep, basesz/4);
113 opal.entry = of_read_number(entryp, entrysz/4);
114 opal.size = of_read_number(sizep, runtimesz/4);
115
116 pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
117 opal.base, basep, basesz);
118 pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
119 opal.entry, entryp, entrysz);
120 pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
121 opal.size, sizep, runtimesz);
122
123 if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
124 powerpc_firmware_features |= FW_FEATURE_OPAL;
125 pr_info("OPAL detected !\n");
126 } else {
127 panic("OPAL != V3 detected, no longer supported.\n");
128 }
129
130 return 1;
131 }
132
/*
 * Early flat-DT scan: parse "mcheck-recoverable-ranges" under /ibm,opal
 * into the mc_recoverable_range[] table used by MCE early recovery.
 */
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				const char *uname, int depth, void *data)
{
	int i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);

	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address), 2 cells each for start and recovery address,
	 * 1 cell for len, totalling 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges.
	 * memblock_alloc() hands back a physical address, hence __va().
	 */
	mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		/* end = start + len; len is the single middle cell */
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				mc_recoverable_range[i].start_addr,
				mc_recoverable_range[i].end_addr,
				mc_recoverable_range[i].recover_addr);
	}
	return 1;
}
188
/*
 * Hook exception handlers that OPAL patches into the kernel's vectors
 * (big endian builds only). Machine check is deliberately not hooked;
 * HMI is only handed to OPAL on old firmware without OPAL_HANDLE_HMI.
 */
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hookup some exception handlers except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL
	 */
	glue = 0x7000;

	/*
	 * Check if we are running on newer firmware that exports
	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
	 * the HMI interrupt and we catch it directly in Linux.
	 *
	 * For older firmware (i.e currently released POWER8 System Firmware
	 * as of today <= SV810_087), we fallback to old behavior and let OPAL
	 * patch the HMI vector and handle it inside OPAL firmware.
	 *
	 * For newer firmware (in development/yet to be released) we will
	 * start catching/handling HMI directly in Linux.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		/* advance past the glue space consumed above */
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);
228
229 /*
230 * Opal message notifier based on message type. Allow subscribers to get
231 * notified for specific messgae type.
232 */
233 int opal_message_notifier_register(enum opal_msg_type msg_type,
234 struct notifier_block *nb)
235 {
236 if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
237 pr_warning("%s: Invalid arguments, msg_type:%d\n",
238 __func__, msg_type);
239 return -EINVAL;
240 }
241
242 return atomic_notifier_chain_register(
243 &opal_msg_notifier_head[msg_type], nb);
244 }
245 EXPORT_SYMBOL_GPL(opal_message_notifier_register);
246
247 int opal_message_notifier_unregister(enum opal_msg_type msg_type,
248 struct notifier_block *nb)
249 {
250 return atomic_notifier_chain_unregister(
251 &opal_msg_notifier_head[msg_type], nb);
252 }
253 EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
254
255 static void opal_message_do_notify(uint32_t msg_type, void *msg)
256 {
257 /* notify subscribers */
258 atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
259 msg_type, msg);
260 }
261
262 static void opal_handle_message(void)
263 {
264 s64 ret;
265 /*
266 * TODO: pre-allocate a message buffer depending on opal-msg-size
267 * value in /proc/device-tree.
268 */
269 static struct opal_msg msg;
270 u32 type;
271
272 ret = opal_get_msg(__pa(&msg), sizeof(msg));
273 /* No opal message pending. */
274 if (ret == OPAL_RESOURCE)
275 return;
276
277 /* check for errors. */
278 if (ret) {
279 pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
280 __func__, ret);
281 return;
282 }
283
284 type = be32_to_cpu(msg.msg_type);
285
286 /* Sanity check */
287 if (type >= OPAL_MSG_TYPE_MAX) {
288 pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
289 return;
290 }
291 opal_message_do_notify(type, (void *)&msg);
292 }
293
/* IRQ handler for OPAL_EVENT_MSG_PENDING: drain the pending message. */
static irqreturn_t opal_message_notify(int irq, void *data)
{
	opal_handle_message();
	return IRQ_HANDLED;
}
299
300 static int __init opal_message_init(void)
301 {
302 int ret, i, irq;
303
304 for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
305 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
306
307 irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
308 if (!irq) {
309 pr_err("%s: Can't register OPAL event irq (%d)\n",
310 __func__, irq);
311 return irq;
312 }
313
314 ret = request_irq(irq, opal_message_notify,
315 IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
316 if (ret) {
317 pr_err("%s: Can't request OPAL event irq (%d)\n",
318 __func__, ret);
319 return ret;
320 }
321
322 return 0;
323 }
324
325 int opal_get_chars(uint32_t vtermno, char *buf, int count)
326 {
327 s64 rc;
328 __be64 evt, len;
329
330 if (!opal.entry)
331 return -ENODEV;
332 opal_poll_events(&evt);
333 if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
334 return 0;
335 len = cpu_to_be64(count);
336 rc = opal_console_read(vtermno, &len, buf);
337 if (rc == OPAL_SUCCESS)
338 return be64_to_cpu(len);
339 return 0;
340 }
341
/*
 * Write @total_len bytes from @data to OPAL console @vtermno.
 *
 * Returns the number of bytes handed to firmware, -ENODEV if OPAL is
 * absent, or -EAGAIN when the firmware buffer lacks room. The whole
 * operation runs under opal_write_lock so concurrent writers cannot
 * interleave hvsi packets.
 */
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and be done with it
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	rc = opal_console_write_buffer_space(vtermno, &olen);
	len = be64_to_cpu(olen);
	if (rc || len < total_len) {
		spin_unlock_irqrestore(&opal_write_lock, flags);
		/* Closed -> drop characters */
		if (rc)
			return total_len;
		opal_poll_events(NULL);
		return -EAGAIN;
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while(total_len > 0 && (rc == OPAL_BUSY ||
				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		/* olen is in/out: requested length in, bytes accepted out */
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things a bit later to limit that to synchronous path
		 * such as the kernel console and xmon/udbg
		 */
		do
			opal_poll_events(&evt);
		while(rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}
407
/*
 * Decide whether a machine check event can be recovered from.
 * Returns 1 when execution may continue (platform corrected the error,
 * or the offending user task was killed), 0 when it is fatal.
 */
static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (evt->severity == MCE_SEV_FATAL) {
		/* Fatal machine check */
		pr_err("Machine check interrupt is fatal\n");
		recovered = 0;
	} else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
			(user_mode(regs) && !is_global_init(current))) {
		/*
		 * For now, kill the task if we have received exception when
		 * in userspace.
		 *
		 * TODO: Queue up this address for hwpoisioning later.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}
437
/*
 * PowerNV machine check handler. Returns 1 if the event was recovered;
 * otherwise reports the failure to firmware (which may itself reboot
 * the machine) and panics.
 */
int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;
	int ret;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt, user_mode(regs));

	if (opal_recover_mce(regs, &evt))
		return 1;

	/*
	 * Unrecovered machine check, we are heading to panic path.
	 *
	 * We may have hit this MCE in very early stage of kernel
	 * initialization even before opal-prd has started running. If
	 * this is the case then this MCE error may go un-noticed or
	 * un-analyzed if we go down panic path. We need to inform
	 * BMC/OCC about this error so that they can collect relevant
	 * data for error analysis before rebooting.
	 * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
	 * This function may not return on BMC based system.
	 */
	ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
			"Unrecoverable Machine Check exception");
	if (ret == OPAL_UNSUPPORTED) {
		pr_emerg("Reboot type %d not supported\n",
					OPAL_REBOOT_PLATFORM_ERROR);
	}

	/*
	 * We reached here. There can be three possibilities:
	 * 1. We are running on a firmware level that do not support
	 *    opal_cec_reboot2()
	 * 2. We are running on a firmware level that do not support
	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
	 * 3. We are running on FSP based system that does not need opal
	 *    to trigger checkstop explicitly for error analysis. The FSP
	 *    PRD component would have already got notified about this
	 *    error through other channels.
	 *
	 * If hardware marked this as an unrecoverable MCE, we are
	 * going to panic anyway. Even if it didn't, it's not safe to
	 * continue at this point, so we should explicitly panic.
	 */

	panic("PowerNV Unrecovered Machine Check");
	return 0;
}
495
496 /* Early hmi handler called in real mode. */
497 int opal_hmi_exception_early(struct pt_regs *regs)
498 {
499 s64 rc;
500
501 /*
502 * call opal hmi handler. Pass paca address as token.
503 * The return value OPAL_SUCCESS is an indication that there is
504 * an HMI event generated waiting to pull by Linux.
505 */
506 rc = opal_handle_hmi();
507 if (rc == OPAL_SUCCESS) {
508 local_paca->hmi_event_available = 1;
509 return 1;
510 }
511 return 0;
512 }
513
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	s64 rc;
	__be64 evt = 0;

	/*
	 * Check if HMI event is available.
	 * if Yes, then call opal_poll_events to pull opal messages and
	 * process them.
	 */
	if (!local_paca->hmi_event_available)
		return 0;

	/* Consume the flag set by opal_hmi_exception_early() */
	local_paca->hmi_event_available = 0;
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_handle_events(be64_to_cpu(evt));

	return 1;
}
535
536 static uint64_t find_recovery_address(uint64_t nip)
537 {
538 int i;
539
540 for (i = 0; i < mc_recoverable_range_len; i++)
541 if ((nip >= mc_recoverable_range[i].start_addr) &&
542 (nip < mc_recoverable_range[i].end_addr))
543 return mc_recoverable_range[i].recover_addr;
544 return 0;
545 }
546
547 bool opal_mce_check_early_recovery(struct pt_regs *regs)
548 {
549 uint64_t recover_addr = 0;
550
551 if (!opal.base || !opal.size)
552 goto out;
553
554 if ((regs->nip >= opal.base) &&
555 (regs->nip < (opal.base + opal.size)))
556 recover_addr = find_recovery_address(regs->nip);
557
558 /*
559 * Setup regs->nip to rfi into fixup address.
560 */
561 if (recover_addr)
562 regs->nip = recover_addr;
563
564 out:
565 return !!recover_addr;
566 }
567
568 static int opal_sysfs_init(void)
569 {
570 opal_kobj = kobject_create_and_add("opal", firmware_kobj);
571 if (!opal_kobj) {
572 pr_warn("kobject_create_and_add opal failed\n");
573 return -ENOMEM;
574 }
575
576 return 0;
577 }
578
579 static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
580 struct bin_attribute *bin_attr,
581 char *buf, loff_t off, size_t count)
582 {
583 return memory_read_from_buffer(buf, count, &off, bin_attr->private,
584 bin_attr->size);
585 }
586
587 static BIN_ATTR_RO(symbol_map, 0);
588
589 static void opal_export_symmap(void)
590 {
591 const __be64 *syms;
592 unsigned int size;
593 struct device_node *fw;
594 int rc;
595
596 fw = of_find_node_by_path("/ibm,opal/firmware");
597 if (!fw)
598 return;
599 syms = of_get_property(fw, "symbol-map", &size);
600 if (!syms || size != 2 * sizeof(__be64))
601 return;
602
603 /* Setup attributes */
604 bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
605 bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
606
607 rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
608 if (rc)
609 pr_warn("Error %d creating OPAL symbols file\n", rc);
610 }
611
612 static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
613 struct bin_attribute *bin_attr, char *buf,
614 loff_t off, size_t count)
615 {
616 return memory_read_from_buffer(buf, count, &off, bin_attr->private,
617 bin_attr->size);
618 }
619
620 /*
621 * opal_export_attrs: creates a sysfs node for each property listed in
622 * the device-tree under /ibm,opal/firmware/exports/
623 * All new sysfs nodes are created under /opal/exports/.
624 * This allows for reserved memory regions (e.g. HDAT) to be read.
625 * The new sysfs nodes are only readable by root.
626 */
627 static void opal_export_attrs(void)
628 {
629 struct bin_attribute *attr;
630 struct device_node *np;
631 struct property *prop;
632 struct kobject *kobj;
633 u64 vals[2];
634 int rc;
635
636 np = of_find_node_by_path("/ibm,opal/firmware/exports");
637 if (!np)
638 return;
639
640 /* Create new 'exports' directory - /sys/firmware/opal/exports */
641 kobj = kobject_create_and_add("exports", opal_kobj);
642 if (!kobj) {
643 pr_warn("kobject_create_and_add() of exports failed\n");
644 return;
645 }
646
647 for_each_property_of_node(np, prop) {
648 if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
649 continue;
650
651 if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
652 continue;
653
654 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
655
656 if (attr == NULL) {
657 pr_warn("Failed kmalloc for bin_attribute!");
658 continue;
659 }
660
661 sysfs_bin_attr_init(attr);
662 attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
663 attr->attr.mode = 0400;
664 attr->read = export_attr_read;
665 attr->private = __va(vals[0]);
666 attr->size = vals[1];
667
668 if (attr->attr.name == NULL) {
669 pr_warn("Failed kstrdup for bin_attribute attr.name");
670 kfree(attr);
671 continue;
672 }
673
674 rc = sysfs_create_bin_file(kobj, attr);
675 if (rc) {
676 pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
677 rc, prop->name);
678 kfree(attr->attr.name);
679 kfree(attr);
680 }
681 }
682
683 of_node_put(np);
684 }
685
686 static void __init opal_dump_region_init(void)
687 {
688 void *addr;
689 uint64_t size;
690 int rc;
691
692 if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
693 return;
694
695 /* Register kernel log buffer */
696 addr = log_buf_addr_get();
697 if (addr == NULL)
698 return;
699
700 size = log_buf_len_get();
701 if (size == 0)
702 return;
703
704 rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
705 __pa(addr), size);
706 /* Don't warn if this is just an older OPAL that doesn't
707 * know about that call
708 */
709 if (rc && rc != OPAL_UNSUPPORTED)
710 pr_warn("DUMP: Failed to register kernel log buffer. "
711 "rc = %d\n", rc);
712 }
713
/* Create a platform device for every DT node matching @compatible. */
static void opal_pdev_init(const char *compatible)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, compatible)
		of_platform_device_create(np, NULL, NULL);
}
721
/*
 * If the In-Memory Collection (IMC) counters node is present in the
 * device tree, create its platform device so the IMC driver can bind.
 */
static void __init opal_imc_init_dev(void)
{
	struct device_node *np;

	np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
	if (np)
		of_platform_device_create(np, NULL, NULL);
}
730
/*
 * OPAL heartbeat kthread: periodically poll firmware for events and
 * dispatch them. The period comes from the "ibm,heartbeat-ms" property
 * (see opal_init_heartbeat()), plus one jiffy.
 */
static int kopald(void *unused)
{
	unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
	__be64 events;

	set_freezable();
	do {
		try_to_freeze();
		opal_poll_events(&events);
		opal_handle_events(be64_to_cpu(events));
		/* The sleep may be cut short by opal_wake_poller() */
		schedule_timeout_interruptible(timeout);
	} while (!kthread_should_stop());

	return 0;
}
746
747 void opal_wake_poller(void)
748 {
749 if (kopald_tsk)
750 wake_up_process(kopald_tsk);
751 }
752
753 static void opal_init_heartbeat(void)
754 {
755 /* Old firwmware, we assume the HVC heartbeat is sufficient */
756 if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
757 &opal_heartbeat) != 0)
758 opal_heartbeat = 0;
759
760 if (opal_heartbeat)
761 kopald_tsk = kthread_run(kopald, NULL, "kopald");
762 }
763
/*
 * Main OPAL platform initialisation: registers consoles, messaging,
 * sysfs interfaces and the various OPAL platform devices. Runs as a
 * powernv subsys initcall; ordering below matters (e.g. sysfs files
 * are only created once opal_sysfs_init() succeeds).
 */
static int __init opal_init(void)
{
	struct device_node *np, *consoles, *leds;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/* Register OPAL consoles if any ports */
	consoles = of_find_node_by_path("/ibm,opal/consoles");
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Initialise OPAL messaging system */
	opal_message_init();

	/* Initialise OPAL asynchronous completion interface */
	opal_async_comp_init();

	/* Initialise OPAL sensor interface */
	opal_sensor_init();

	/* Initialise OPAL hypervisor maintenance interrupt handling */
	opal_hmi_handler_init();

	/* Create i2c platform devices */
	opal_pdev_init("ibm,opal-i2c");

	/* Setup a heartbeat thread if requested by OPAL */
	opal_init_heartbeat();

	/* Detect In-Memory Collection counters and create devices */
	opal_imc_init_dev();

	/* Create leds platform devices */
	leds = of_find_node_by_path("/ibm,opal/leds");
	if (leds) {
		of_platform_device_create(leds, "opal_leds", NULL);
		of_node_put(leds);
	}

	/* Initialise OPAL message log interface */
	opal_msglog_init();

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_update_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log sysfs interface. */
		opal_msglog_sysfs_init();
	}

	/* Export all properties */
	opal_export_attrs();

	/* Initialize platform devices: IPMI backend, PRD & flash interface */
	opal_pdev_init("ibm,opal-ipmi");
	opal_pdev_init("ibm,opal-flash");
	opal_pdev_init("ibm,opal-prd");

	/* Initialise platform device: oppanel interface */
	opal_pdev_init("ibm,opal-oppanel");

	/* Initialise OPAL kmsg dumper for flushing console on panic */
	opal_kmsg_init();

	/* Initialise OPAL powercap interface */
	opal_powercap_init();

	/* Initialise OPAL Power-Shifting-Ratio interface */
	opal_psr_init();

	/* Initialise OPAL sensor groups */
	opal_sensor_groups_init();

	return 0;
}
machine_subsys_initcall(powernv, opal_init);
862
/*
 * Quiesce OPAL before reboot/kexec: shut down OPAL event handling,
 * sync any in-flight firmware activity, then drop the dump region.
 */
void opal_shutdown(void)
{
	long rc = OPAL_BUSY;

	opal_event_shutdown();

	/*
	 * Then sync with OPAL which ensure anything that can
	 * potentially write to our memory has completed such
	 * as an ongoing dump retrieval
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}

	/* Unregister memory dump region */
	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
886
/* Export these wrappers so that test modules and drivers can use them */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_xscom_read);
EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
EXPORT_SYMBOL_GPL(opal_prd_msg);
897
/*
 * Convert a region of vmalloc memory to an opal sg list.
 *
 * Builds a chain of page-sized opal_sg_list nodes; entries and the
 * 'next' links hold physical addresses (big endian) since firmware
 * consumes the list. Returns NULL on allocation failure, after freeing
 * any partially built chain. Caller frees with opal_free_sg_list().
 */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		/* One entry per (possibly partial) page of the vmalloc area */
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			/* +16 covers the node header preceding the entries */
			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}
946
947 void opal_free_sg_list(struct opal_sg_list *sg)
948 {
949 while (sg) {
950 uint64_t next = be64_to_cpu(sg->next);
951
952 kfree(sg);
953
954 if (next)
955 sg = __va(next);
956 else
957 sg = NULL;
958 }
959 }
960
961 int opal_error_code(int rc)
962 {
963 switch (rc) {
964 case OPAL_SUCCESS: return 0;
965
966 case OPAL_PARAMETER: return -EINVAL;
967 case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
968 case OPAL_BUSY_EVENT: return -EBUSY;
969 case OPAL_NO_MEM: return -ENOMEM;
970 case OPAL_PERMISSION: return -EPERM;
971
972 case OPAL_UNSUPPORTED: return -EIO;
973 case OPAL_HARDWARE: return -EIO;
974 case OPAL_INTERNAL_ERROR: return -EIO;
975 case OPAL_TIMEOUT: return -ETIMEDOUT;
976 default:
977 pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
978 return -EIO;
979 }
980 }
981
982 void powernv_set_nmmu_ptcr(unsigned long ptcr)
983 {
984 int rc;
985
986 if (firmware_has_feature(FW_FEATURE_OPAL)) {
987 rc = opal_nmmu_set_ptcr(-1UL, ptcr);
988 if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
989 pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
990 }
991 }
992
/* Exports used by the OPAL RTC, TPO, i2c and event consumers */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
/* Export these symbols for PowerNV LED class driver */
EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);