2 * The file intends to implement the platform dependent EEH operations on
3 * powernv platform. Actually, the powernv was created in order to fully
6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
14 #include <linux/atomic.h>
15 #include <linux/debugfs.h>
16 #include <linux/delay.h>
17 #include <linux/export.h>
18 #include <linux/init.h>
19 #include <linux/list.h>
20 #include <linux/msi.h>
22 #include <linux/pci.h>
23 #include <linux/proc_fs.h>
24 #include <linux/rbtree.h>
25 #include <linux/sched.h>
26 #include <linux/seq_file.h>
27 #include <linux/spinlock.h>
30 #include <asm/eeh_event.h>
31 #include <asm/firmware.h>
33 #include <asm/iommu.h>
34 #include <asm/machdep.h>
35 #include <asm/msi_bitmap.h>
37 #include <asm/ppc-pci.h>
/*
 * Set to true by pnv_eeh_post_init() after it has called
 * opal_notifier_register() for pnv_eeh_nb; that function only attempts
 * the registration while the flag is still false. Objects with static
 * storage start out zeroed, so no explicit "= false" initialiser is needed.
 */
static bool pnv_eeh_nb_init;
45 * pnv_eeh_init - EEH platform dependent initialization
47 * EEH platform dependent initialization on powernv
49 static int pnv_eeh_init(void)
51 struct pci_controller
*hose
;
54 /* We require OPALv3 */
55 if (!firmware_has_feature(FW_FEATURE_OPALv3
)) {
56 pr_warn("%s: OPALv3 is required !\n",
62 eeh_add_flag(EEH_PROBE_MODE_DEV
);
65 * P7IOC blocks PCI config access to frozen PE, but PHB3
66 * doesn't do that. So we have to selectively enable I/O
67 * prior to collecting error log.
69 list_for_each_entry(hose
, &hose_list
, list_node
) {
70 phb
= hose
->private_data
;
72 if (phb
->model
== PNV_PHB_MODEL_P7IOC
)
73 eeh_add_flag(EEH_ENABLE_IO_FOR_LOG
);
76 * PE#0 should be regarded as valid by EEH core
77 * if it's not the reserved one. Currently, we
78 * have the reserved PE#0 and PE#127 for PHB3
79 * and P7IOC separately. So we should regard
80 * PE#0 as valid for P7IOC.
82 if (phb
->ioda
.reserved_pe
!= 0)
83 eeh_add_flag(EEH_VALID_PE_ZERO
);
91 static int pnv_eeh_event(struct notifier_block
*nb
,
92 unsigned long events
, void *change
)
94 uint64_t changed_evts
= (uint64_t)change
;
97 * We simply send special EEH event if EEH has
98 * been enabled, or clear pending events in
99 * case that we enable EEH soon
101 if (!(changed_evts
& OPAL_EVENT_PCI_ERROR
) ||
102 !(events
& OPAL_EVENT_PCI_ERROR
))
106 eeh_send_failure_event(NULL
);
108 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR
, 0x0ul
);
113 static struct notifier_block pnv_eeh_nb
= {
114 .notifier_call
= pnv_eeh_event
,
119 #ifdef CONFIG_DEBUG_FS
120 static ssize_t
pnv_eeh_ei_write(struct file
*filp
,
121 const char __user
*user_buf
,
122 size_t count
, loff_t
*ppos
)
124 struct pci_controller
*hose
= filp
->private_data
;
125 struct eeh_dev
*edev
;
127 int pe_no
, type
, func
;
128 unsigned long addr
, mask
;
132 if (!eeh_ops
|| !eeh_ops
->err_inject
)
135 /* Copy over argument buffer */
136 ret
= simple_write_to_buffer(buf
, sizeof(buf
), ppos
, user_buf
, count
);
140 /* Retrieve parameters */
141 ret
= sscanf(buf
, "%x:%x:%x:%lx:%lx",
142 &pe_no
, &type
, &func
, &addr
, &mask
);
147 edev
= kzalloc(sizeof(*edev
), GFP_KERNEL
);
151 edev
->pe_config_addr
= pe_no
;
152 pe
= eeh_pe_get(edev
);
157 /* Do error injection */
158 ret
= eeh_ops
->err_inject(pe
, type
, func
, addr
, mask
);
159 return ret
< 0 ? ret
: count
;
162 static const struct file_operations pnv_eeh_ei_fops
= {
165 .write
= pnv_eeh_ei_write
,
168 static int pnv_eeh_dbgfs_set(void *data
, int offset
, u64 val
)
170 struct pci_controller
*hose
= data
;
171 struct pnv_phb
*phb
= hose
->private_data
;
173 out_be64(phb
->regs
+ offset
, val
);
177 static int pnv_eeh_dbgfs_get(void *data
, int offset
, u64
*val
)
179 struct pci_controller
*hose
= data
;
180 struct pnv_phb
*phb
= hose
->private_data
;
182 *val
= in_be64(phb
->regs
+ offset
);
186 static int pnv_eeh_outb_dbgfs_set(void *data
, u64 val
)
188 return pnv_eeh_dbgfs_set(data
, 0xD10, val
);
191 static int pnv_eeh_outb_dbgfs_get(void *data
, u64
*val
)
193 return pnv_eeh_dbgfs_get(data
, 0xD10, val
);
196 static int pnv_eeh_inbA_dbgfs_set(void *data
, u64 val
)
198 return pnv_eeh_dbgfs_set(data
, 0xD90, val
);
201 static int pnv_eeh_inbA_dbgfs_get(void *data
, u64
*val
)
203 return pnv_eeh_dbgfs_get(data
, 0xD90, val
);
206 static int pnv_eeh_inbB_dbgfs_set(void *data
, u64 val
)
208 return pnv_eeh_dbgfs_set(data
, 0xE10, val
);
211 static int pnv_eeh_inbB_dbgfs_get(void *data
, u64
*val
)
213 return pnv_eeh_dbgfs_get(data
, 0xE10, val
);
216 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops
, pnv_eeh_outb_dbgfs_get
,
217 pnv_eeh_outb_dbgfs_set
, "0x%llx\n");
218 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops
, pnv_eeh_inbA_dbgfs_get
,
219 pnv_eeh_inbA_dbgfs_set
, "0x%llx\n");
220 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops
, pnv_eeh_inbB_dbgfs_get
,
221 pnv_eeh_inbB_dbgfs_set
, "0x%llx\n");
222 #endif /* CONFIG_DEBUG_FS */
225 * pnv_eeh_post_init - EEH platform dependent post initialization
227 * EEH platform dependent post initialization on powernv. When
228 * the function is called, the EEH PEs and devices should have
229 * been built. If the I/O cache stuff has been built, EEH is
230 * ready to supply service.
232 static int pnv_eeh_post_init(void)
234 struct pci_controller
*hose
;
238 /* Register OPAL event notifier */
239 if (!pnv_eeh_nb_init
) {
240 ret
= opal_notifier_register(&pnv_eeh_nb
);
242 pr_warn("%s: Can't register OPAL event notifier (%d)\n",
247 pnv_eeh_nb_init
= true;
250 list_for_each_entry(hose
, &hose_list
, list_node
) {
251 phb
= hose
->private_data
;
254 * If EEH is enabled, we're going to rely on that.
255 * Otherwise, we restore to conventional mechanism
256 * to clear frozen PE during PCI config access.
259 phb
->flags
|= PNV_PHB_FLAG_EEH
;
261 phb
->flags
&= ~PNV_PHB_FLAG_EEH
;
263 /* Create debugfs entries */
264 #ifdef CONFIG_DEBUG_FS
265 if (phb
->has_dbgfs
|| !phb
->dbgfs
)
269 debugfs_create_file("err_injct", 0200,
273 debugfs_create_file("err_injct_outbound", 0600,
275 &pnv_eeh_outb_dbgfs_ops
);
276 debugfs_create_file("err_injct_inboundA", 0600,
278 &pnv_eeh_inbA_dbgfs_ops
);
279 debugfs_create_file("err_injct_inboundB", 0600,
281 &pnv_eeh_inbB_dbgfs_ops
);
282 #endif /* CONFIG_DEBUG_FS */
290 * pnv_eeh_dev_probe - Do probe on PCI device
294 * When EEH module is installed during system boot, all PCI devices
295 * are checked one by one to see if it supports EEH. The function
296 * is introduced for the purpose. By default, EEH has been enabled
297 * on all PCI devices. That's to say, we only need do necessary
298 * initialization on the corresponding eeh device and create PE
301 * It's notable that it's unsafe to retrieve the EEH device through
302 * the corresponding PCI device. During the PCI device hotplug, which
303 * was possibly triggered by EEH core, the binding between EEH device
304 * and the PCI device isn't built yet.
306 static int pnv_eeh_dev_probe(struct pci_dev
*dev
, void *flag
)
308 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
309 struct pnv_phb
*phb
= hose
->private_data
;
310 struct device_node
*dn
= pci_device_to_OF_node(dev
);
311 struct eeh_dev
*edev
= of_node_to_eeh_dev(dn
);
315 * When probing the root bridge, which doesn't have any
316 * subordinate PCI devices. We don't have OF node for
317 * the root bridge. So it's not reasonable to continue
320 if (!dn
|| !edev
|| edev
->pe
)
323 /* Skip for PCI-ISA bridge */
324 if ((dev
->class >> 8) == PCI_CLASS_BRIDGE_ISA
)
327 /* Initialize eeh device */
328 edev
->class_code
= dev
->class;
329 edev
->mode
&= 0xFFFFFF00;
330 if (dev
->hdr_type
== PCI_HEADER_TYPE_BRIDGE
)
331 edev
->mode
|= EEH_DEV_BRIDGE
;
332 edev
->pcix_cap
= pci_find_capability(dev
, PCI_CAP_ID_PCIX
);
333 if (pci_is_pcie(dev
)) {
334 edev
->pcie_cap
= pci_pcie_cap(dev
);
336 if (pci_pcie_type(dev
) == PCI_EXP_TYPE_ROOT_PORT
)
337 edev
->mode
|= EEH_DEV_ROOT_PORT
;
338 else if (pci_pcie_type(dev
) == PCI_EXP_TYPE_DOWNSTREAM
)
339 edev
->mode
|= EEH_DEV_DS_PORT
;
341 edev
->aer_cap
= pci_find_ext_capability(dev
,
345 edev
->config_addr
= ((dev
->bus
->number
<< 8) | dev
->devfn
);
346 edev
->pe_config_addr
= phb
->bdfn_to_pe(phb
, dev
->bus
, dev
->devfn
& 0xff);
349 ret
= eeh_add_to_parent_pe(edev
);
351 pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n",
352 __func__
, pci_name(dev
), ret
);
357 * If the PE contains any one of following adapters, the
358 * PCI config space can't be accessed when dumping EEH log.
359 * Otherwise, we will run into fenced PHB caused by shortage
360 * of outbound credits in the adapter. The PCI config access
361 * should be blocked until PE reset. MMIO access is dropped
362 * by hardware certainly. In order to drop PCI config requests,
363 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
364 * will be checked in the backend for PE state retrieval. If
365 * the PE becomes frozen for the first time and the flag has
366 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
367 * that PE to block its config space.
369 * Broadcom Austin 4-ports NICs (14e4:1657)
370 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
372 if ((dev
->vendor
== PCI_VENDOR_ID_BROADCOM
&& dev
->device
== 0x1657) ||
373 (dev
->vendor
== PCI_VENDOR_ID_BROADCOM
&& dev
->device
== 0x168e))
374 edev
->pe
->state
|= EEH_PE_CFG_RESTRICTED
;
377 * Cache the PE primary bus, which can't be fetched when
378 * full hotplug is in progress. In that case, all child
379 * PCI devices of the PE are expected to be removed prior
383 edev
->pe
->bus
= dev
->bus
;
386 * Enable EEH explicitly so that we will do EEH check
387 * while accessing I/O stuff
389 eeh_add_flag(EEH_ENABLED
);
391 /* Save memory bars */
398 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
400 * @option: operation to be issued
402 * The function is used to control the EEH functionality globally.
403 * Currently, the following options are supported according to PAPR:
404 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
406 static int pnv_eeh_set_option(struct eeh_pe
*pe
, int option
)
408 struct pci_controller
*hose
= pe
->phb
;
409 struct pnv_phb
*phb
= hose
->private_data
;
410 bool freeze_pe
= false;
414 /* Sanity check on option */
416 case EEH_OPT_DISABLE
:
420 case EEH_OPT_THAW_MMIO
:
421 opt
= OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO
;
423 case EEH_OPT_THAW_DMA
:
424 opt
= OPAL_EEH_ACTION_CLEAR_FREEZE_DMA
;
426 case EEH_OPT_FREEZE_PE
:
428 opt
= OPAL_EEH_ACTION_SET_FREEZE_ALL
;
431 pr_warn("%s: Invalid option %d\n", __func__
, option
);
435 /* If the PHB supports compound PEs, let its callbacks handle them */
437 if (phb
->freeze_pe
) {
438 phb
->freeze_pe(phb
, pe
->addr
);
440 rc
= opal_pci_eeh_freeze_set(phb
->opal_id
,
442 if (rc
!= OPAL_SUCCESS
) {
443 pr_warn("%s: Failure %lld freezing "
446 phb
->hose
->global_number
, pe
->addr
);
451 if (phb
->unfreeze_pe
) {
452 ret
= phb
->unfreeze_pe(phb
, pe
->addr
, opt
);
454 rc
= opal_pci_eeh_freeze_clear(phb
->opal_id
,
456 if (rc
!= OPAL_SUCCESS
) {
457 pr_warn("%s: Failure %lld enable %d "
458 "for PHB#%x-PE#%x\n",
459 __func__
, rc
, option
,
460 phb
->hose
->global_number
, pe
->addr
);
470 * pnv_eeh_get_pe_addr - Retrieve PE address
473 * Retrieve the PE address according to the given traditional
474 * PCI BDF (Bus/Device/Function) address.
476 static int pnv_eeh_get_pe_addr(struct eeh_pe
*pe
)
482 * pnv_eeh_get_state - Retrieve PE state
484 * @delay: delay while PE state is temporarily unavailable
486 * Retrieve the state of the specified PE. For IODA-compatible
487 * platform, it should be retrieved from IODA table. Therefore,
488 * we prefer passing down to hardware implementation to handle
491 static int pnv_eeh_get_state(struct eeh_pe
*pe
, int *delay
)
493 struct pci_controller
*hose
= pe
->phb
;
494 struct pnv_phb
*phb
= hose
->private_data
;
495 int ret
= EEH_STATE_NOT_SUPPORT
;
497 if (phb
->eeh_ops
&& phb
->eeh_ops
->get_state
) {
498 ret
= phb
->eeh_ops
->get_state(pe
);
501 * If the PE state is temporarily unavailable,
502 * to inform the EEH core delay for default
507 if (ret
& EEH_STATE_UNAVAILABLE
)
516 * pnv_eeh_reset - Reset the specified PE
518 * @option: reset option
520 * Reset the specified PE
522 static int pnv_eeh_reset(struct eeh_pe
*pe
, int option
)
524 struct pci_controller
*hose
= pe
->phb
;
525 struct pnv_phb
*phb
= hose
->private_data
;
528 if (phb
->eeh_ops
&& phb
->eeh_ops
->reset
)
529 ret
= phb
->eeh_ops
->reset(pe
, option
);
535 * pnv_eeh_wait_state - Wait for PE state
537 * @max_wait: maximal period in microsecond
539 * Wait for the state of associated PE. It might take some time
540 * to retrieve the PE's state.
542 static int pnv_eeh_wait_state(struct eeh_pe
*pe
, int max_wait
)
548 ret
= pnv_eeh_get_state(pe
, &mwait
);
551 * If the PE's state is temporarily unavailable,
552 * we have to wait for the specified time. Otherwise,
553 * the PE's state will be returned immediately.
555 if (ret
!= EEH_STATE_UNAVAILABLE
)
560 pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
561 __func__
, pe
->addr
, max_wait
);
562 return EEH_STATE_NOT_SUPPORT
;
568 return EEH_STATE_NOT_SUPPORT
;
572 * pnv_eeh_get_log - Retrieve error log
574 * @severity: temporary or permanent error log
575 * @drv_log: driver log to be combined with retrieved error log
576 * @len: length of driver log
578 * Retrieve the temporary or permanent error from the PE.
580 static int pnv_eeh_get_log(struct eeh_pe
*pe
, int severity
,
581 char *drv_log
, unsigned long len
)
583 if (!eeh_has_flag(EEH_EARLY_DUMP_LOG
))
584 pnv_pci_dump_phb_diag_data(pe
->phb
, pe
->data
);
590 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
593 * The function will be called to reconfigure the bridges included
594 * in the specified PE so that the malfunctioning PE would be recovered
597 static int pnv_eeh_configure_bridge(struct eeh_pe
*pe
)
603 * pnv_eeh_err_inject - Inject specified error to the indicated PE
604 * @pe: the indicated PE
606 * @func: specific error type
608 * @mask: address mask
610 * The routine is called to inject specified error, which is
611 * determined by @type and @func, to the indicated PE for
614 static int pnv_eeh_err_inject(struct eeh_pe
*pe
, int type
, int func
,
615 unsigned long addr
, unsigned long mask
)
617 struct pci_controller
*hose
= pe
->phb
;
618 struct pnv_phb
*phb
= hose
->private_data
;
621 /* Sanity check on error type */
622 if (type
!= OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR
&&
623 type
!= OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64
) {
624 pr_warn("%s: Invalid error type %d\n",
629 if (func
< OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR
||
630 func
> OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET
) {
631 pr_warn("%s: Invalid error function %d\n",
636 /* Firmware supports error injection ? */
637 if (!opal_check_token(OPAL_PCI_ERR_INJECT
)) {
638 pr_warn("%s: Firmware doesn't support error injection\n",
643 /* Do error injection */
644 rc
= opal_pci_err_inject(phb
->opal_id
, pe
->addr
,
645 type
, func
, addr
, mask
);
646 if (rc
!= OPAL_SUCCESS
) {
647 pr_warn("%s: Failure %lld injecting error "
648 "%d-%d to PHB#%x-PE#%x\n",
649 __func__
, rc
, type
, func
,
650 hose
->global_number
, pe
->addr
);
657 static inline bool pnv_eeh_cfg_blocked(struct device_node
*dn
)
659 struct eeh_dev
*edev
= of_node_to_eeh_dev(dn
);
661 if (!edev
|| !edev
->pe
)
664 if (edev
->pe
->state
& EEH_PE_CFG_BLOCKED
)
670 static int pnv_eeh_read_config(struct device_node
*dn
,
671 int where
, int size
, u32
*val
)
673 if (pnv_eeh_cfg_blocked(dn
)) {
675 return PCIBIOS_SET_FAILED
;
678 return pnv_pci_cfg_read(dn
, where
, size
, val
);
681 static int pnv_eeh_write_config(struct device_node
*dn
,
682 int where
, int size
, u32 val
)
684 if (pnv_eeh_cfg_blocked(dn
))
685 return PCIBIOS_SET_FAILED
;
687 return pnv_pci_cfg_write(dn
, where
, size
, val
);
691 * pnv_eeh_next_error - Retrieve next EEH error to handle
694 * Using OPAL API, to retrieve next EEH error for EEH core to handle
696 static int pnv_eeh_next_error(struct eeh_pe
**pe
)
698 struct pci_controller
*hose
;
699 struct pnv_phb
*phb
= NULL
;
701 list_for_each_entry(hose
, &hose_list
, list_node
) {
702 phb
= hose
->private_data
;
706 if (phb
&& phb
->eeh_ops
->next_error
)
707 return phb
->eeh_ops
->next_error(pe
);
712 static int pnv_eeh_restore_config(struct device_node
*dn
)
714 struct eeh_dev
*edev
= of_node_to_eeh_dev(dn
);
721 phb
= edev
->phb
->private_data
;
722 ret
= opal_pci_reinit(phb
->opal_id
,
723 OPAL_REINIT_PCI_DEV
, edev
->config_addr
);
725 pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
726 __func__
, edev
->config_addr
, ret
);
733 static struct eeh_ops pnv_eeh_ops
= {
735 .init
= pnv_eeh_init
,
736 .post_init
= pnv_eeh_post_init
,
738 .dev_probe
= pnv_eeh_dev_probe
,
739 .set_option
= pnv_eeh_set_option
,
740 .get_pe_addr
= pnv_eeh_get_pe_addr
,
741 .get_state
= pnv_eeh_get_state
,
742 .reset
= pnv_eeh_reset
,
743 .wait_state
= pnv_eeh_wait_state
,
744 .get_log
= pnv_eeh_get_log
,
745 .configure_bridge
= pnv_eeh_configure_bridge
,
746 .err_inject
= pnv_eeh_err_inject
,
747 .read_config
= pnv_eeh_read_config
,
748 .write_config
= pnv_eeh_write_config
,
749 .next_error
= pnv_eeh_next_error
,
750 .restore_config
= pnv_eeh_restore_config
754 * eeh_powernv_init - Register platform dependent EEH operations
756 * EEH initialization on powernv platform. This function should be
757 * called before any EEH related functions.
759 static int __init
eeh_powernv_init(void)
763 eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE
);
764 ret
= eeh_ops_register(&pnv_eeh_ops
);
766 pr_info("EEH: PowerNV platform initialized\n");
768 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret
);
/* Hook eeh_powernv_init() up through machine_early_initcall() for powernv. */
machine_early_initcall(powernv, eeh_powernv_init);