2 * The file intends to implement PE based on the information from
3 * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
4 * All the PEs should be organized as hierarchy tree. The first level
5 * of the tree will be associated to existing PHBs since the particular
6 * PE is only meaningful in one PHB domain.
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/delay.h>
26 #include <linux/export.h>
27 #include <linux/gfp.h>
28 #include <linux/kernel.h>
29 #include <linux/pci.h>
30 #include <linux/string.h>
32 #include <asm/pci-bridge.h>
33 #include <asm/ppc-pci.h>
35 static LIST_HEAD(eeh_phb_pe
);
38 * eeh_pe_alloc - Allocate PE
39 * @phb: PCI controller
42 * Allocate PE instance dynamically.
44 static struct eeh_pe
*eeh_pe_alloc(struct pci_controller
*phb
, int type
)
49 pe
= kzalloc(sizeof(struct eeh_pe
), GFP_KERNEL
);
52 /* Initialize PHB PE */
55 INIT_LIST_HEAD(&pe
->child_list
);
56 INIT_LIST_HEAD(&pe
->child
);
57 INIT_LIST_HEAD(&pe
->edevs
);
63 * eeh_phb_pe_create - Create PHB PE
64 * @phb: PCI controller
66 * The function should be called while the PHB is detected during
67 * system boot or PCI hotplug in order to create PHB PE.
69 int eeh_phb_pe_create(struct pci_controller
*phb
)
74 pe
= eeh_pe_alloc(phb
, EEH_PE_PHB
);
76 pr_err("%s: out of memory!\n", __func__
);
80 /* Put it into the list */
81 list_add_tail(&pe
->child
, &eeh_phb_pe
);
83 pr_debug("EEH: Add PE for PHB#%d\n", phb
->global_number
);
89 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
90 * @phb: PCI controller
92 * The overall PEs form hierarchy tree. The first layer of the
93 * hierarchy tree is composed of PHB PEs. The function is used
94 * to retrieve the corresponding PHB PE according to the given PHB.
96 struct eeh_pe
*eeh_phb_pe_get(struct pci_controller
*phb
)
100 list_for_each_entry(pe
, &eeh_phb_pe
, child
) {
102 * Actually, we needn't check the type since
103 * the PE for PHB has been determined when that
106 if ((pe
->type
& EEH_PE_PHB
) && pe
->phb
== phb
)
114 * eeh_pe_next - Retrieve the next PE in the tree
118 * The function is used to retrieve the next PE in the
121 static struct eeh_pe
*eeh_pe_next(struct eeh_pe
*pe
,
124 struct list_head
*next
= pe
->child_list
.next
;
126 if (next
== &pe
->child_list
) {
130 next
= pe
->child
.next
;
131 if (next
!= &pe
->parent
->child_list
)
137 return list_entry(next
, struct eeh_pe
, child
);
141 * eeh_pe_traverse - Traverse PEs in the specified PHB
144 * @flag: extra parameter to callback
146 * The function is used to traverse the specified PE and its
147 * child PEs. The traversing is to be terminated once the
148 * callback returns something other than NULL, or no more PEs
151 void *eeh_pe_traverse(struct eeh_pe
*root
,
152 eeh_traverse_func fn
, void *flag
)
157 for (pe
= root
; pe
; pe
= eeh_pe_next(pe
, root
)) {
166 * eeh_pe_dev_traverse - Traverse the devices from the PE
168 * @fn: function callback
169 * @flag: extra parameter to callback
171 * The function is used to traverse the devices of the specified
172 * PE and its child PEs.
174 void *eeh_pe_dev_traverse(struct eeh_pe
*root
,
175 eeh_traverse_func fn
, void *flag
)
178 struct eeh_dev
*edev
, *tmp
;
182 pr_warning("%s: Invalid PE %p\n", __func__
, root
);
186 /* Traverse root PE */
187 for (pe
= root
; pe
; pe
= eeh_pe_next(pe
, root
)) {
188 eeh_pe_for_each_dev(pe
, edev
, tmp
) {
189 ret
= fn(edev
, flag
);
199 * __eeh_pe_get - Check the PE address
203 * For one particular PE, it can be identified by PE address
204 * or tranditional BDF address. BDF address is composed of
205 * Bus/Device/Function number. The extra data referred by flag
206 * indicates which type of address should be used.
208 static void *__eeh_pe_get(void *data
, void *flag
)
210 struct eeh_pe
*pe
= (struct eeh_pe
*)data
;
211 struct eeh_dev
*edev
= (struct eeh_dev
*)flag
;
213 /* Unexpected PHB PE */
214 if (pe
->type
& EEH_PE_PHB
)
217 /* We prefer PE address */
218 if (edev
->pe_config_addr
&&
219 (edev
->pe_config_addr
== pe
->addr
))
222 /* Try BDF address */
223 if (edev
->config_addr
&&
224 (edev
->config_addr
== pe
->config_addr
))
231 * eeh_pe_get - Search PE based on the given address
234 * Search the corresponding PE based on the specified address which
235 * is included in the eeh device. The function is used to check if
236 * the associated PE has been created against the PE address. It's
237 * notable that the PE address has 2 format: traditional PE address
238 * which is composed of PCI bus/device/function number, or unified
241 struct eeh_pe
*eeh_pe_get(struct eeh_dev
*edev
)
243 struct eeh_pe
*root
= eeh_phb_pe_get(edev
->phb
);
246 pe
= eeh_pe_traverse(root
, __eeh_pe_get
, edev
);
252 * eeh_pe_get_parent - Retrieve the parent PE
255 * The whole PEs existing in the system are organized as hierarchy
256 * tree. The function is used to retrieve the parent PE according
257 * to the parent EEH device.
259 static struct eeh_pe
*eeh_pe_get_parent(struct eeh_dev
*edev
)
261 struct device_node
*dn
;
262 struct eeh_dev
*parent
;
265 * It might have the case for the indirect parent
266 * EEH device already having associated PE, but
267 * the direct parent EEH device doesn't have yet.
269 dn
= edev
->dn
->parent
;
271 /* We're poking out of PCI territory */
272 if (!PCI_DN(dn
)) return NULL
;
274 parent
= of_node_to_eeh_dev(dn
);
275 /* We're poking out of PCI territory */
276 if (!parent
) return NULL
;
288 * eeh_add_to_parent_pe - Add EEH device to parent PE
291 * Add EEH device to the parent PE. If the parent PE already
292 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
293 * we have to create new PE to hold the EEH device and the new
294 * PE will be linked to its parent PE as well.
296 int eeh_add_to_parent_pe(struct eeh_dev
*edev
)
298 struct eeh_pe
*pe
, *parent
;
301 * Search the PE has been existing or not according
302 * to the PE address. If that has been existing, the
303 * PE should be composed of PCI bus and its subordinate
306 pe
= eeh_pe_get(edev
);
307 if (pe
&& !(pe
->type
& EEH_PE_INVALID
)) {
308 if (!edev
->pe_config_addr
) {
309 pr_err("%s: PE with addr 0x%x already exists\n",
310 __func__
, edev
->config_addr
);
314 /* Mark the PE as type of PCI bus */
315 pe
->type
= EEH_PE_BUS
;
318 /* Put the edev to PE */
319 list_add_tail(&edev
->list
, &pe
->edevs
);
320 pr_debug("EEH: Add %s to Bus PE#%x\n",
321 edev
->dn
->full_name
, pe
->addr
);
324 } else if (pe
&& (pe
->type
& EEH_PE_INVALID
)) {
325 list_add_tail(&edev
->list
, &pe
->edevs
);
328 * We're running to here because of PCI hotplug caused by
329 * EEH recovery. We need clear EEH_PE_INVALID until the top.
333 if (!(parent
->type
& EEH_PE_INVALID
))
335 parent
->type
&= ~(EEH_PE_INVALID
| EEH_PE_KEEP
);
336 parent
= parent
->parent
;
338 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
339 edev
->dn
->full_name
, pe
->addr
, pe
->parent
->addr
);
344 /* Create a new EEH PE */
345 pe
= eeh_pe_alloc(edev
->phb
, EEH_PE_DEVICE
);
347 pr_err("%s: out of memory!\n", __func__
);
350 pe
->addr
= edev
->pe_config_addr
;
351 pe
->config_addr
= edev
->config_addr
;
354 * Put the new EEH PE into hierarchy tree. If the parent
355 * can't be found, the newly created PE will be attached
356 * to PHB directly. Otherwise, we have to associate the
357 * PE with its parent.
359 parent
= eeh_pe_get_parent(edev
);
361 parent
= eeh_phb_pe_get(edev
->phb
);
363 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
364 __func__
, edev
->phb
->global_number
);
373 * Put the newly created PE into the child list and
374 * link the EEH device accordingly.
376 list_add_tail(&pe
->child
, &parent
->child_list
);
377 list_add_tail(&edev
->list
, &pe
->edevs
);
379 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
380 edev
->dn
->full_name
, pe
->addr
, pe
->parent
->addr
);
386 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
389 * The PE hierarchy tree might be changed when doing PCI hotplug.
390 * Also, the PCI devices or buses could be removed from the system
391 * during EEH recovery. So we have to call the function remove the
392 * corresponding PE accordingly if necessary.
394 int eeh_rmv_from_parent_pe(struct eeh_dev
*edev
)
396 struct eeh_pe
*pe
, *parent
, *child
;
400 pr_debug("%s: No PE found for EEH device %s\n",
401 __func__
, edev
->dn
->full_name
);
405 /* Remove the EEH device */
408 list_del(&edev
->list
);
411 * Check if the parent PE includes any EEH devices.
412 * If not, we should delete that. Also, we should
413 * delete the parent PE if it doesn't have associated
414 * child PEs and EEH devices.
418 if (pe
->type
& EEH_PE_PHB
)
421 if (!(pe
->state
& EEH_PE_KEEP
)) {
422 if (list_empty(&pe
->edevs
) &&
423 list_empty(&pe
->child_list
)) {
424 list_del(&pe
->child
);
430 if (list_empty(&pe
->edevs
)) {
432 list_for_each_entry(child
, &pe
->child_list
, child
) {
433 if (!(child
->type
& EEH_PE_INVALID
)) {
440 pe
->type
|= EEH_PE_INVALID
;
453 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
456 * We have time stamp for each PE to trace its time of getting
457 * frozen in last hour. The function should be called to update
458 * the time stamp on first error of the specific PE. On the other
459 * handle, we needn't account for errors happened in last hour.
461 void eeh_pe_update_time_stamp(struct eeh_pe
*pe
)
463 struct timeval tstamp
;
467 if (pe
->freeze_count
<= 0) {
468 pe
->freeze_count
= 0;
469 do_gettimeofday(&pe
->tstamp
);
471 do_gettimeofday(&tstamp
);
472 if (tstamp
.tv_sec
- pe
->tstamp
.tv_sec
> 3600) {
474 pe
->freeze_count
= 0;
480 * __eeh_pe_state_mark - Mark the state for the PE
484 * The function is used to mark the indicated state for the given
485 * PE. Also, the associated PCI devices will be put into IO frozen
488 static void *__eeh_pe_state_mark(void *data
, void *flag
)
490 struct eeh_pe
*pe
= (struct eeh_pe
*)data
;
491 int state
= *((int *)flag
);
492 struct eeh_dev
*edev
, *tmp
;
493 struct pci_dev
*pdev
;
495 /* Keep the state of permanently removed PE intact */
496 if ((pe
->freeze_count
> EEH_MAX_ALLOWED_FREEZES
) &&
497 (state
& (EEH_PE_ISOLATED
| EEH_PE_RECOVERING
)))
502 /* Offline PCI devices if applicable */
503 if (state
!= EEH_PE_ISOLATED
)
506 eeh_pe_for_each_dev(pe
, edev
, tmp
) {
507 pdev
= eeh_dev_to_pci_dev(edev
);
509 pdev
->error_state
= pci_channel_io_frozen
;
516 * eeh_pe_state_mark - Mark specified state for PE and its associated device
519 * EEH error affects the current PE and its child PEs. The function
520 * is used to mark appropriate state for the affected PEs and the
521 * associated devices.
523 void eeh_pe_state_mark(struct eeh_pe
*pe
, int state
)
525 eeh_pe_traverse(pe
, __eeh_pe_state_mark
, &state
);
528 static void *__eeh_pe_dev_mode_mark(void *data
, void *flag
)
530 struct eeh_dev
*edev
= data
;
531 int mode
= *((int *)flag
);
539 * eeh_pe_dev_state_mark - Mark state for all device under the PE
542 * Mark specific state for all child devices of the PE.
544 void eeh_pe_dev_mode_mark(struct eeh_pe
*pe
, int mode
)
546 eeh_pe_dev_traverse(pe
, __eeh_pe_dev_mode_mark
, &mode
);
550 * __eeh_pe_state_clear - Clear state for the PE
554 * The function is used to clear the indicated state from the
555 * given PE. Besides, we also clear the check count of the PE
558 static void *__eeh_pe_state_clear(void *data
, void *flag
)
560 struct eeh_pe
*pe
= (struct eeh_pe
*)data
;
561 int state
= *((int *)flag
);
563 /* Keep the state of permanently removed PE intact */
564 if ((pe
->freeze_count
> EEH_MAX_ALLOWED_FREEZES
) &&
565 (state
& EEH_PE_ISOLATED
))
570 /* Clear check count since last isolation */
571 if (state
& EEH_PE_ISOLATED
)
578 * eeh_pe_state_clear - Clear state for the PE and its children
580 * @state: state to be cleared
582 * When the PE and its children has been recovered from error,
583 * we need clear the error state for that. The function is used
586 void eeh_pe_state_clear(struct eeh_pe
*pe
, int state
)
588 eeh_pe_traverse(pe
, __eeh_pe_state_clear
, &state
);
592 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
593 * buses assigned explicitly by firmware, and we probably have
594 * lost that after reset. So we have to delay the check until
595 * the PCI-CFG registers have been restored for the parent
598 * Don't use normal PCI-CFG accessors, which probably has been
599 * blocked on normal path during the stage. So we need utilize
600 * eeh operations, which is always permitted.
602 static void eeh_bridge_check_link(struct eeh_dev
*edev
,
603 struct device_node
*dn
)
610 * We only check root port and downstream ports of
613 if (!(edev
->mode
& (EEH_DEV_ROOT_PORT
| EEH_DEV_DS_PORT
)))
616 pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
617 __func__
, edev
->phb
->global_number
,
618 edev
->config_addr
>> 8,
619 PCI_SLOT(edev
->config_addr
& 0xFF),
620 PCI_FUNC(edev
->config_addr
& 0xFF));
622 /* Check slot status */
623 cap
= edev
->pcie_cap
;
624 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_SLTSTA
, 2, &val
);
625 if (!(val
& PCI_EXP_SLTSTA_PDS
)) {
626 pr_debug(" No card in the slot (0x%04x) !\n", val
);
630 /* Check power status if we have the capability */
631 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_SLTCAP
, 2, &val
);
632 if (val
& PCI_EXP_SLTCAP_PCP
) {
633 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_SLTCTL
, 2, &val
);
634 if (val
& PCI_EXP_SLTCTL_PCC
) {
635 pr_debug(" In power-off state, power it on ...\n");
636 val
&= ~(PCI_EXP_SLTCTL_PCC
| PCI_EXP_SLTCTL_PIC
);
637 val
|= (0x0100 & PCI_EXP_SLTCTL_PIC
);
638 eeh_ops
->write_config(dn
, cap
+ PCI_EXP_SLTCTL
, 2, val
);
644 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_LNKCTL
, 2, &val
);
645 val
&= ~PCI_EXP_LNKCTL_LD
;
646 eeh_ops
->write_config(dn
, cap
+ PCI_EXP_LNKCTL
, 2, val
);
649 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_LNKCAP
, 4, &val
);
650 if (!(val
& PCI_EXP_LNKCAP_DLLLARC
)) {
651 pr_debug(" No link reporting capability (0x%08x) \n", val
);
656 /* Wait the link is up until timeout (5s) */
658 while (timeout
< 5000) {
662 eeh_ops
->read_config(dn
, cap
+ PCI_EXP_LNKSTA
, 2, &val
);
663 if (val
& PCI_EXP_LNKSTA_DLLLA
)
667 if (val
& PCI_EXP_LNKSTA_DLLLA
)
668 pr_debug(" Link up (%s)\n",
669 (val
& PCI_EXP_LNKSTA_CLS_2_5GB
) ? "2.5GB" : "5GB");
671 pr_debug(" Link not ready (0x%04x)\n", val
);
674 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
675 #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
677 static void eeh_restore_bridge_bars(struct eeh_dev
*edev
,
678 struct device_node
*dn
)
683 * Device BARs: 0x10 - 0x18
684 * Bus numbers and windows: 0x18 - 0x30
686 for (i
= 4; i
< 13; i
++)
687 eeh_ops
->write_config(dn
, i
*4, 4, edev
->config_space
[i
]);
689 eeh_ops
->write_config(dn
, 14*4, 4, edev
->config_space
[14]);
691 /* Cache line & Latency timer: 0xC 0xD */
692 eeh_ops
->write_config(dn
, PCI_CACHE_LINE_SIZE
, 1,
693 SAVED_BYTE(PCI_CACHE_LINE_SIZE
));
694 eeh_ops
->write_config(dn
, PCI_LATENCY_TIMER
, 1,
695 SAVED_BYTE(PCI_LATENCY_TIMER
));
696 /* Max latency, min grant, interrupt ping and line: 0x3C */
697 eeh_ops
->write_config(dn
, 15*4, 4, edev
->config_space
[15]);
699 /* PCI Command: 0x4 */
700 eeh_ops
->write_config(dn
, PCI_COMMAND
, 4, edev
->config_space
[1]);
702 /* Check the PCIe link is ready */
703 eeh_bridge_check_link(edev
, dn
);
706 static void eeh_restore_device_bars(struct eeh_dev
*edev
,
707 struct device_node
*dn
)
712 for (i
= 4; i
< 10; i
++)
713 eeh_ops
->write_config(dn
, i
*4, 4, edev
->config_space
[i
]);
714 /* 12 == Expansion ROM Address */
715 eeh_ops
->write_config(dn
, 12*4, 4, edev
->config_space
[12]);
717 eeh_ops
->write_config(dn
, PCI_CACHE_LINE_SIZE
, 1,
718 SAVED_BYTE(PCI_CACHE_LINE_SIZE
));
719 eeh_ops
->write_config(dn
, PCI_LATENCY_TIMER
, 1,
720 SAVED_BYTE(PCI_LATENCY_TIMER
));
722 /* max latency, min grant, interrupt pin and line */
723 eeh_ops
->write_config(dn
, 15*4, 4, edev
->config_space
[15]);
726 * Restore PERR & SERR bits, some devices require it,
727 * don't touch the other command bits
729 eeh_ops
->read_config(dn
, PCI_COMMAND
, 4, &cmd
);
730 if (edev
->config_space
[1] & PCI_COMMAND_PARITY
)
731 cmd
|= PCI_COMMAND_PARITY
;
733 cmd
&= ~PCI_COMMAND_PARITY
;
734 if (edev
->config_space
[1] & PCI_COMMAND_SERR
)
735 cmd
|= PCI_COMMAND_SERR
;
737 cmd
&= ~PCI_COMMAND_SERR
;
738 eeh_ops
->write_config(dn
, PCI_COMMAND
, 4, cmd
);
742 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
746 * Loads the PCI configuration space base address registers,
747 * the expansion ROM base address, the latency timer, and etc.
748 * from the saved values in the device node.
750 static void *eeh_restore_one_device_bars(void *data
, void *flag
)
752 struct eeh_dev
*edev
= (struct eeh_dev
*)data
;
753 struct device_node
*dn
= eeh_dev_to_of_node(edev
);
755 /* Do special restore for bridges */
756 if (edev
->mode
& EEH_DEV_BRIDGE
)
757 eeh_restore_bridge_bars(edev
, dn
);
759 eeh_restore_device_bars(edev
, dn
);
761 if (eeh_ops
->restore_config
)
762 eeh_ops
->restore_config(dn
);
768 * eeh_pe_restore_bars - Restore the PCI config space info
771 * This routine performs a recursive walk to the children
772 * of this device as well.
774 void eeh_pe_restore_bars(struct eeh_pe
*pe
)
777 * We needn't take the EEH lock since eeh_pe_dev_traverse()
780 eeh_pe_dev_traverse(pe
, eeh_restore_one_device_bars
, NULL
);
784 * eeh_pe_loc_get - Retrieve location code binding to the given PE
787 * Retrieve the location code of the given PE. If the primary PE bus
788 * is root bus, we will grab location code from PHB device tree node
789 * or root port. Otherwise, the upstream bridge's device tree node
790 * of the primary PE bus will be checked for the location code.
792 const char *eeh_pe_loc_get(struct eeh_pe
*pe
)
794 struct pci_bus
*bus
= eeh_pe_bus_get(pe
);
795 struct device_node
*dn
= pci_bus_to_OF_node(bus
);
796 const char *loc
= NULL
;
801 /* PHB PE or root PE ? */
802 if (pci_is_root_bus(bus
)) {
803 loc
= of_get_property(dn
, "ibm,loc-code", NULL
);
805 loc
= of_get_property(dn
, "ibm,io-base-loc-code", NULL
);
809 /* Check the root port */
815 loc
= of_get_property(dn
, "ibm,loc-code", NULL
);
817 loc
= of_get_property(dn
, "ibm,slot-location-code", NULL
);
820 return loc
? loc
: "N/A";
824 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
827 * Retrieve the PCI bus according to the given PE. Basically,
828 * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
829 * primary PCI bus will be retrieved. The parent bus will be
830 * returned for BUS PE. However, we don't have associated PCI
833 struct pci_bus
*eeh_pe_bus_get(struct eeh_pe
*pe
)
835 struct pci_bus
*bus
= NULL
;
836 struct eeh_dev
*edev
;
837 struct pci_dev
*pdev
;
839 if (pe
->type
& EEH_PE_PHB
) {
841 } else if (pe
->type
& EEH_PE_BUS
||
842 pe
->type
& EEH_PE_DEVICE
) {
848 edev
= list_first_entry(&pe
->edevs
, struct eeh_dev
, list
);
849 pdev
= eeh_dev_to_pci_dev(edev
);