]>
Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
3c8c90ab LV |
2 | * Copyright IBM Corporation 2001, 2005, 2006 |
3 | * Copyright Dave Engebretsen & Todd Inglett 2001 | |
4 | * Copyright Linas Vepstas 2005, 2006 | |
cb3bc9d0 | 5 | * Copyright 2001-2012 IBM Corporation. |
69376502 | 6 | * |
1da177e4 LT |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
69376502 | 11 | * |
1da177e4 LT |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
69376502 | 16 | * |
1da177e4 LT |
17 | * You should have received a copy of the GNU General Public License |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3c8c90ab LV |
20 | * |
21 | * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> | |
1da177e4 LT |
22 | */ |
23 | ||
6dee3fb9 | 24 | #include <linux/delay.h> |
cb3bc9d0 | 25 | #include <linux/sched.h> |
1da177e4 LT |
26 | #include <linux/init.h> |
27 | #include <linux/list.h> | |
1da177e4 LT |
28 | #include <linux/pci.h> |
29 | #include <linux/proc_fs.h> | |
30 | #include <linux/rbtree.h> | |
31 | #include <linux/seq_file.h> | |
32 | #include <linux/spinlock.h> | |
66b15db6 | 33 | #include <linux/export.h> |
acaa6176 SR |
34 | #include <linux/of.h> |
35 | ||
60063497 | 36 | #include <linux/atomic.h> |
1da177e4 | 37 | #include <asm/eeh.h> |
172ca926 | 38 | #include <asm/eeh_event.h> |
1da177e4 LT |
39 | #include <asm/io.h> |
40 | #include <asm/machdep.h> | |
172ca926 | 41 | #include <asm/ppc-pci.h> |
1da177e4 | 42 | #include <asm/rtas.h> |
1da177e4 | 43 | |
1da177e4 LT |
44 | |
45 | /** Overview: | |
46 | * EEH, or "Extended Error Handling" is a PCI bridge technology for | |
47 | * dealing with PCI bus errors that can't be dealt with within the | |
48 | * usual PCI framework, except by check-stopping the CPU. Systems | |
49 | * that are designed for high-availability/reliability cannot afford | |
50 | * to crash due to a "mere" PCI error, thus the need for EEH. | |
51 | * An EEH-capable bridge operates by converting a detected error | |
52 | * into a "slot freeze", taking the PCI adapter off-line, making | |
53 | * the slot behave, from the OS'es point of view, as if the slot | |
54 | * were "empty": all reads return 0xff's and all writes are silently | |
55 | * ignored. EEH slot isolation events can be triggered by parity | |
56 | * errors on the address or data busses (e.g. during posted writes), | |
69376502 LV |
57 | * which in turn might be caused by low voltage on the bus, dust, |
58 | * vibration, humidity, radioactivity or plain-old failed hardware. | |
1da177e4 LT |
59 | * |
60 | * Note, however, that one of the leading causes of EEH slot | |
61 | * freeze events are buggy device drivers, buggy device microcode, | |
62 | * or buggy device hardware. This is because any attempt by the | |
63 | * device to bus-master data to a memory address that is not | |
64 | * assigned to the device will trigger a slot freeze. (The idea | |
65 | * is to prevent devices-gone-wild from corrupting system memory). | |
66 | * Buggy hardware/drivers will have a miserable time co-existing | |
67 | * with EEH. | |
68 | * | |
69 | * Ideally, a PCI device driver, when suspecting that an isolation | |
25985edc | 70 | * event has occurred (e.g. by reading 0xff's), will then ask EEH |
1da177e4 LT |
71 | * whether this is the case, and then take appropriate steps to |
72 | * reset the PCI slot, the PCI device, and then resume operations. | |
73 | * However, until that day, the checking is done here, with the | |
74 | * eeh_check_failure() routine embedded in the MMIO macros. If | |
75 | * the slot is found to be isolated, an "EEH Event" is synthesized | |
76 | * and sent out for processing. | |
77 | */ | |
78 | ||
/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message. eeh_dn_check_failure() reports
 * every time the per-device check count is a multiple of this value.
 */
#define EEH_MAX_FAILS 2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (60*1000)

/* RTAS tokens */
static int ibm_configure_bridge;
static int ibm_configure_pe;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* When zero, eeh_dn_check_failure() returns early without checking */
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);

/* Lock to avoid races due to multiple reports of an error */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/* System monitoring statistics */
/* eeh_check_failure(): no pci_dev found for the faulting address */
static unsigned long no_device;
/* eeh_dn_check_failure(): called with a NULL device node */
static unsigned long no_dn;
/* eeh_dn_check_failure(): neither config address is set for the PE */
static unsigned long no_cfg_addr;
/* eeh_dn_check_failure(): EEH unsupported or NOCHECK set for the PE */
static unsigned long ignored_check;
/* eeh_dn_check_failure(): total number of calls */
static unsigned long total_mmio_ffs;
/* eeh_dn_check_failure(): platform state query did not confirm a freeze */
static unsigned long false_positives;
/* eeh_dn_check_failure(): freezes confirmed and reported as events */
static unsigned long slot_resets;
1da177e4 | 117 | |
7684b40c LV |
/* True when the base class (top byte of the 24-bit class code) is "bridge" */
#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)
119 | ||
d99bb1db | 120 | /** |
cce4b2d2 | 121 | * eeh_gather_pci_data - Copy assorted PCI config space registers to buff |
d99bb1db LV |
122 | * @pdn: device to report data for |
123 | * @buf: point to buffer in which to log | |
124 | * @len: amount of room in buffer | |
125 | * | |
126 | * This routine captures assorted PCI configuration space data, | |
127 | * and puts them into a buffer for RTAS error logging. | |
128 | */ | |
cce4b2d2 | 129 | static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) |
d99bb1db | 130 | { |
0b9369f4 | 131 | struct pci_dev *dev = pdn->pcidev; |
d99bb1db | 132 | u32 cfg; |
fcf9892b | 133 | int cap, i; |
d99bb1db LV |
134 | int n = 0; |
135 | ||
fcf9892b LV |
136 | n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name); |
137 | printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name); | |
138 | ||
d99bb1db | 139 | rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg); |
fcf9892b LV |
140 | n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); |
141 | printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); | |
142 | ||
d99bb1db LV |
143 | rtas_read_config(pdn, PCI_COMMAND, 4, &cfg); |
144 | n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); | |
fcf9892b LV |
145 | printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); |
146 | ||
b37ceefe LV |
147 | if (!dev) { |
148 | printk(KERN_WARNING "EEH: no PCI device for this of node\n"); | |
149 | return n; | |
150 | } | |
151 | ||
0b9369f4 LV |
152 | /* Gather bridge-specific registers */ |
153 | if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { | |
154 | rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg); | |
155 | n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); | |
156 | printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); | |
157 | ||
158 | rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); | |
159 | n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); | |
160 | printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); | |
161 | } | |
162 | ||
fcf9892b | 163 | /* Dump out the PCI-X command and status regs */ |
b37ceefe | 164 | cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); |
fcf9892b LV |
165 | if (cap) { |
166 | rtas_read_config(pdn, cap, 4, &cfg); | |
167 | n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); | |
168 | printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); | |
169 | ||
170 | rtas_read_config(pdn, cap+4, 4, &cfg); | |
171 | n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); | |
172 | printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); | |
173 | } | |
174 | ||
175 | /* If PCI-E capable, dump PCI-E cap 10, and the AER */ | |
b37ceefe | 176 | cap = pci_find_capability(dev, PCI_CAP_ID_EXP); |
fcf9892b LV |
177 | if (cap) { |
178 | n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); | |
179 | printk(KERN_WARNING | |
180 | "EEH: PCI-E capabilities and status follow:\n"); | |
181 | ||
182 | for (i=0; i<=8; i++) { | |
183 | rtas_read_config(pdn, cap+4*i, 4, &cfg); | |
184 | n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); | |
185 | printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); | |
186 | } | |
187 | ||
b37ceefe | 188 | cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); |
fcf9892b LV |
189 | if (cap) { |
190 | n += scnprintf(buf+n, len-n, "pci-e AER:\n"); | |
191 | printk(KERN_WARNING | |
192 | "EEH: PCI-E AER capability register set follows:\n"); | |
193 | ||
194 | for (i=0; i<14; i++) { | |
195 | rtas_read_config(pdn, cap+4*i, 4, &cfg); | |
196 | n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); | |
197 | printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); | |
198 | } | |
199 | } | |
200 | } | |
0b9369f4 LV |
201 | |
202 | /* Gather status on devices under the bridge */ | |
203 | if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { | |
acaa6176 SR |
204 | struct device_node *dn; |
205 | ||
206 | for_each_child_of_node(pdn->node, dn) { | |
0b9369f4 LV |
207 | pdn = PCI_DN(dn); |
208 | if (pdn) | |
cce4b2d2 | 209 | n += eeh_gather_pci_data(pdn, buf+n, len-n); |
0b9369f4 LV |
210 | } |
211 | } | |
212 | ||
d99bb1db LV |
213 | return n; |
214 | } | |
215 | ||
cb3bc9d0 GS |
216 | /** |
217 | * eeh_slot_error_detail - Generate combined log including driver log and error log | |
218 | * @pdn: device node | |
219 | * @severity: temporary or permanent error log | |
220 | * | |
221 | * This routine should be called to generate the combined log, which | |
222 | * is comprised of driver log and error log. The driver log is figured | |
223 | * out from the config space of the corresponding PCI device, while | |
224 | * the error log is fetched through platform dependent function call. | |
225 | */ | |
d99bb1db LV |
226 | void eeh_slot_error_detail(struct pci_dn *pdn, int severity) |
227 | { | |
228 | size_t loglen = 0; | |
17213c3b | 229 | pci_regs_buf[0] = 0; |
d99bb1db | 230 | |
8fb8f709 | 231 | eeh_pci_enable(pdn, EEH_OPT_THAW_MMIO); |
cce4b2d2 | 232 | eeh_configure_bridge(pdn); |
65f47f13 | 233 | eeh_restore_bars(pdn); |
cce4b2d2 | 234 | loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); |
d99bb1db | 235 | |
8d633291 | 236 | eeh_ops->get_log(pdn->node, severity, pci_regs_buf, loglen); |
d99bb1db LV |
237 | } |
238 | ||
1da177e4 | 239 | /** |
cb3bc9d0 GS |
240 | * eeh_token_to_phys - Convert EEH address token to phys address |
241 | * @token: I/O token, should be address in the form 0xA.... | |
242 | * | |
243 | * This routine should be called to convert virtual I/O address | |
244 | * to physical one. | |
1da177e4 LT |
245 | */ |
246 | static inline unsigned long eeh_token_to_phys(unsigned long token) | |
247 | { | |
248 | pte_t *ptep; | |
249 | unsigned long pa; | |
250 | ||
20cee16c | 251 | ptep = find_linux_pte(init_mm.pgd, token); |
1da177e4 LT |
252 | if (!ptep) |
253 | return token; | |
254 | pa = pte_pfn(*ptep) << PAGE_SHIFT; | |
255 | ||
256 | return pa | (token & (PAGE_SIZE-1)); | |
257 | } | |
258 | ||
cb3bc9d0 | 259 | /** |
cce4b2d2 | 260 | * eeh_find_device_pe - Retrieve the PE for the given device |
cb3bc9d0 GS |
261 | * @dn: device node |
262 | * | |
263 | * Return the PE under which this device lies | |
fd761fd8 | 264 | */ |
cce4b2d2 | 265 | struct device_node *eeh_find_device_pe(struct device_node *dn) |
fd761fd8 LV |
266 | { |
267 | while ((dn->parent) && PCI_DN(dn->parent) && | |
268 | (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { | |
269 | dn = dn->parent; | |
270 | } | |
271 | return dn; | |
272 | } | |
273 | ||
cb3bc9d0 GS |
274 | /** |
275 | * __eeh_mark_slot - Mark all child devices as failed | |
276 | * @parent: parent device | |
277 | * @mode_flag: failure flag | |
278 | * | |
279 | * Mark all devices that are children of this device as failed. | |
280 | * Mark the device driver too, so that it can see the failure | |
281 | * immediately; this is critical, since some drivers poll | |
282 | * status registers in interrupts ... If a driver is polling, | |
283 | * and the slot is frozen, then the driver can deadlock in | |
284 | * an interrupt context, which is bad. | |
fd761fd8 | 285 | */ |
acaa6176 | 286 | static void __eeh_mark_slot(struct device_node *parent, int mode_flag) |
fd761fd8 | 287 | { |
acaa6176 SR |
288 | struct device_node *dn; |
289 | ||
290 | for_each_child_of_node(parent, dn) { | |
d9564ad1 | 291 | if (PCI_DN(dn)) { |
77bd7415 LV |
292 | /* Mark the pci device driver too */ |
293 | struct pci_dev *dev = PCI_DN(dn)->pcidev; | |
ea183a95 OJ |
294 | |
295 | PCI_DN(dn)->eeh_mode |= mode_flag; | |
296 | ||
77bd7415 LV |
297 | if (dev && dev->driver) |
298 | dev->error_state = pci_channel_io_frozen; | |
299 | ||
acaa6176 | 300 | __eeh_mark_slot(dn, mode_flag); |
d9564ad1 | 301 | } |
fd761fd8 LV |
302 | } |
303 | } | |
304 | ||
cb3bc9d0 GS |
305 | /** |
306 | * eeh_mark_slot - Mark the indicated device and its children as failed | |
307 | * @dn: parent device | |
308 | * @mode_flag: failure flag | |
309 | * | |
310 | * Mark the indicated device and its child devices as failed. | |
311 | * The device drivers are marked as failed as well. | |
312 | */ | |
313 | void eeh_mark_slot(struct device_node *dn, int mode_flag) | |
d9564ad1 | 314 | { |
022d51b1 | 315 | struct pci_dev *dev; |
cce4b2d2 | 316 | dn = eeh_find_device_pe(dn); |
3914ac7b LV |
317 | |
318 | /* Back up one, since config addrs might be shared */ | |
4980d5eb | 319 | if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) |
3914ac7b LV |
320 | dn = dn->parent; |
321 | ||
d9564ad1 | 322 | PCI_DN(dn)->eeh_mode |= mode_flag; |
022d51b1 LV |
323 | |
324 | /* Mark the pci device too */ | |
325 | dev = PCI_DN(dn)->pcidev; | |
326 | if (dev) | |
327 | dev->error_state = pci_channel_io_frozen; | |
328 | ||
acaa6176 | 329 | __eeh_mark_slot(dn, mode_flag); |
d9564ad1 LV |
330 | } |
331 | ||
cb3bc9d0 GS |
332 | /** |
333 | * __eeh_clear_slot - Clear failure flag for the child devices | |
334 | * @parent: parent device | |
335 | * @mode_flag: flag to be cleared | |
336 | * | |
337 | * Clear failure flag for the child devices. | |
338 | */ | |
acaa6176 | 339 | static void __eeh_clear_slot(struct device_node *parent, int mode_flag) |
fd761fd8 | 340 | { |
acaa6176 SR |
341 | struct device_node *dn; |
342 | ||
343 | for_each_child_of_node(parent, dn) { | |
d9564ad1 LV |
344 | if (PCI_DN(dn)) { |
345 | PCI_DN(dn)->eeh_mode &= ~mode_flag; | |
346 | PCI_DN(dn)->eeh_check_count = 0; | |
acaa6176 | 347 | __eeh_clear_slot(dn, mode_flag); |
d9564ad1 | 348 | } |
fd761fd8 LV |
349 | } |
350 | } | |
351 | ||
cb3bc9d0 GS |
352 | /** |
353 | * eeh_clear_slot - Clear failure flag for the indicated device and its children | |
354 | * @dn: parent device | |
355 | * @mode_flag: flag to be cleared | |
356 | * | |
357 | * Clear failure flag for the indicated device and its children. | |
358 | */ | |
359 | void eeh_clear_slot(struct device_node *dn, int mode_flag) | |
fd761fd8 LV |
360 | { |
361 | unsigned long flags; | |
3d372628 | 362 | raw_spin_lock_irqsave(&confirm_error_lock, flags); |
3914ac7b | 363 | |
cce4b2d2 | 364 | dn = eeh_find_device_pe(dn); |
3914ac7b LV |
365 | |
366 | /* Back up one, since config addrs might be shared */ | |
4980d5eb | 367 | if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) |
3914ac7b LV |
368 | dn = dn->parent; |
369 | ||
d9564ad1 LV |
370 | PCI_DN(dn)->eeh_mode &= ~mode_flag; |
371 | PCI_DN(dn)->eeh_check_count = 0; | |
acaa6176 | 372 | __eeh_clear_slot(dn, mode_flag); |
3d372628 | 373 | raw_spin_unlock_irqrestore(&confirm_error_lock, flags); |
fd761fd8 LV |
374 | } |
375 | ||
/**
 * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
 * @dn: device node
 * @dev: pci device, if known
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error (or if the check was
 * skipped: EEH disabled, no device node, EEH not supported/NOCHECK,
 * or no config address). Returns 1 if the slot is already marked
 * isolated, or if a new freeze was detected, in which case a slot
 * isolation event notification is queued.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
{
	int ret;
	unsigned long flags;
	struct pci_dn *pdn;
	int rc = 0;
	const char *location;

	/* Counts every call, even when EEH turns out to be disabled */
	total_mmio_ffs++;

	if (!eeh_subsystem_enabled)
		return 0;

	if (!dn) {
		no_dn++;
		return 0;
	}
	/* All further checks are made against the PE, not the device */
	dn = eeh_find_device_pe(dn);
	pdn = PCI_DN(dn);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
		ignored_check++;
		pr_debug("EEH: Ignored check (%x) for %s %s\n",
			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
		return 0;
	}

	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
		no_cfg_addr++;
		return 0;
	}

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	raw_spin_lock_irqsave(&confirm_error_lock, flags);
	/* From here on the default answer is "failed" until proven otherwise */
	rc = 1;
	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
		pdn->eeh_check_count ++;
		/* Complain once every EEH_MAX_FAILS reads of an isolated slot */
		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
			location = of_get_property(dn, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pdn->eeh_check_count, location,
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pdn->node, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt with the following conditions: Failure to get
	 * PE's state, EEH not support and Permanently unavailable
	 * state, PE is in good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		false_positives++;
		pdn->eeh_false_positives ++;
		rc = 0;
		goto dn_unlock;
	}

	slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
	/* The lock is dropped before the event is sent */
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);

	eeh_send_failure_event(dn, dev);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	dump_stack();
	return 1;

	/* Common exit for the paths that still hold confirm_error_lock */
dn_unlock:
	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
	return rc;
}
495 | ||
fd761fd8 | 496 | EXPORT_SYMBOL_GPL(eeh_dn_check_failure); |
1da177e4 LT |
497 | |
498 | /** | |
cb3bc9d0 GS |
499 | * eeh_check_failure - Check if all 1's data is due to EEH slot freeze |
500 | * @token: I/O token, should be address in the form 0xA.... | |
501 | * @val: value, should be all 1's (XXX why do we need this arg??) | |
1da177e4 | 502 | * |
1da177e4 LT |
503 | * Check for an EEH failure at the given token address. Call this |
504 | * routine if the result of a read was all 0xff's and you want to | |
505 | * find out if this is due to an EEH slot freeze event. This routine | |
506 | * will query firmware for the EEH status. | |
507 | * | |
508 | * Note this routine is safe to call in an interrupt context. | |
509 | */ | |
510 | unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) | |
511 | { | |
512 | unsigned long addr; | |
513 | struct pci_dev *dev; | |
514 | struct device_node *dn; | |
515 | ||
516 | /* Finding the phys addr + pci device; this is pretty quick. */ | |
517 | addr = eeh_token_to_phys((unsigned long __force) token); | |
518 | dev = pci_get_device_by_addr(addr); | |
177bc936 | 519 | if (!dev) { |
257ffc64 | 520 | no_device++; |
1da177e4 | 521 | return val; |
177bc936 | 522 | } |
1da177e4 LT |
523 | |
524 | dn = pci_device_to_OF_node(dev); | |
cb3bc9d0 | 525 | eeh_dn_check_failure(dn, dev); |
1da177e4 LT |
526 | |
527 | pci_dev_put(dev); | |
528 | return val; | |
529 | } | |
530 | ||
531 | EXPORT_SYMBOL(eeh_check_failure); | |
532 | ||
6dee3fb9 | 533 | |
47b5c838 | 534 | /** |
cce4b2d2 | 535 | * eeh_pci_enable - Enable MMIO or DMA transfers for this slot |
47b5c838 | 536 | * @pdn pci device node |
cb3bc9d0 GS |
537 | * |
538 | * This routine should be called to reenable frozen MMIO or DMA | |
539 | * so that it would work correctly again. It's useful while doing | |
540 | * recovery or log collection on the indicated device. | |
47b5c838 | 541 | */ |
cce4b2d2 | 542 | int eeh_pci_enable(struct pci_dn *pdn, int function) |
47b5c838 | 543 | { |
47b5c838 LV |
544 | int rc; |
545 | ||
8fb8f709 | 546 | rc = eeh_ops->set_option(pdn->node, function); |
47b5c838 | 547 | if (rc) |
fa1be476 | 548 | printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", |
47b5c838 LV |
549 | function, rc, pdn->node->full_name); |
550 | ||
b0e5f742 | 551 | rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); |
eb594a47 GS |
552 | if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && |
553 | (function == EEH_OPT_THAW_MMIO)) | |
fa1be476 LV |
554 | return 0; |
555 | ||
47b5c838 LV |
556 | return rc; |
557 | } | |
558 | ||
00c2ae35 BK |
559 | /** |
560 | * pcibios_set_pcie_slot_reset - Set PCI-E reset state | |
cb3bc9d0 GS |
561 | * @dev: pci device struct |
562 | * @state: reset state to enter | |
00c2ae35 BK |
563 | * |
564 | * Return value: | |
565 | * 0 if success | |
cb3bc9d0 | 566 | */ |
00c2ae35 BK |
567 | int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) |
568 | { | |
569 | struct device_node *dn = pci_device_to_OF_node(dev); | |
00c2ae35 BK |
570 | |
571 | switch (state) { | |
572 | case pcie_deassert_reset: | |
2652481f | 573 | eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); |
00c2ae35 BK |
574 | break; |
575 | case pcie_hot_reset: | |
2652481f | 576 | eeh_ops->reset(dn, EEH_RESET_HOT); |
00c2ae35 BK |
577 | break; |
578 | case pcie_warm_reset: | |
2652481f | 579 | eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); |
00c2ae35 BK |
580 | break; |
581 | default: | |
582 | return -EINVAL; | |
583 | }; | |
584 | ||
585 | return 0; | |
586 | } | |
587 | ||
cb5b5624 | 588 | /** |
cb3bc9d0 GS |
589 | * __eeh_set_pe_freset - Check the required reset for child devices |
590 | * @parent: parent device | |
591 | * @freset: return value | |
592 | * | |
593 | * Each device might have its preferred reset type: fundamental or | |
594 | * hot reset. The routine is used to collect the information from | |
595 | * the child devices so that they could be reset accordingly. | |
6dee3fb9 | 596 | */ |
cb3bc9d0 GS |
597 | void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) |
598 | { | |
599 | struct device_node *dn; | |
600 | ||
601 | for_each_child_of_node(parent, dn) { | |
602 | if (PCI_DN(dn)) { | |
603 | struct pci_dev *dev = PCI_DN(dn)->pcidev; | |
604 | ||
605 | if (dev && dev->driver) | |
606 | *freset |= dev->needs_freset; | |
607 | ||
608 | __eeh_set_pe_freset(dn, freset); | |
609 | } | |
610 | } | |
611 | } | |
612 | ||
613 | /** | |
614 | * eeh_set_pe_freset - Check the required reset for the indicated device and its children | |
615 | * @dn: parent device | |
616 | * @freset: return value | |
617 | * | |
618 | * Each device might have its preferred reset type: fundamental or | |
619 | * hot reset. The routine is used to collected the information for | |
620 | * the indicated device and its children so that the bunch of the | |
621 | * devices could be reset properly. | |
622 | */ | |
623 | void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) | |
624 | { | |
625 | struct pci_dev *dev; | |
cce4b2d2 | 626 | dn = eeh_find_device_pe(dn); |
cb3bc9d0 GS |
627 | |
628 | /* Back up one, since config addrs might be shared */ | |
629 | if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) | |
630 | dn = dn->parent; | |
6dee3fb9 | 631 | |
cb3bc9d0 GS |
632 | dev = PCI_DN(dn)->pcidev; |
633 | if (dev) | |
634 | *freset |= dev->needs_freset; | |
635 | ||
636 | __eeh_set_pe_freset(dn, freset); | |
637 | } | |
638 | ||
639 | /** | |
cce4b2d2 | 640 | * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second |
cb3bc9d0 GS |
641 | * @pdn: pci device node to be reset. |
642 | * | |
643 | * Assert the PCI #RST line for 1/4 second. | |
644 | */ | |
cce4b2d2 | 645 | static void eeh_reset_pe_once(struct pci_dn *pdn) |
6dee3fb9 | 646 | { |
308fc4f8 | 647 | unsigned int freset = 0; |
6e19314c | 648 | |
308fc4f8 RL |
649 | /* Determine type of EEH reset required for |
650 | * Partitionable Endpoint, a hot-reset (1) | |
651 | * or a fundamental reset (3). | |
652 | * A fundamental reset required by any device under | |
653 | * Partitionable Endpoint trumps hot-reset. | |
654 | */ | |
655 | eeh_set_pe_freset(pdn->node, &freset); | |
656 | ||
657 | if (freset) | |
2652481f | 658 | eeh_ops->reset(pdn->node, EEH_RESET_FUNDAMENTAL); |
6e19314c | 659 | else |
2652481f | 660 | eeh_ops->reset(pdn->node, EEH_RESET_HOT); |
6dee3fb9 LV |
661 | |
662 | /* The PCI bus requires that the reset be held high for at least | |
cb3bc9d0 GS |
663 | * a 100 milliseconds. We wait a bit longer 'just in case'. |
664 | */ | |
6dee3fb9 | 665 | #define PCI_BUS_RST_HOLD_TIME_MSEC 250 |
cb3bc9d0 | 666 | msleep(PCI_BUS_RST_HOLD_TIME_MSEC); |
d9564ad1 LV |
667 | |
668 | /* We might get hit with another EEH freeze as soon as the | |
669 | * pci slot reset line is dropped. Make sure we don't miss | |
cb3bc9d0 GS |
670 | * these, and clear the flag now. |
671 | */ | |
672 | eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED); | |
d9564ad1 | 673 | |
2652481f | 674 | eeh_ops->reset(pdn->node, EEH_RESET_DEACTIVATE); |
6dee3fb9 LV |
675 | |
676 | /* After a PCI slot has been reset, the PCI Express spec requires | |
677 | * a 1.5 second idle time for the bus to stabilize, before starting | |
cb3bc9d0 GS |
678 | * up traffic. |
679 | */ | |
6dee3fb9 | 680 | #define PCI_BUS_SETTLE_TIME_MSEC 1800 |
cb3bc9d0 | 681 | msleep(PCI_BUS_SETTLE_TIME_MSEC); |
e1029263 LV |
682 | } |
683 | ||
cb3bc9d0 | 684 | /** |
cce4b2d2 | 685 | * eeh_reset_pe - Reset the indicated PE |
cb3bc9d0 GS |
686 | * @pdn: PCI device node |
687 | * | |
688 | * This routine should be called to reset indicated device, including | |
689 | * PE. A PE might include multiple PCI devices and sometimes PCI bridges | |
690 | * might be involved as well. | |
691 | */ | |
cce4b2d2 | 692 | int eeh_reset_pe(struct pci_dn *pdn) |
e1029263 LV |
693 | { |
694 | int i, rc; | |
695 | ||
9c547768 LV |
696 | /* Take three shots at resetting the bus */ |
697 | for (i=0; i<3; i++) { | |
cce4b2d2 | 698 | eeh_reset_pe_once(pdn); |
6dee3fb9 | 699 | |
b0e5f742 | 700 | rc = eeh_ops->wait_state(pdn->node, PCI_BUS_RESET_WAIT_MSEC); |
eb594a47 | 701 | if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) |
b6495c0c | 702 | return 0; |
e1029263 | 703 | |
e1029263 | 704 | if (rc < 0) { |
12588da7 LV |
705 | printk(KERN_ERR "EEH: unrecoverable slot failure %s\n", |
706 | pdn->node->full_name); | |
b6495c0c | 707 | return -1; |
e1029263 | 708 | } |
12588da7 LV |
709 | printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n", |
710 | i+1, pdn->node->full_name, rc); | |
6dee3fb9 | 711 | } |
b6495c0c | 712 | |
9c547768 | 713 | return -1; |
6dee3fb9 LV |
714 | } |
715 | ||
8b553f32 LV |
716 | /** Save and restore of PCI BARs |
717 | * | |
718 | * Although firmware will set up BARs during boot, it doesn't | |
719 | * set up device BAR's after a device reset, although it will, | |
720 | * if requested, set up bridge configuration. Thus, we need to | |
721 | * configure the PCI devices ourselves. | |
722 | */ | |
723 | ||
724 | /** | |
cce4b2d2 | 725 | * eeh_restore_one_device_bars - Restore the Base Address Registers for one device |
cb5b5624 LV |
726 | * @pdn: pci device node |
727 | * | |
8b553f32 LV |
728 | * Loads the PCI configuration space base address registers, |
729 | * the expansion ROM base address, the latency timer, and etc. | |
730 | * from the saved values in the device node. | |
731 | */ | |
cce4b2d2 | 732 | static inline void eeh_restore_one_device_bars(struct pci_dn *pdn) |
8b553f32 LV |
733 | { |
734 | int i; | |
cde274c0 | 735 | u32 cmd; |
8b553f32 LV |
736 | |
737 | if (NULL==pdn->phb) return; | |
738 | for (i=4; i<10; i++) { | |
739 | rtas_write_config(pdn, i*4, 4, pdn->config_space[i]); | |
740 | } | |
741 | ||
742 | /* 12 == Expansion ROM Address */ | |
743 | rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]); | |
744 | ||
745 | #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) | |
746 | #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) | |
747 | ||
cb3bc9d0 | 748 | rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1, |
8b553f32 LV |
749 | SAVED_BYTE(PCI_CACHE_LINE_SIZE)); |
750 | ||
cb3bc9d0 | 751 | rtas_write_config(pdn, PCI_LATENCY_TIMER, 1, |
8b553f32 LV |
752 | SAVED_BYTE(PCI_LATENCY_TIMER)); |
753 | ||
754 | /* max latency, min grant, interrupt pin and line */ | |
755 | rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); | |
cde274c0 MM |
756 | |
757 | /* Restore PERR & SERR bits, some devices require it, | |
cb3bc9d0 GS |
758 | * don't touch the other command bits |
759 | */ | |
cde274c0 MM |
760 | rtas_read_config(pdn, PCI_COMMAND, 4, &cmd); |
761 | if (pdn->config_space[1] & PCI_COMMAND_PARITY) | |
762 | cmd |= PCI_COMMAND_PARITY; | |
763 | else | |
764 | cmd &= ~PCI_COMMAND_PARITY; | |
765 | if (pdn->config_space[1] & PCI_COMMAND_SERR) | |
766 | cmd |= PCI_COMMAND_SERR; | |
767 | else | |
768 | cmd &= ~PCI_COMMAND_SERR; | |
769 | rtas_write_config(pdn, PCI_COMMAND, 4, cmd); | |
8b553f32 LV |
770 | } |
771 | ||
772 | /** | |
cb3bc9d0 GS |
773 | * eeh_restore_bars - Restore the PCI config space info |
774 | * @pdn: PCI device node | |
8b553f32 LV |
775 | * |
776 | * This routine performs a recursive walk to the children | |
777 | * of this device as well. | |
778 | */ | |
779 | void eeh_restore_bars(struct pci_dn *pdn) | |
780 | { | |
781 | struct device_node *dn; | |
782 | if (!pdn) | |
783 | return; | |
784 | ||
7684b40c | 785 | if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) |
cce4b2d2 | 786 | eeh_restore_one_device_bars(pdn); |
8b553f32 | 787 | |
acaa6176 | 788 | for_each_child_of_node(pdn->node, dn) |
cb3bc9d0 | 789 | eeh_restore_bars(PCI_DN(dn)); |
8b553f32 LV |
790 | } |
791 | ||
792 | /** | |
cb3bc9d0 GS |
793 | * eeh_save_bars - Save device bars |
794 | * @pdn: PCI device node | |
8b553f32 LV |
795 | * |
796 | * Save the values of the device bars. Unlike the restore | |
797 | * routine, this routine is *not* recursive. This is because | |
31116f0b | 798 | * PCI devices are added individually; but, for the restore, |
8b553f32 LV |
799 | * an entire slot is reset at a time. |
800 | */ | |
7684b40c | 801 | static void eeh_save_bars(struct pci_dn *pdn) |
8b553f32 LV |
802 | { |
803 | int i; | |
804 | ||
7684b40c | 805 | if (!pdn ) |
8b553f32 LV |
806 | return; |
807 | ||
808 | for (i = 0; i < 16; i++) | |
7684b40c | 809 | rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); |
8b553f32 LV |
810 | } |
811 | ||
cb3bc9d0 | 812 | /** |
cce4b2d2 | 813 | * eeh_configure_bridge - Configure PCI bridges for the indicated PE |
cb3bc9d0 GS |
814 | * @pdn: PCI device node |
815 | * | |
816 | * PCI bridges might be included in PE. In order to make the PE work | |
817 | * again. The included PCI bridges should be recovered after the PE | |
818 | * encounters frozen state. | |
819 | */ | |
cce4b2d2 | 820 | void eeh_configure_bridge(struct pci_dn *pdn) |
8b553f32 | 821 | { |
fcb7543e | 822 | int config_addr; |
8b553f32 | 823 | int rc; |
65f47f13 | 824 | int token; |
8b553f32 | 825 | |
fcb7543e LV |
826 | /* Use PE configuration address, if present */ |
827 | config_addr = pdn->eeh_config_addr; | |
828 | if (pdn->eeh_pe_config_addr) | |
829 | config_addr = pdn->eeh_pe_config_addr; | |
830 | ||
65f47f13 RL |
831 | /* Use new configure-pe function, if supported */ |
832 | if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) | |
833 | token = ibm_configure_pe; | |
834 | else | |
835 | token = ibm_configure_bridge; | |
836 | ||
837 | rc = rtas_call(token, 3, 1, NULL, | |
fcb7543e | 838 | config_addr, |
8b553f32 LV |
839 | BUID_HI(pdn->phb->buid), |
840 | BUID_LO(pdn->phb->buid)); | |
841 | if (rc) { | |
cb3bc9d0 | 842 | printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", |
8b553f32 LV |
843 | rc, pdn->node->full_name); |
844 | } | |
845 | } | |
846 | ||
cb3bc9d0 | 847 | /** |
cce4b2d2 | 848 | * eeh_early_enable - Early enable EEH on the indicated device |
cb3bc9d0 GS |
849 | * @dn: device node |
850 | * @data: BUID | |
851 | * | |
852 | * Enable EEH functionality on the specified PCI device. The function | |
853 | * is expected to be called before real PCI probing is done. However, | |
854 | * the PHBs have been initialized at this point. | |
855 | */ | |
cce4b2d2 | 856 | static void *eeh_early_enable(struct device_node *dn, void *data) |
1da177e4 | 857 | { |
1da177e4 | 858 | int ret; |
e2eb6392 SR |
859 | const u32 *class_code = of_get_property(dn, "class-code", NULL); |
860 | const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); | |
861 | const u32 *device_id = of_get_property(dn, "device-id", NULL); | |
954a46e2 | 862 | const u32 *regs; |
1da177e4 | 863 | int enable; |
69376502 | 864 | struct pci_dn *pdn = PCI_DN(dn); |
1da177e4 | 865 | |
0f17574a | 866 | pdn->class_code = 0; |
1635317f | 867 | pdn->eeh_mode = 0; |
5c1344e9 LV |
868 | pdn->eeh_check_count = 0; |
869 | pdn->eeh_freeze_count = 0; | |
858955bd | 870 | pdn->eeh_false_positives = 0; |
1da177e4 | 871 | |
c6d4d5a8 NL |
872 | if (!of_device_is_available(dn)) |
873 | return NULL; | |
1da177e4 LT |
874 | |
875 | /* Ignore bad nodes. */ | |
876 | if (!class_code || !vendor_id || !device_id) | |
877 | return NULL; | |
878 | ||
879 | /* There is nothing to check on PCI to ISA bridges */ | |
880 | if (dn->type && !strcmp(dn->type, "isa")) { | |
1635317f | 881 | pdn->eeh_mode |= EEH_MODE_NOCHECK; |
1da177e4 LT |
882 | return NULL; |
883 | } | |
0f17574a | 884 | pdn->class_code = *class_code; |
1da177e4 | 885 | |
1da177e4 | 886 | /* Ok... see if this device supports EEH. Some do, some don't, |
cb3bc9d0 GS |
887 | * and the only way to find out is to check each and every one. |
888 | */ | |
e2eb6392 | 889 | regs = of_get_property(dn, "reg", NULL); |
1da177e4 LT |
890 | if (regs) { |
891 | /* First register entry is addr (00BBSS00) */ | |
892 | /* Try to enable eeh */ | |
8fb8f709 | 893 | ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE); |
172ca926 | 894 | |
25c4a46f | 895 | enable = 0; |
1da177e4 | 896 | if (ret == 0) { |
1635317f | 897 | pdn->eeh_config_addr = regs[0]; |
25e591f6 LV |
898 | |
899 | /* If the newer, better, ibm,get-config-addr-info is supported, | |
cb3bc9d0 GS |
900 | * then use that instead. |
901 | */ | |
c8c29b38 | 902 | pdn->eeh_pe_config_addr = eeh_ops->get_pe_addr(dn); |
25c4a46f LV |
903 | |
904 | /* Some older systems (Power4) allow the | |
905 | * ibm,set-eeh-option call to succeed even on nodes | |
906 | * where EEH is not supported. Verify support | |
cb3bc9d0 GS |
907 | * explicitly. |
908 | */ | |
eb594a47 GS |
909 | ret = eeh_ops->get_state(pdn->node, NULL); |
910 | if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) | |
25c4a46f LV |
911 | enable = 1; |
912 | } | |
913 | ||
914 | if (enable) { | |
915 | eeh_subsystem_enabled = 1; | |
916 | pdn->eeh_mode |= EEH_MODE_SUPPORTED; | |
917 | ||
57b066ff BH |
918 | pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", |
919 | dn->full_name, pdn->eeh_config_addr, | |
920 | pdn->eeh_pe_config_addr); | |
1da177e4 LT |
921 | } else { |
922 | ||
923 | /* This device doesn't support EEH, but it may have an | |
cb3bc9d0 GS |
924 | * EEH parent, in which case we mark it as supported. |
925 | */ | |
69376502 | 926 | if (dn->parent && PCI_DN(dn->parent) |
1635317f | 927 | && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { |
1da177e4 | 928 | /* Parent supports EEH. */ |
1635317f PM |
929 | pdn->eeh_mode |= EEH_MODE_SUPPORTED; |
930 | pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; | |
1da177e4 LT |
931 | return NULL; |
932 | } | |
933 | } | |
934 | } else { | |
935 | printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", | |
936 | dn->full_name); | |
937 | } | |
938 | ||
7684b40c | 939 | eeh_save_bars(pdn); |
69376502 | 940 | return NULL; |
1da177e4 LT |
941 | } |
942 | ||
aa1e6374 GS |
943 | /** |
944 | * eeh_ops_register - Register platform dependent EEH operations | |
945 | * @ops: platform dependent EEH operations | |
946 | * | |
947 | * Register the platform dependent EEH operation callback | |
948 | * functions. The platform should call this function before | |
949 | * any other EEH operations. | |
950 | */ | |
951 | int __init eeh_ops_register(struct eeh_ops *ops) | |
952 | { | |
953 | if (!ops->name) { | |
954 | pr_warning("%s: Invalid EEH ops name for %p\n", | |
955 | __func__, ops); | |
956 | return -EINVAL; | |
957 | } | |
958 | ||
959 | if (eeh_ops && eeh_ops != ops) { | |
960 | pr_warning("%s: EEH ops of platform %s already existing (%s)\n", | |
961 | __func__, eeh_ops->name, ops->name); | |
962 | return -EEXIST; | |
963 | } | |
964 | ||
965 | eeh_ops = ops; | |
966 | ||
967 | return 0; | |
968 | } | |
969 | ||
970 | /** | |
971 | * eeh_ops_unregister - Unreigster platform dependent EEH operations | |
972 | * @name: name of EEH platform operations | |
973 | * | |
974 | * Unregister the platform dependent EEH operation callback | |
975 | * functions. | |
976 | */ | |
977 | int __exit eeh_ops_unregister(const char *name) | |
978 | { | |
979 | if (!name || !strlen(name)) { | |
980 | pr_warning("%s: Invalid EEH ops name\n", | |
981 | __func__); | |
982 | return -EINVAL; | |
983 | } | |
984 | ||
985 | if (eeh_ops && !strcmp(eeh_ops->name, name)) { | |
986 | eeh_ops = NULL; | |
987 | return 0; | |
988 | } | |
989 | ||
990 | return -EEXIST; | |
991 | } | |
992 | ||
cb3bc9d0 GS |
993 | /** |
994 | * eeh_init - EEH initialization | |
995 | * | |
1da177e4 LT |
996 | * Initialize EEH by trying to enable it for all of the adapters in the system. |
997 | * As a side effect we can determine here if eeh is supported at all. | |
998 | * Note that we leave EEH on so failed config cycles won't cause a machine | |
999 | * check. If a user turns off EEH for a particular adapter they are really | |
1000 | * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't | |
1001 | * grant access to a slot if EEH isn't enabled, and so we always enable | |
1002 | * EEH for all slots/all devices. | |
1003 | * | |
1004 | * The eeh-force-off option disables EEH checking globally, for all slots. | |
1005 | * Even if force-off is set, the EEH hardware is still enabled, so that | |
1006 | * newer systems can boot. | |
1007 | */ | |
1008 | void __init eeh_init(void) | |
1009 | { | |
1010 | struct device_node *phb, *np; | |
e2af155c GS |
1011 | int ret; |
1012 | ||
1013 | /* call platform initialization function */ | |
1014 | if (!eeh_ops) { | |
1015 | pr_warning("%s: Platform EEH operation not found\n", | |
1016 | __func__); | |
1017 | return; | |
1018 | } else if ((ret = eeh_ops->init())) { | |
1019 | pr_warning("%s: Failed to call platform init function (%d)\n", | |
1020 | __func__, ret); | |
1021 | return; | |
1022 | } | |
1da177e4 | 1023 | |
3d372628 | 1024 | raw_spin_lock_init(&confirm_error_lock); |
df7242b1 | 1025 | |
1da177e4 LT |
1026 | np = of_find_node_by_path("/rtas"); |
1027 | if (np == NULL) | |
1028 | return; | |
1029 | ||
cb3bc9d0 | 1030 | ibm_configure_bridge = rtas_token("ibm,configure-bridge"); |
65f47f13 | 1031 | ibm_configure_pe = rtas_token("ibm,configure-pe"); |
1da177e4 | 1032 | |
1da177e4 LT |
1033 | /* Enable EEH for all adapters. Note that eeh requires buid's */ |
1034 | for (phb = of_find_node_by_name(NULL, "pci"); phb; | |
1035 | phb = of_find_node_by_name(phb, "pci")) { | |
1036 | unsigned long buid; | |
1037 | ||
1038 | buid = get_phb_buid(phb); | |
69376502 | 1039 | if (buid == 0 || PCI_DN(phb) == NULL) |
1da177e4 LT |
1040 | continue; |
1041 | ||
c8c29b38 | 1042 | traverse_pci_devices(phb, eeh_early_enable, NULL); |
1da177e4 LT |
1043 | } |
1044 | ||
1045 | if (eeh_subsystem_enabled) | |
1046 | printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); | |
1047 | else | |
1048 | printk(KERN_WARNING "EEH: No capable adapters found\n"); | |
1049 | } | |
1050 | ||
1051 | /** | |
cb3bc9d0 | 1052 | * eeh_add_device_early - Enable EEH for the indicated device_node |
1da177e4 LT |
1053 | * @dn: device node for which to set up EEH |
1054 | * | |
1055 | * This routine must be used to perform EEH initialization for PCI | |
1056 | * devices that were added after system boot (e.g. hotplug, dlpar). | |
1057 | * This routine must be called before any i/o is performed to the | |
1058 | * adapter (inluding any config-space i/o). | |
1059 | * Whether this actually enables EEH or not for this device depends | |
1060 | * on the CEC architecture, type of the device, on earlier boot | |
1061 | * command-line arguments & etc. | |
1062 | */ | |
794e085e | 1063 | static void eeh_add_device_early(struct device_node *dn) |
1da177e4 LT |
1064 | { |
1065 | struct pci_controller *phb; | |
1da177e4 | 1066 | |
69376502 | 1067 | if (!dn || !PCI_DN(dn)) |
1da177e4 | 1068 | return; |
1635317f | 1069 | phb = PCI_DN(dn)->phb; |
f751f841 LV |
1070 | |
1071 | /* USB Bus children of PCI devices will not have BUID's */ | |
1072 | if (NULL == phb || 0 == phb->buid) | |
1da177e4 | 1073 | return; |
1da177e4 | 1074 | |
c8c29b38 | 1075 | eeh_early_enable(dn, NULL); |
1da177e4 | 1076 | } |
1da177e4 | 1077 | |
cb3bc9d0 GS |
1078 | /** |
1079 | * eeh_add_device_tree_early - Enable EEH for the indicated device | |
1080 | * @dn: device node | |
1081 | * | |
1082 | * This routine must be used to perform EEH initialization for the | |
1083 | * indicated PCI device that was added after system boot (e.g. | |
1084 | * hotplug, dlpar). | |
1085 | */ | |
e2a296ee LV |
1086 | void eeh_add_device_tree_early(struct device_node *dn) |
1087 | { | |
1088 | struct device_node *sib; | |
acaa6176 SR |
1089 | |
1090 | for_each_child_of_node(dn, sib) | |
e2a296ee LV |
1091 | eeh_add_device_tree_early(sib); |
1092 | eeh_add_device_early(dn); | |
1093 | } | |
1094 | EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); | |
1095 | ||
1da177e4 | 1096 | /** |
cb3bc9d0 | 1097 | * eeh_add_device_late - Perform EEH initialization for the indicated pci device |
1da177e4 LT |
1098 | * @dev: pci device for which to set up EEH |
1099 | * | |
1100 | * This routine must be used to complete EEH initialization for PCI | |
1101 | * devices that were added after system boot (e.g. hotplug, dlpar). | |
1102 | */ | |
794e085e | 1103 | static void eeh_add_device_late(struct pci_dev *dev) |
1da177e4 | 1104 | { |
56b0fca3 | 1105 | struct device_node *dn; |
8b553f32 | 1106 | struct pci_dn *pdn; |
56b0fca3 | 1107 | |
1da177e4 LT |
1108 | if (!dev || !eeh_subsystem_enabled) |
1109 | return; | |
1110 | ||
57b066ff | 1111 | pr_debug("EEH: Adding device %s\n", pci_name(dev)); |
1da177e4 | 1112 | |
56b0fca3 | 1113 | dn = pci_device_to_OF_node(dev); |
8b553f32 | 1114 | pdn = PCI_DN(dn); |
57b066ff BH |
1115 | if (pdn->pcidev == dev) { |
1116 | pr_debug("EEH: Already referenced !\n"); | |
1117 | return; | |
1118 | } | |
1119 | WARN_ON(pdn->pcidev); | |
1120 | ||
cb3bc9d0 | 1121 | pci_dev_get(dev); |
8b553f32 | 1122 | pdn->pcidev = dev; |
56b0fca3 | 1123 | |
e1d04c97 LV |
1124 | pci_addr_cache_insert_device(dev); |
1125 | eeh_sysfs_add_device(dev); | |
1da177e4 | 1126 | } |
794e085e | 1127 | |
cb3bc9d0 GS |
1128 | /** |
1129 | * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus | |
1130 | * @bus: PCI bus | |
1131 | * | |
1132 | * This routine must be used to perform EEH initialization for PCI | |
1133 | * devices which are attached to the indicated PCI bus. The PCI bus | |
1134 | * is added after system boot through hotplug or dlpar. | |
1135 | */ | |
794e085e NF |
1136 | void eeh_add_device_tree_late(struct pci_bus *bus) |
1137 | { | |
1138 | struct pci_dev *dev; | |
1139 | ||
1140 | list_for_each_entry(dev, &bus->devices, bus_list) { | |
1141 | eeh_add_device_late(dev); | |
1142 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { | |
1143 | struct pci_bus *subbus = dev->subordinate; | |
1144 | if (subbus) | |
1145 | eeh_add_device_tree_late(subbus); | |
1146 | } | |
1147 | } | |
1148 | } | |
1149 | EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); | |
1da177e4 LT |
1150 | |
1151 | /** | |
cb3bc9d0 | 1152 | * eeh_remove_device - Undo EEH setup for the indicated pci device |
1da177e4 LT |
1153 | * @dev: pci device to be removed |
1154 | * | |
794e085e NF |
1155 | * This routine should be called when a device is removed from |
1156 | * a running system (e.g. by hotplug or dlpar). It unregisters | |
1157 | * the PCI device from the EEH subsystem. I/O errors affecting | |
1158 | * this device will no longer be detected after this call; thus, | |
1159 | * i/o errors affecting this slot may leave this device unusable. | |
1da177e4 | 1160 | */ |
794e085e | 1161 | static void eeh_remove_device(struct pci_dev *dev) |
1da177e4 | 1162 | { |
56b0fca3 | 1163 | struct device_node *dn; |
1da177e4 LT |
1164 | if (!dev || !eeh_subsystem_enabled) |
1165 | return; | |
1166 | ||
1167 | /* Unregister the device with the EEH/PCI address search system */ | |
57b066ff | 1168 | pr_debug("EEH: Removing device %s\n", pci_name(dev)); |
56b0fca3 LV |
1169 | |
1170 | dn = pci_device_to_OF_node(dev); | |
57b066ff BH |
1171 | if (PCI_DN(dn)->pcidev == NULL) { |
1172 | pr_debug("EEH: Not referenced !\n"); | |
1173 | return; | |
b055a9e1 | 1174 | } |
57b066ff | 1175 | PCI_DN(dn)->pcidev = NULL; |
cb3bc9d0 | 1176 | pci_dev_put(dev); |
57b066ff BH |
1177 | |
1178 | pci_addr_cache_remove_device(dev); | |
1179 | eeh_sysfs_remove_device(dev); | |
1da177e4 | 1180 | } |
1da177e4 | 1181 | |
cb3bc9d0 GS |
1182 | /** |
1183 | * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device | |
1184 | * @dev: PCI device | |
1185 | * | |
1186 | * This routine must be called when a device is removed from the | |
1187 | * running system through hotplug or dlpar. The corresponding | |
1188 | * PCI address cache will be removed. | |
1189 | */ | |
e2a296ee LV |
1190 | void eeh_remove_bus_device(struct pci_dev *dev) |
1191 | { | |
794e085e NF |
1192 | struct pci_bus *bus = dev->subordinate; |
1193 | struct pci_dev *child, *tmp; | |
1194 | ||
e2a296ee | 1195 | eeh_remove_device(dev); |
794e085e NF |
1196 | |
1197 | if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { | |
1198 | list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) | |
1199 | eeh_remove_bus_device(child); | |
e2a296ee LV |
1200 | } |
1201 | } | |
1202 | EXPORT_SYMBOL_GPL(eeh_remove_bus_device); | |
1203 | ||
1da177e4 LT |
1204 | static int proc_eeh_show(struct seq_file *m, void *v) |
1205 | { | |
1da177e4 LT |
1206 | if (0 == eeh_subsystem_enabled) { |
1207 | seq_printf(m, "EEH Subsystem is globally disabled\n"); | |
257ffc64 | 1208 | seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs); |
1da177e4 LT |
1209 | } else { |
1210 | seq_printf(m, "EEH Subsystem is enabled\n"); | |
177bc936 LV |
1211 | seq_printf(m, |
1212 | "no device=%ld\n" | |
1213 | "no device node=%ld\n" | |
1214 | "no config address=%ld\n" | |
1215 | "check not wanted=%ld\n" | |
1216 | "eeh_total_mmio_ffs=%ld\n" | |
1217 | "eeh_false_positives=%ld\n" | |
177bc936 | 1218 | "eeh_slot_resets=%ld\n", |
257ffc64 LV |
1219 | no_device, no_dn, no_cfg_addr, |
1220 | ignored_check, total_mmio_ffs, | |
42253a68 | 1221 | false_positives, |
257ffc64 | 1222 | slot_resets); |
1da177e4 LT |
1223 | } |
1224 | ||
1225 | return 0; | |
1226 | } | |
1227 | ||
1228 | static int proc_eeh_open(struct inode *inode, struct file *file) | |
1229 | { | |
1230 | return single_open(file, proc_eeh_show, NULL); | |
1231 | } | |
1232 | ||
5dfe4c96 | 1233 | static const struct file_operations proc_eeh_operations = { |
1da177e4 LT |
1234 | .open = proc_eeh_open, |
1235 | .read = seq_read, | |
1236 | .llseek = seq_lseek, | |
1237 | .release = single_release, | |
1238 | }; | |
1239 | ||
1240 | static int __init eeh_init_proc(void) | |
1241 | { | |
66747138 | 1242 | if (machine_is(pseries)) |
8feaa434 | 1243 | proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); |
1da177e4 LT |
1244 | return 0; |
1245 | } | |
1246 | __initcall(eeh_init_proc); |