]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * P4 specific Machine Check Exception Reporting | |
3 | */ | |
4 | ||
5 | #include <linux/init.h> | |
6 | #include <linux/types.h> | |
7 | #include <linux/kernel.h> | |
1da177e4 LT |
8 | #include <linux/interrupt.h> |
9 | #include <linux/smp.h> | |
10 | ||
11 | #include <asm/processor.h> | |
12 | #include <asm/system.h> | |
13 | #include <asm/msr.h> | |
14 | #include <asm/apic.h> | |
15 | ||
16 | #include "mce.h" | |
17 | ||
18 | /* as supported by the P4/Xeon family */ | |
19 | struct intel_mce_extended_msrs { | |
20 | u32 eax; | |
21 | u32 ebx; | |
22 | u32 ecx; | |
23 | u32 edx; | |
24 | u32 esi; | |
25 | u32 edi; | |
26 | u32 ebp; | |
27 | u32 esp; | |
28 | u32 eflags; | |
29 | u32 eip; | |
30 | /* u32 *reserved[]; */ | |
31 | }; | |
32 | ||
/*
 * Number of extended MCE MSRs; stays 0 (static storage default) until
 * intel_p4_mcheck_init() finds the extended-MSR capability bit set.
 */
static int mce_num_extended_msrs;
34 | ||
35 | ||
36 | #ifdef CONFIG_X86_MCE_P4THERMAL | |
37 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | |
38 | { | |
39 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | |
40 | smp_processor_id()); | |
41 | add_taint(TAINT_MACHINE_CHECK); | |
42 | } | |
43 | ||
44 | /* P4/Xeon Thermal transition interrupt handler */ | |
45 | static void intel_thermal_interrupt(struct pt_regs *regs) | |
46 | { | |
47 | u32 l, h; | |
48 | unsigned int cpu = smp_processor_id(); | |
49 | static unsigned long next[NR_CPUS]; | |
50 | ||
51 | ack_APIC_irq(); | |
52 | ||
53 | if (time_after(next[cpu], jiffies)) | |
54 | return; | |
55 | ||
56 | next[cpu] = jiffies + HZ*5; | |
57 | rdmsr(MSR_IA32_THERM_STATUS, l, h); | |
58 | if (l & 0x1) { | |
59 | printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); | |
60 | printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", | |
61 | cpu); | |
62 | add_taint(TAINT_MACHINE_CHECK); | |
63 | } else { | |
64 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | |
65 | } | |
66 | } | |
67 | ||
68 | /* Thermal interrupt handler for this CPU setup */ | |
69 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | |
70 | ||
71 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | |
72 | { | |
73 | irq_enter(); | |
74 | vendor_thermal_interrupt(regs); | |
75 | irq_exit(); | |
76 | } | |
77 | ||
78 | /* P4/Xeon Thermal regulation detect and init */ | |
31ab269a | 79 | static void intel_init_thermal(struct cpuinfo_x86 *c) |
1da177e4 LT |
80 | { |
81 | u32 l, h; | |
82 | unsigned int cpu = smp_processor_id(); | |
83 | ||
84 | /* Thermal monitoring */ | |
85 | if (!cpu_has(c, X86_FEATURE_ACPI)) | |
86 | return; /* -ENODEV */ | |
87 | ||
88 | /* Clock modulation */ | |
89 | if (!cpu_has(c, X86_FEATURE_ACC)) | |
90 | return; /* -ENODEV */ | |
91 | ||
92 | /* first check if its enabled already, in which case there might | |
93 | * be some SMM goo which handles it, so we can't even put a handler | |
94 | * since it might be delivered via SMI already -zwanem. | |
95 | */ | |
96 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | |
97 | h = apic_read(APIC_LVTTHMR); | |
98 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { | |
99 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | |
100 | cpu); | |
101 | return; /* -EBUSY */ | |
102 | } | |
103 | ||
104 | /* check whether a vector already exists, temporarily masked? */ | |
105 | if (h & APIC_VECTOR_MASK) { | |
106 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | |
107 | "installed\n", | |
108 | cpu, (h & APIC_VECTOR_MASK)); | |
109 | return; /* -EBUSY */ | |
110 | } | |
111 | ||
112 | /* The temperature transition interrupt handler setup */ | |
113 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | |
114 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | |
115 | apic_write_around(APIC_LVTTHMR, h); | |
116 | ||
117 | rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); | |
118 | wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | |
119 | ||
120 | /* ok we're good to go... */ | |
121 | vendor_thermal_interrupt = intel_thermal_interrupt; | |
122 | ||
123 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | |
124 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); | |
125 | ||
126 | l = apic_read (APIC_LVTTHMR); | |
127 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | |
128 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | |
129 | return; | |
130 | } | |
131 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | |
132 | ||
133 | ||
134 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | |
135 | static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |
136 | { | |
137 | u32 h; | |
138 | ||
139 | if (mce_num_extended_msrs == 0) | |
140 | goto done; | |
141 | ||
142 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | |
143 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | |
144 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | |
145 | rdmsr (MSR_IA32_MCG_EDX, r->edx, h); | |
146 | rdmsr (MSR_IA32_MCG_ESI, r->esi, h); | |
147 | rdmsr (MSR_IA32_MCG_EDI, r->edi, h); | |
148 | rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); | |
149 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | |
150 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | |
151 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | |
152 | ||
153 | /* can we rely on kmalloc to do a dynamic | |
154 | * allocation for the reserved registers? | |
155 | */ | |
156 | done: | |
157 | return mce_num_extended_msrs; | |
158 | } | |
159 | ||
160 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |
161 | { | |
162 | int recover=1; | |
163 | u32 alow, ahigh, high, low; | |
164 | u32 mcgstl, mcgsth; | |
165 | int i; | |
166 | struct intel_mce_extended_msrs dbg; | |
167 | ||
168 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | |
169 | if (mcgstl & (1<<0)) /* Recoverable ? */ | |
170 | recover=0; | |
171 | ||
172 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | |
173 | smp_processor_id(), mcgsth, mcgstl); | |
174 | ||
175 | if (intel_get_extended_msrs(&dbg)) { | |
176 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | |
177 | smp_processor_id(), dbg.eip, dbg.eflags); | |
178 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | |
179 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); | |
180 | printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | |
181 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | |
182 | } | |
183 | ||
184 | for (i=0; i<nr_mce_banks; i++) { | |
185 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | |
186 | if (high & (1<<31)) { | |
187 | if (high & (1<<29)) | |
188 | recover |= 1; | |
189 | if (high & (1<<25)) | |
190 | recover |= 2; | |
191 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | |
192 | high &= ~(1<<31); | |
193 | if (high & (1<<27)) { | |
194 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | |
195 | printk ("[%08x%08x]", ahigh, alow); | |
196 | } | |
197 | if (high & (1<<26)) { | |
198 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | |
199 | printk (" at %08x%08x", ahigh, alow); | |
200 | } | |
201 | printk ("\n"); | |
202 | } | |
203 | } | |
204 | ||
205 | if (recover & 2) | |
206 | panic ("CPU context corrupt"); | |
207 | if (recover & 1) | |
208 | panic ("Unable to continue"); | |
209 | ||
210 | printk(KERN_EMERG "Attempting to continue.\n"); | |
211 | /* | |
212 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | |
213 | * recoverable/continuable.This will allow BIOS to look at the MSRs | |
214 | * for errors if the OS could not log the error. | |
215 | */ | |
216 | for (i=0; i<nr_mce_banks; i++) { | |
217 | u32 msr; | |
218 | msr = MSR_IA32_MC0_STATUS+i*4; | |
219 | rdmsr (msr, low, high); | |
220 | if (high&(1<<31)) { | |
221 | /* Clear it */ | |
222 | wrmsr(msr, 0UL, 0UL); | |
223 | /* Serialize */ | |
224 | wmb(); | |
225 | add_taint(TAINT_MACHINE_CHECK); | |
226 | } | |
227 | } | |
228 | mcgstl &= ~(1<<2); | |
229 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | |
230 | } | |
231 | ||
232 | ||
31ab269a | 233 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) |
1da177e4 LT |
234 | { |
235 | u32 l, h; | |
236 | int i; | |
237 | ||
238 | machine_check_vector = intel_machine_check; | |
239 | wmb(); | |
240 | ||
241 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | |
242 | rdmsr (MSR_IA32_MCG_CAP, l, h); | |
243 | if (l & (1<<8)) /* Control register present ? */ | |
244 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | |
245 | nr_mce_banks = l & 0xff; | |
246 | ||
247 | for (i=0; i<nr_mce_banks; i++) { | |
248 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | |
249 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | |
250 | } | |
251 | ||
252 | set_in_cr4 (X86_CR4_MCE); | |
253 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | |
254 | smp_processor_id()); | |
255 | ||
256 | /* Check for P4/Xeon extended MCE MSRs */ | |
257 | rdmsr (MSR_IA32_MCG_CAP, l, h); | |
258 | if (l & (1<<9)) {/* MCG_EXT_P */ | |
259 | mce_num_extended_msrs = (l >> 16) & 0xff; | |
260 | printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | |
261 | " available\n", | |
262 | smp_processor_id(), mce_num_extended_msrs); | |
263 | ||
264 | #ifdef CONFIG_X86_MCE_P4THERMAL | |
265 | /* Check for P4/Xeon Thermal monitor */ | |
266 | intel_init_thermal(c); | |
267 | #endif | |
268 | } | |
269 | } |