]>
Commit | Line | Data |
---|---|---|
15d5f839 | 1 | /* |
3222b36f DZ |
2 | * Thermal throttle event support code (such as syslog messaging and rate |
3 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | |
cb6f3c15 | 4 | * |
3222b36f DZ |
5 | * This allows consistent reporting of CPU thermal throttle events. |
6 | * | |
7 | * Maintains a counter in /sys that keeps track of the number of thermal | |
8 | * events, such that the user knows how bad the thermal problem might be | |
9 | * (since the logging to syslog and mcelog is rate limited). | |
15d5f839 DZ |
10 | * |
11 | * Author: Dmitriy Zavin (dmitriyz@google.com) | |
12 | * | |
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | |
3222b36f | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15d5f839 | 15 | */ |
a65c88dd | 16 | #include <linux/interrupt.h> |
cb6f3c15 IM |
17 | #include <linux/notifier.h> |
18 | #include <linux/jiffies.h> | |
895287c0 | 19 | #include <linux/kernel.h> |
15d5f839 | 20 | #include <linux/percpu.h> |
3222b36f | 21 | #include <linux/sysdev.h> |
895287c0 HS |
22 | #include <linux/types.h> |
23 | #include <linux/init.h> | |
24 | #include <linux/smp.h> | |
15d5f839 | 25 | #include <linux/cpu.h> |
cb6f3c15 | 26 | |
895287c0 HS |
27 | #include <asm/processor.h> |
28 | #include <asm/system.h> | |
29 | #include <asm/apic.h> | |
a65c88dd HS |
30 | #include <asm/idle.h> |
31 | #include <asm/mce.h> | |
895287c0 | 32 | #include <asm/msr.h> |
15d5f839 DZ |
33 | |
/* Minimum spacing, in jiffies, between two reports of thermal events (300 s) */
#define CHECK_INTERVAL		(5 * 60 * HZ)
15d5f839 | 36 | |
39676840 IM |
37 | /* |
38 | * Current thermal throttling state: | |
39 | */ | |
55d435a2 | 40 | struct _thermal_state { |
39676840 IM |
41 | bool is_throttled; |
42 | ||
43 | u64 next_check; | |
44 | unsigned long throttle_count; | |
b417c9fd | 45 | unsigned long last_throttle_count; |
39676840 | 46 | }; |
cb6f3c15 | 47 | |
55d435a2 FY |
48 | struct thermal_state { |
49 | struct _thermal_state core; | |
50 | struct _thermal_state package; | |
51 | }; | |
52 | ||
39676840 IM |
53 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
54 | ||
55 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | |
3222b36f | 56 | |
a2202aa2 YW |
57 | static u32 lvtthmr_init __read_mostly; |
58 | ||
3222b36f | 59 | #ifdef CONFIG_SYSFS |
/*
 * Declare a read-only (0444) sysdev attribute named _name whose show
 * handler is therm_throt_sysdev_show_<_name>; there is no store handler.
 */
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444,					\
			   therm_throt_sysdev_show_##_name, NULL)

/*
 * Generate therm_throt_sysdev_show_<level>_<name>(), a sysdev show handler.
 *
 * The handler treats dev->id as the CPU number. For an online CPU it
 * prints per_cpu(thermal_state, cpu).<level>.<name> into buf as "%lu\n"
 * and returns the sprintf() result; for an offline CPU it writes nothing
 * and returns 0.
 */
#define define_therm_throt_sysdev_show_func(level, name)		\
									\
static ssize_t therm_throt_sysdev_show_##level##_##name(		\
			struct sys_device *dev,				\
			struct sysdev_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).level.name);	\
	preempt_enable();						\
									\
	return ret;							\
}
85 | ||
55d435a2 FY |
/* Show handlers for the core and package throttle counters ... */
define_therm_throt_sysdev_show_func(core, throttle_count);
define_therm_throt_sysdev_show_func(package, throttle_count);

/* ... and the read-only attributes that expose them */
define_therm_throt_sysdev_one_ro(core_throttle_count);
define_therm_throt_sysdev_one_ro(package_throttle_count);
3222b36f DZ |
91 | |
92 | static struct attribute *thermal_throttle_attrs[] = { | |
55d435a2 | 93 | &attr_core_throttle_count.attr, |
3222b36f DZ |
94 | NULL |
95 | }; | |
96 | ||
97 | static struct attribute_group thermal_throttle_attr_group = { | |
cb6f3c15 IM |
98 | .attrs = thermal_throttle_attrs, |
99 | .name = "thermal_throttle" | |
3222b36f DZ |
100 | }; |
101 | #endif /* CONFIG_SYSFS */ | |
15d5f839 DZ |
102 | |
103 | /*** | |
3222b36f | 104 | * therm_throt_process - Process thermal throttling event from interrupt |
15d5f839 DZ |
105 | * @curr: Whether the condition is current or not (boolean), since the |
106 | * thermal interrupt normally gets called both when the thermal | |
107 | * event begins and once the event has ended. | |
108 | * | |
3222b36f | 109 | * This function is called by the thermal interrupt after the |
15d5f839 DZ |
110 | * IRQ has been acknowledged. |
111 | * | |
112 | * It will take care of rate limiting and printing messages to the syslog. | |
113 | * | |
114 | * Returns: 0 : Event should NOT be further logged, i.e. still in | |
115 | * "timeout" from previous log message. | |
116 | * 1 : Event should be logged further, and a message has been | |
117 | * printed to the syslog. | |
118 | */ | |
55d435a2 FY |
119 | #define CORE_LEVEL 0 |
120 | #define PACKAGE_LEVEL 1 | |
121 | static int therm_throt_process(bool is_throttled, int level) | |
15d5f839 | 122 | { |
55d435a2 | 123 | struct _thermal_state *state; |
39676840 IM |
124 | unsigned int this_cpu; |
125 | bool was_throttled; | |
126 | u64 now; | |
127 | ||
128 | this_cpu = smp_processor_id(); | |
129 | now = get_jiffies_64(); | |
55d435a2 FY |
130 | if (level == CORE_LEVEL) |
131 | state = &per_cpu(thermal_state, this_cpu).core; | |
132 | else | |
133 | state = &per_cpu(thermal_state, this_cpu).package; | |
39676840 IM |
134 | |
135 | was_throttled = state->is_throttled; | |
136 | state->is_throttled = is_throttled; | |
15d5f839 | 137 | |
0d01f314 | 138 | if (is_throttled) |
39676840 | 139 | state->throttle_count++; |
3222b36f | 140 | |
b417c9fd IM |
141 | if (time_before64(now, state->next_check) && |
142 | state->throttle_count != state->last_throttle_count) | |
15d5f839 DZ |
143 | return 0; |
144 | ||
39676840 | 145 | state->next_check = now + CHECK_INTERVAL; |
b417c9fd | 146 | state->last_throttle_count = state->throttle_count; |
15d5f839 DZ |
147 | |
148 | /* if we just entered the thermal event */ | |
0d01f314 | 149 | if (is_throttled) { |
55d435a2 FY |
150 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", |
151 | this_cpu, | |
152 | level == CORE_LEVEL ? "Core" : "Package", | |
153 | state->throttle_count); | |
3222b36f | 154 | |
15d5f839 | 155 | add_taint(TAINT_MACHINE_CHECK); |
4e5c25d4 HD |
156 | return 1; |
157 | } | |
158 | if (was_throttled) { | |
55d435a2 FY |
159 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", |
160 | this_cpu, | |
161 | level == CORE_LEVEL ? "Core" : "Package"); | |
4e5c25d4 | 162 | return 1; |
15d5f839 DZ |
163 | } |
164 | ||
4e5c25d4 | 165 | return 0; |
15d5f839 | 166 | } |
3222b36f DZ |
167 | |
168 | #ifdef CONFIG_SYSFS | |
cb6f3c15 | 169 | /* Add/Remove thermal_throttle interface for CPU device: */ |
6569345a | 170 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
3222b36f | 171 | { |
55d435a2 FY |
172 | int err; |
173 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | |
174 | ||
175 | err = sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | |
176 | if (err) | |
177 | return err; | |
178 | ||
179 | if (cpu_has(c, X86_FEATURE_PTS)) | |
180 | err = sysfs_add_file_to_group(&sys_dev->kobj, | |
181 | &attr_package_throttle_count.attr, | |
182 | thermal_throttle_attr_group.name); | |
183 | ||
184 | return err; | |
3222b36f DZ |
185 | } |
186 | ||
6569345a | 187 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
3222b36f | 188 | { |
7c36752a | 189 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); |
3222b36f DZ |
190 | } |
191 | ||
/* Serializes sysfs device creation/removal against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);
194 | ||
195 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
cb6f3c15 IM |
196 | static __cpuinit int |
197 | thermal_throttle_cpu_callback(struct notifier_block *nfb, | |
198 | unsigned long action, | |
199 | void *hcpu) | |
3222b36f DZ |
200 | { |
201 | unsigned int cpu = (unsigned long)hcpu; | |
202 | struct sys_device *sys_dev; | |
c7e38a9c | 203 | int err = 0; |
3222b36f DZ |
204 | |
205 | sys_dev = get_cpu_sysdev(cpu); | |
cb6f3c15 | 206 | |
3222b36f | 207 | switch (action) { |
c7e38a9c AM |
208 | case CPU_UP_PREPARE: |
209 | case CPU_UP_PREPARE_FROZEN: | |
38ef6d19 | 210 | mutex_lock(&therm_cpu_lock); |
6569345a | 211 | err = thermal_throttle_add_dev(sys_dev); |
38ef6d19 | 212 | mutex_unlock(&therm_cpu_lock); |
6569345a | 213 | WARN_ON(err); |
3222b36f | 214 | break; |
c7e38a9c AM |
215 | case CPU_UP_CANCELED: |
216 | case CPU_UP_CANCELED_FROZEN: | |
3222b36f | 217 | case CPU_DEAD: |
8bb78442 | 218 | case CPU_DEAD_FROZEN: |
38ef6d19 | 219 | mutex_lock(&therm_cpu_lock); |
3222b36f | 220 | thermal_throttle_remove_dev(sys_dev); |
38ef6d19 | 221 | mutex_unlock(&therm_cpu_lock); |
3222b36f DZ |
222 | break; |
223 | } | |
a94247e7 | 224 | return notifier_from_errno(err); |
3222b36f DZ |
225 | } |
226 | ||
25d1b516 | 227 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
3222b36f DZ |
228 | { |
229 | .notifier_call = thermal_throttle_cpu_callback, | |
230 | }; | |
3222b36f DZ |
231 | |
232 | static __init int thermal_throttle_init_device(void) | |
233 | { | |
234 | unsigned int cpu = 0; | |
6569345a | 235 | int err; |
3222b36f DZ |
236 | |
237 | if (!atomic_read(&therm_throt_en)) | |
238 | return 0; | |
239 | ||
240 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | |
241 | ||
242 | #ifdef CONFIG_HOTPLUG_CPU | |
243 | mutex_lock(&therm_cpu_lock); | |
244 | #endif | |
245 | /* connect live CPUs to sysfs */ | |
6569345a SH |
246 | for_each_online_cpu(cpu) { |
247 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | |
248 | WARN_ON(err); | |
249 | } | |
3222b36f DZ |
250 | #ifdef CONFIG_HOTPLUG_CPU |
251 | mutex_unlock(&therm_cpu_lock); | |
252 | #endif | |
253 | ||
254 | return 0; | |
255 | } | |
3222b36f | 256 | device_initcall(thermal_throttle_init_device); |
a65c88dd | 257 | |
3222b36f | 258 | #endif /* CONFIG_SYSFS */ |
a65c88dd HS |
259 | |
260 | /* Thermal transition interrupt handler */ | |
8363fc82 | 261 | static void intel_thermal_interrupt(void) |
a65c88dd HS |
262 | { |
263 | __u64 msr_val; | |
55d435a2 | 264 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); |
a65c88dd HS |
265 | |
266 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | |
55d435a2 FY |
267 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
268 | CORE_LEVEL) != 0) | |
a65c88dd | 269 | mce_log_therm_throt_event(msr_val); |
55d435a2 FY |
270 | |
271 | if (cpu_has(c, X86_FEATURE_PTS)) { | |
272 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | |
273 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | |
274 | PACKAGE_LEVEL) != 0) | |
275 | /* | |
276 | * Set up the most significant bit to notify mce log | |
277 | * that this thermal event is a package level event. | |
278 | * This is a temp solution. May be changed in the future | |
279 | * with mce log infrasture. | |
280 | */ | |
281 | mce_log_therm_throt_event(((__u64)1 << 63) | msr_val); | |
282 | } | |
a65c88dd HS |
283 | } |
284 | ||
285 | static void unexpected_thermal_interrupt(void) | |
286 | { | |
287 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | |
288 | smp_processor_id()); | |
289 | add_taint(TAINT_MACHINE_CHECK); | |
290 | } | |
291 | ||
292 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | |
293 | ||
294 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |
295 | { | |
296 | exit_idle(); | |
297 | irq_enter(); | |
298 | inc_irq_stat(irq_thermal_count); | |
299 | smp_thermal_vector(); | |
300 | irq_exit(); | |
301 | /* Ack only at the end to avoid potential reentry */ | |
302 | ack_APIC_irq(); | |
303 | } | |
304 | ||
70fe4407 HS |
305 | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ |
306 | static int intel_thermal_supported(struct cpuinfo_x86 *c) | |
307 | { | |
308 | if (!cpu_has_apic) | |
309 | return 0; | |
310 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | |
311 | return 0; | |
312 | return 1; | |
313 | } | |
314 | ||
ce6b5d76 | 315 | void __init mcheck_intel_therm_init(void) |
a2202aa2 YW |
316 | { |
317 | /* | |
318 | * This function is only called on boot CPU. Save the init thermal | |
319 | * LVT value on BSP and use that value to restore APs' thermal LVT | |
320 | * entry BIOS programmed later | |
321 | */ | |
70fe4407 | 322 | if (intel_thermal_supported(&boot_cpu_data)) |
a2202aa2 YW |
323 | lvtthmr_init = apic_read(APIC_LVTTHMR); |
324 | } | |
325 | ||
cffd377e | 326 | void intel_init_thermal(struct cpuinfo_x86 *c) |
895287c0 HS |
327 | { |
328 | unsigned int cpu = smp_processor_id(); | |
329 | int tm2 = 0; | |
330 | u32 l, h; | |
331 | ||
70fe4407 | 332 | if (!intel_thermal_supported(c)) |
895287c0 HS |
333 | return; |
334 | ||
335 | /* | |
336 | * First check if its enabled already, in which case there might | |
337 | * be some SMM goo which handles it, so we can't even put a handler | |
338 | * since it might be delivered via SMI already: | |
339 | */ | |
340 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | |
a2202aa2 YW |
341 | |
342 | /* | |
343 | * The initial value of thermal LVT entries on all APs always reads | |
344 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | |
345 | * sequence to them and LVT registers are reset to 0s except for | |
346 | * the mask bits which are set to 1s when APs receive INIT IPI. | |
347 | * Always restore the value that BIOS has programmed on AP based on | |
348 | * BSP's info we saved since BIOS is always setting the same value | |
349 | * for all threads/cores | |
350 | */ | |
351 | apic_write(APIC_LVTTHMR, lvtthmr_init); | |
352 | ||
353 | h = lvtthmr_init; | |
354 | ||
895287c0 HS |
355 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
356 | printk(KERN_DEBUG | |
357 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | |
358 | return; | |
359 | } | |
360 | ||
895287c0 HS |
361 | /* Check whether a vector already exists */ |
362 | if (h & APIC_VECTOR_MASK) { | |
363 | printk(KERN_DEBUG | |
364 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | |
365 | cpu, (h & APIC_VECTOR_MASK)); | |
366 | return; | |
367 | } | |
368 | ||
f3a0867b BZ |
369 | /* early Pentium M models use different method for enabling TM2 */ |
370 | if (cpu_has(c, X86_FEATURE_TM2)) { | |
371 | if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { | |
372 | rdmsr(MSR_THERM2_CTL, l, h); | |
373 | if (l & MSR_THERM2_CTL_TM_SELECT) | |
374 | tm2 = 1; | |
375 | } else if (l & MSR_IA32_MISC_ENABLE_TM2) | |
376 | tm2 = 1; | |
377 | } | |
378 | ||
895287c0 HS |
379 | /* We'll mask the thermal vector in the lapic till we're ready: */ |
380 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | |
381 | apic_write(APIC_LVTTHMR, h); | |
382 | ||
383 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | |
384 | wrmsr(MSR_IA32_THERM_INTERRUPT, | |
385 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | |
386 | ||
55d435a2 FY |
387 | if (cpu_has(c, X86_FEATURE_PTS)) { |
388 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | |
389 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | |
390 | l | (PACKAGE_THERM_INT_LOW_ENABLE | |
391 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | |
392 | } | |
393 | ||
8363fc82 | 394 | smp_thermal_vector = intel_thermal_interrupt; |
895287c0 HS |
395 | |
396 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | |
397 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | |
398 | ||
399 | /* Unmask the thermal vector: */ | |
400 | l = apic_read(APIC_LVTTHMR); | |
401 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | |
402 | ||
2eaad1fd MT |
403 | printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", |
404 | tm2 ? "TM2" : "TM1"); | |
895287c0 HS |
405 | |
406 | /* enable thermal throttle processing */ | |
407 | atomic_set(&therm_throt_en, 1); | |
408 | } |