]>
Commit | Line | Data |
---|---|---|
15d5f839 | 1 | /* |
3222b36f DZ |
2 | * Thermal throttle event support code (such as syslog messaging and rate |
3 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | |
cb6f3c15 | 4 | * |
3222b36f DZ |
5 | * This allows consistent reporting of CPU thermal throttle events. |
6 | * | |
7 | * Maintains a counter in /sys that keeps track of the number of thermal | |
8 | * events, such that the user knows how bad the thermal problem might be | |
9 | * (since the logging to syslog and mcelog is rate limited). | |
15d5f839 DZ |
10 | * |
11 | * Author: Dmitriy Zavin (dmitriyz@google.com) | |
12 | * | |
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | |
3222b36f | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15d5f839 | 15 | */ |
a65c88dd | 16 | #include <linux/interrupt.h> |
cb6f3c15 IM |
17 | #include <linux/notifier.h> |
18 | #include <linux/jiffies.h> | |
895287c0 | 19 | #include <linux/kernel.h> |
15d5f839 | 20 | #include <linux/percpu.h> |
3222b36f | 21 | #include <linux/sysdev.h> |
895287c0 HS |
22 | #include <linux/types.h> |
23 | #include <linux/init.h> | |
24 | #include <linux/smp.h> | |
15d5f839 | 25 | #include <linux/cpu.h> |
cb6f3c15 | 26 | |
895287c0 HS |
27 | #include <asm/processor.h> |
28 | #include <asm/system.h> | |
29 | #include <asm/apic.h> | |
a65c88dd HS |
30 | #include <asm/idle.h> |
31 | #include <asm/mce.h> | |
895287c0 | 32 | #include <asm/msr.h> |
15d5f839 DZ |
33 | |
/*
 * Minimum spacing, in jiffies, between two reports while the throttle
 * state of a CPU stays unchanged:
 */
#define CHECK_INTERVAL		(300 * HZ)
15d5f839 | 36 | |
39676840 IM |
37 | /* |
38 | * Current thermal throttling state: | |
39 | */ | |
40 | struct thermal_state { | |
41 | bool is_throttled; | |
42 | ||
43 | u64 next_check; | |
44 | unsigned long throttle_count; | |
45 | }; | |
cb6f3c15 | 46 | |
39676840 IM |
47 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
48 | ||
49 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | |
3222b36f DZ |
50 | |
51 | #ifdef CONFIG_SYSFS | |
/*
 * Declare a read-only (0444) sysdev attribute for <_name>, served by
 * therm_throt_sysdev_show_<_name>():
 */
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444,					\
			   therm_throt_sysdev_show_##_name, NULL)
54 | ||
/*
 * Generate therm_throt_sysdev_show_<name>(): prints the <name> field of
 * the per-CPU thermal_state of the CPU behind @dev as "%lu\n".
 * Produces no output (returns 0) when that CPU is not online.
 */
#define define_therm_throt_sysdev_show_func(name)			\
									\
static ssize_t therm_throt_sysdev_show_##name(				\
			struct sys_device *dev,				\
			struct sysdev_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).name);	\
	preempt_enable();						\
									\
	return ret;							\
}
75 | ||
/* Show function and read-only attribute for thermal_state.throttle_count: */
define_therm_throt_sysdev_show_func(throttle_count);
define_therm_throt_sysdev_one_ro(throttle_count);
3222b36f DZ |
78 | |
79 | static struct attribute *thermal_throttle_attrs[] = { | |
39676840 | 80 | &attr_throttle_count.attr, |
3222b36f DZ |
81 | NULL |
82 | }; | |
83 | ||
84 | static struct attribute_group thermal_throttle_attr_group = { | |
cb6f3c15 IM |
85 | .attrs = thermal_throttle_attrs, |
86 | .name = "thermal_throttle" | |
3222b36f DZ |
87 | }; |
88 | #endif /* CONFIG_SYSFS */ | |
15d5f839 DZ |
89 | |
90 | /*** | |
3222b36f | 91 | * therm_throt_process - Process thermal throttling event from interrupt |
15d5f839 DZ |
92 | * @curr: Whether the condition is current or not (boolean), since the |
93 | * thermal interrupt normally gets called both when the thermal | |
94 | * event begins and once the event has ended. | |
95 | * | |
3222b36f | 96 | * This function is called by the thermal interrupt after the |
15d5f839 DZ |
97 | * IRQ has been acknowledged. |
98 | * | |
99 | * It will take care of rate limiting and printing messages to the syslog. | |
100 | * | |
101 | * Returns: 0 : Event should NOT be further logged, i.e. still in | |
102 | * "timeout" from previous log message. | |
103 | * 1 : Event should be logged further, and a message has been | |
104 | * printed to the syslog. | |
105 | */ | |
39676840 | 106 | static int therm_throt_process(bool is_throttled) |
15d5f839 | 107 | { |
39676840 IM |
108 | struct thermal_state *state; |
109 | unsigned int this_cpu; | |
110 | bool was_throttled; | |
111 | u64 now; | |
112 | ||
113 | this_cpu = smp_processor_id(); | |
114 | now = get_jiffies_64(); | |
115 | state = &per_cpu(thermal_state, this_cpu); | |
116 | ||
117 | was_throttled = state->is_throttled; | |
118 | state->is_throttled = is_throttled; | |
15d5f839 | 119 | |
0d01f314 | 120 | if (is_throttled) |
39676840 | 121 | state->throttle_count++; |
3222b36f | 122 | |
0d01f314 | 123 | if (!(was_throttled ^ is_throttled) && |
39676840 | 124 | time_before64(now, state->next_check)) |
15d5f839 DZ |
125 | return 0; |
126 | ||
39676840 | 127 | state->next_check = now + CHECK_INTERVAL; |
15d5f839 DZ |
128 | |
129 | /* if we just entered the thermal event */ | |
0d01f314 | 130 | if (is_throttled) { |
39676840 | 131 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); |
3222b36f | 132 | |
15d5f839 | 133 | add_taint(TAINT_MACHINE_CHECK); |
4e5c25d4 HD |
134 | return 1; |
135 | } | |
136 | if (was_throttled) { | |
39676840 | 137 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); |
4e5c25d4 | 138 | return 1; |
15d5f839 DZ |
139 | } |
140 | ||
4e5c25d4 | 141 | return 0; |
15d5f839 | 142 | } |
3222b36f DZ |
143 | |
144 | #ifdef CONFIG_SYSFS | |
cb6f3c15 | 145 | /* Add/Remove thermal_throttle interface for CPU device: */ |
6569345a | 146 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
3222b36f | 147 | { |
cb6f3c15 IM |
148 | return sysfs_create_group(&sys_dev->kobj, |
149 | &thermal_throttle_attr_group); | |
3222b36f DZ |
150 | } |
151 | ||
6569345a | 152 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
3222b36f | 153 | { |
7c36752a | 154 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); |
3222b36f DZ |
155 | } |
156 | ||
/*
 * Serializes creation/removal of the per-CPU sysfs group between the
 * initcall and the CPU hotplug callback:
 */
static DEFINE_MUTEX(therm_cpu_lock);
159 | ||
160 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
cb6f3c15 IM |
161 | static __cpuinit int |
162 | thermal_throttle_cpu_callback(struct notifier_block *nfb, | |
163 | unsigned long action, | |
164 | void *hcpu) | |
3222b36f DZ |
165 | { |
166 | unsigned int cpu = (unsigned long)hcpu; | |
167 | struct sys_device *sys_dev; | |
c7e38a9c | 168 | int err = 0; |
3222b36f DZ |
169 | |
170 | sys_dev = get_cpu_sysdev(cpu); | |
cb6f3c15 | 171 | |
3222b36f | 172 | switch (action) { |
c7e38a9c AM |
173 | case CPU_UP_PREPARE: |
174 | case CPU_UP_PREPARE_FROZEN: | |
38ef6d19 | 175 | mutex_lock(&therm_cpu_lock); |
6569345a | 176 | err = thermal_throttle_add_dev(sys_dev); |
38ef6d19 | 177 | mutex_unlock(&therm_cpu_lock); |
6569345a | 178 | WARN_ON(err); |
3222b36f | 179 | break; |
c7e38a9c AM |
180 | case CPU_UP_CANCELED: |
181 | case CPU_UP_CANCELED_FROZEN: | |
3222b36f | 182 | case CPU_DEAD: |
8bb78442 | 183 | case CPU_DEAD_FROZEN: |
38ef6d19 | 184 | mutex_lock(&therm_cpu_lock); |
3222b36f | 185 | thermal_throttle_remove_dev(sys_dev); |
38ef6d19 | 186 | mutex_unlock(&therm_cpu_lock); |
3222b36f DZ |
187 | break; |
188 | } | |
c7e38a9c | 189 | return err ? NOTIFY_BAD : NOTIFY_OK; |
3222b36f DZ |
190 | } |
191 | ||
25d1b516 | 192 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
3222b36f DZ |
193 | { |
194 | .notifier_call = thermal_throttle_cpu_callback, | |
195 | }; | |
3222b36f DZ |
196 | |
197 | static __init int thermal_throttle_init_device(void) | |
198 | { | |
199 | unsigned int cpu = 0; | |
6569345a | 200 | int err; |
3222b36f DZ |
201 | |
202 | if (!atomic_read(&therm_throt_en)) | |
203 | return 0; | |
204 | ||
205 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | |
206 | ||
207 | #ifdef CONFIG_HOTPLUG_CPU | |
208 | mutex_lock(&therm_cpu_lock); | |
209 | #endif | |
210 | /* connect live CPUs to sysfs */ | |
6569345a SH |
211 | for_each_online_cpu(cpu) { |
212 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | |
213 | WARN_ON(err); | |
214 | } | |
3222b36f DZ |
215 | #ifdef CONFIG_HOTPLUG_CPU |
216 | mutex_unlock(&therm_cpu_lock); | |
217 | #endif | |
218 | ||
219 | return 0; | |
220 | } | |
3222b36f | 221 | device_initcall(thermal_throttle_init_device); |
a65c88dd | 222 | |
3222b36f | 223 | #endif /* CONFIG_SYSFS */ |
a65c88dd HS |
224 | |
225 | /* Thermal transition interrupt handler */ | |
8363fc82 | 226 | static void intel_thermal_interrupt(void) |
a65c88dd HS |
227 | { |
228 | __u64 msr_val; | |
229 | ||
230 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | |
39676840 | 231 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) |
a65c88dd HS |
232 | mce_log_therm_throt_event(msr_val); |
233 | } | |
234 | ||
235 | static void unexpected_thermal_interrupt(void) | |
236 | { | |
237 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | |
238 | smp_processor_id()); | |
239 | add_taint(TAINT_MACHINE_CHECK); | |
240 | } | |
241 | ||
242 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | |
243 | ||
244 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |
245 | { | |
246 | exit_idle(); | |
247 | irq_enter(); | |
248 | inc_irq_stat(irq_thermal_count); | |
249 | smp_thermal_vector(); | |
250 | irq_exit(); | |
251 | /* Ack only at the end to avoid potential reentry */ | |
252 | ack_APIC_irq(); | |
253 | } | |
254 | ||
895287c0 HS |
255 | void intel_init_thermal(struct cpuinfo_x86 *c) |
256 | { | |
257 | unsigned int cpu = smp_processor_id(); | |
258 | int tm2 = 0; | |
259 | u32 l, h; | |
260 | ||
261 | /* Thermal monitoring depends on ACPI and clock modulation*/ | |
262 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | |
263 | return; | |
264 | ||
265 | /* | |
266 | * First check if its enabled already, in which case there might | |
267 | * be some SMM goo which handles it, so we can't even put a handler | |
268 | * since it might be delivered via SMI already: | |
269 | */ | |
270 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | |
271 | h = apic_read(APIC_LVTTHMR); | |
272 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | |
273 | printk(KERN_DEBUG | |
274 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | |
275 | return; | |
276 | } | |
277 | ||
895287c0 HS |
278 | /* Check whether a vector already exists */ |
279 | if (h & APIC_VECTOR_MASK) { | |
280 | printk(KERN_DEBUG | |
281 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | |
282 | cpu, (h & APIC_VECTOR_MASK)); | |
283 | return; | |
284 | } | |
285 | ||
f3a0867b BZ |
286 | /* early Pentium M models use different method for enabling TM2 */ |
287 | if (cpu_has(c, X86_FEATURE_TM2)) { | |
288 | if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { | |
289 | rdmsr(MSR_THERM2_CTL, l, h); | |
290 | if (l & MSR_THERM2_CTL_TM_SELECT) | |
291 | tm2 = 1; | |
292 | } else if (l & MSR_IA32_MISC_ENABLE_TM2) | |
293 | tm2 = 1; | |
294 | } | |
295 | ||
895287c0 HS |
296 | /* We'll mask the thermal vector in the lapic till we're ready: */ |
297 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | |
298 | apic_write(APIC_LVTTHMR, h); | |
299 | ||
300 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | |
301 | wrmsr(MSR_IA32_THERM_INTERRUPT, | |
302 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | |
303 | ||
8363fc82 | 304 | smp_thermal_vector = intel_thermal_interrupt; |
895287c0 HS |
305 | |
306 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | |
307 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | |
308 | ||
309 | /* Unmask the thermal vector: */ | |
310 | l = apic_read(APIC_LVTTHMR); | |
311 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | |
312 | ||
313 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | |
314 | cpu, tm2 ? "TM2" : "TM1"); | |
315 | ||
316 | /* enable thermal throttle processing */ | |
317 | atomic_set(&therm_throt_en, 1); | |
318 | } |