]>
Commit | Line | Data |
---|---|---|
/*
 * @file op_model_ppro.h
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 * @author Robert Richter <robert.richter@amd.com>
 */
15 | ||
16 | #include <linux/oprofile.h> | |
b9917028 | 17 | #include <linux/slab.h> |
1da177e4 LT |
18 | #include <asm/ptrace.h> |
19 | #include <asm/msr.h> | |
20 | #include <asm/apic.h> | |
3e4ff115 | 21 | #include <asm/nmi.h> |
8b45b72b | 22 | |
1da177e4 LT |
23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | |
25 | ||
/*
 * Number of performance counters and their bit width.  Defaults match
 * the classic P6 family (2 counters, 32 bits); both may be raised at
 * init time from CPUID leaf 0xA when architectural perfmon is present
 * (see arch_perfmon_setup_counters() and ppro_setup_ctrs()).
 */
static int num_counters = 2;
static int counter_width = 32;

/*
 * Counters are programmed with -count and count upward; the overflow
 * interrupt fires when the value wraps.  A clear top (sign) bit in the
 * counter therefore means it has overflowed.
 */
#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))

/* Event-select MSR bits that must never be set: the upper 32 bits plus
 * bit 21 (reserved on this family). */
#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))

/*
 * Per-counter reload values, allocated lazily in ppro_setup_ctrs().
 * NULL until then (and on allocation failure), so every user must
 * check before dereferencing.  Entry 0 means "counter inactive".
 */
static u64 *reset_value;
8b45b72b | 34 | |
1da177e4 LT |
35 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
36 | { | |
cb9c448c DZ |
37 | int i; |
38 | ||
b9917028 | 39 | for (i = 0; i < num_counters; i++) { |
cb9c448c DZ |
40 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
41 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | |
42 | else | |
43 | msrs->counters[i].addr = 0; | |
44 | } | |
8b45b72b | 45 | |
b9917028 | 46 | for (i = 0; i < num_counters; i++) { |
cb9c448c DZ |
47 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
48 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | |
49 | else | |
50 | msrs->controls[i].addr = 0; | |
51 | } | |
1da177e4 LT |
52 | } |
53 | ||
54 | ||
ef8828dd RR |
55 | static void ppro_setup_ctrs(struct op_x86_model_spec const *model, |
56 | struct op_msrs const * const msrs) | |
1da177e4 | 57 | { |
3370d358 | 58 | u64 val; |
1da177e4 LT |
59 | int i; |
60 | ||
b9917028 | 61 | if (!reset_value) { |
a4a16bea | 62 | reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, |
b9917028 AK |
63 | GFP_ATOMIC); |
64 | if (!reset_value) | |
65 | return; | |
66 | } | |
67 | ||
68 | if (cpu_has_arch_perfmon) { | |
69 | union cpuid10_eax eax; | |
70 | eax.full = cpuid_eax(0xa); | |
780eef94 TB |
71 | |
72 | /* | |
73 | * For Core2 (family 6, model 15), don't reset the | |
74 | * counter width: | |
75 | */ | |
76 | if (!(eax.split.version_id == 0 && | |
77 | current_cpu_data.x86 == 6 && | |
78 | current_cpu_data.x86_model == 15)) { | |
79 | ||
80 | if (counter_width < eax.split.bit_width) | |
81 | counter_width = eax.split.bit_width; | |
82 | } | |
b9917028 AK |
83 | } |
84 | ||
1da177e4 | 85 | /* clear all counters */ |
b9917028 | 86 | for (i = 0 ; i < num_counters; ++i) { |
8b45b72b | 87 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
cb9c448c | 88 | continue; |
3370d358 RR |
89 | rdmsrl(msrs->controls[i].addr, val); |
90 | val &= model->reserved; | |
91 | wrmsrl(msrs->controls[i].addr, val); | |
1da177e4 | 92 | } |
8b45b72b | 93 | |
1da177e4 | 94 | /* avoid a false detection of ctr overflows in NMI handler */ |
b9917028 | 95 | for (i = 0; i < num_counters; ++i) { |
8b45b72b | 96 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
cb9c448c | 97 | continue; |
b9917028 | 98 | wrmsrl(msrs->counters[i].addr, -1LL); |
1da177e4 LT |
99 | } |
100 | ||
101 | /* enable active counters */ | |
b9917028 | 102 | for (i = 0; i < num_counters; ++i) { |
8b45b72b | 103 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
1da177e4 | 104 | reset_value[i] = counter_config[i].count; |
b9917028 | 105 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
3370d358 RR |
106 | rdmsrl(msrs->controls[i].addr, val); |
107 | val &= model->reserved; | |
108 | val |= op_x86_get_ctrl(model, &counter_config[i]); | |
109 | wrmsrl(msrs->controls[i].addr, val); | |
cb9c448c DZ |
110 | } else { |
111 | reset_value[i] = 0; | |
1da177e4 LT |
112 | } |
113 | } | |
114 | } | |
115 | ||
8b45b72b | 116 | |
1da177e4 LT |
117 | static int ppro_check_ctrs(struct pt_regs * const regs, |
118 | struct op_msrs const * const msrs) | |
119 | { | |
7c64ade5 | 120 | u64 val; |
1da177e4 | 121 | int i; |
8b45b72b | 122 | |
b9917028 | 123 | for (i = 0 ; i < num_counters; ++i) { |
cb9c448c DZ |
124 | if (!reset_value[i]) |
125 | continue; | |
7c64ade5 AK |
126 | rdmsrl(msrs->counters[i].addr, val); |
127 | if (CTR_OVERFLOWED(val)) { | |
1da177e4 | 128 | oprofile_add_sample(regs, i); |
b9917028 | 129 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
1da177e4 LT |
130 | } |
131 | } | |
132 | ||
133 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | |
134 | * doesn't hurt other P6 variant */ | |
135 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | |
136 | ||
137 | /* We can't work out if we really handled an interrupt. We | |
138 | * might have caught a *second* counter just after overflowing | |
139 | * the interrupt for this counter then arrives | |
140 | * and we don't find a counter that's overflowed, so we | |
141 | * would return 0 and get dazed + confused. Instead we always | |
142 | * assume we found an overflow. This sucks. | |
143 | */ | |
144 | return 1; | |
145 | } | |
146 | ||
8b45b72b | 147 | |
1da177e4 LT |
148 | static void ppro_start(struct op_msrs const * const msrs) |
149 | { | |
8b45b72b | 150 | unsigned int low, high; |
6b77df08 | 151 | int i; |
cb9c448c | 152 | |
9ea84ad7 ED |
153 | if (!reset_value) |
154 | return; | |
b9917028 | 155 | for (i = 0; i < num_counters; ++i) { |
6b77df08 | 156 | if (reset_value[i]) { |
74c9a5c3 | 157 | rdmsr(msrs->controls[i].addr, low, high); |
6b77df08 | 158 | CTRL_SET_ACTIVE(low); |
74c9a5c3 | 159 | wrmsr(msrs->controls[i].addr, low, high); |
6b77df08 | 160 | } |
cb9c448c | 161 | } |
1da177e4 LT |
162 | } |
163 | ||
164 | ||
165 | static void ppro_stop(struct op_msrs const * const msrs) | |
166 | { | |
8b45b72b | 167 | unsigned int low, high; |
6b77df08 | 168 | int i; |
cb9c448c | 169 | |
9ea84ad7 ED |
170 | if (!reset_value) |
171 | return; | |
b9917028 | 172 | for (i = 0; i < num_counters; ++i) { |
6b77df08 AS |
173 | if (!reset_value[i]) |
174 | continue; | |
74c9a5c3 | 175 | rdmsr(msrs->controls[i].addr, low, high); |
cb9c448c | 176 | CTRL_SET_INACTIVE(low); |
74c9a5c3 | 177 | wrmsr(msrs->controls[i].addr, low, high); |
cb9c448c DZ |
178 | } |
179 | } | |
180 | ||
181 | static void ppro_shutdown(struct op_msrs const * const msrs) | |
182 | { | |
183 | int i; | |
184 | ||
b9917028 | 185 | for (i = 0 ; i < num_counters ; ++i) { |
8b45b72b | 186 | if (CTR_IS_RESERVED(msrs, i)) |
cb9c448c DZ |
187 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
188 | } | |
b9917028 | 189 | for (i = 0 ; i < num_counters ; ++i) { |
8b45b72b | 190 | if (CTRL_IS_RESERVED(msrs, i)) |
cb9c448c DZ |
191 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
192 | } | |
b9917028 AK |
193 | if (reset_value) { |
194 | kfree(reset_value); | |
195 | reset_value = NULL; | |
196 | } | |
1da177e4 LT |
197 | } |
198 | ||
199 | ||
/*
 * Model descriptor for the classic P6 family (PPro and successors):
 * a fixed pair of counters/controls, handled by the ppro_* callbacks.
 */
struct op_x86_model_spec const op_ppro_spec = {
	.num_counters = 2,
	.num_controls = 2,
	.reserved = MSR_PPRO_EVENTSEL_RESERVED,
	.fill_in_addresses = &ppro_fill_in_addresses,
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};
/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
 * The advantage of this is that it can be done without knowing about
 * the specific CPU.
 */
220 | ||
/*
 * Read the architectural perfmon capabilities from CPUID leaf 0xA and
 * size num_counters (and the arch-perfmon model spec) accordingly.
 */
static void arch_perfmon_setup_counters(void)
{
	union cpuid10_eax eax;

	eax.full = cpuid_eax(0xa);

	/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
	/* Some Core2 (family 6, model 15) BIOSes report version_id 0;
	 * substitute the values the hardware actually provides. */
	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
		current_cpu_data.x86_model == 15) {
		eax.split.version_id = 2;
		eax.split.num_counters = 2;
		eax.split.bit_width = 40;
	}

	num_counters = eax.split.num_counters;

	/* Propagate the discovered counter count into the runtime-sized
	 * arch-perfmon model descriptor. */
	op_arch_perfmon_spec.num_counters = num_counters;
	op_arch_perfmon_spec.num_controls = num_counters;
}
240 | ||
/*
 * Model init hook: discover counter layout from CPUID before the
 * common oprofile code uses the spec.  Always succeeds (returns 0).
 */
static int arch_perfmon_init(struct oprofile_operations *ignore)
{
	arch_perfmon_setup_counters();
	return 0;
}
246 | ||
/*
 * Model descriptor for CPUID-enumerated architectural perfmon; shares
 * the ppro_* callbacks, but counter counts are discovered at init time.
 */
struct op_x86_model_spec op_arch_perfmon_spec = {
	.reserved = MSR_PPRO_EVENTSEL_RESERVED,
	.init = &arch_perfmon_init,
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses = &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};