]>
Commit | Line | Data |
---|---|---|
a072738e CG |
/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licensing details see kernel-base/COPYING
 */
9 | ||
10 | #ifdef CONFIG_CPU_SUP_INTEL | |
11 | ||
12 | #include <asm/perf_event_p4.h> | |
13 | ||
14 | /* | |
15 | * array indices: 0,1 - HT threads, used with HT enabled cpu | |
16 | */ | |
17 | struct p4_event_template { | |
18 | u32 opcode; /* ESCR event + CCCR selector */ | |
19 | u64 config; /* packed predefined bits */ | |
20 | int dep; /* upstream dependency event index */ | |
f34edbc1 | 21 | int key; /* index into p4_templates */ |
cb7d6b50 LM |
22 | u64 msr; /* |
23 | * the high 32 bits set into MSR_IA32_PEBS_ENABLE and | |
24 | * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT | |
25 | * for cache events | |
26 | */ | |
a072738e CG |
27 | unsigned int emask; /* ESCR EventMask */ |
28 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | |
29 | unsigned int cntr[2]; /* counter index (offset) */ | |
30 | }; | |
31 | ||
32 | struct p4_pmu_res { | |
33 | /* maps hw_conf::idx into template for ESCR sake */ | |
34 | struct p4_event_template *tpl[ARCH_P4_MAX_CCCR]; | |
35 | }; | |
36 | ||
37 | static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); | |
38 | ||
/*
 * Pack a cache event (ESCR event, its event-mask bit and the CCCR
 * ESCR selector) into the u64 config layout used by this driver.
 */
#define P4_CACHE_EVENT_CONFIG(event, bit) \
        p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \
        p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \
        p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT)
43 | ||
44 | static __initconst u64 p4_hw_cache_event_ids | |
45 | [PERF_COUNT_HW_CACHE_MAX] | |
46 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
47 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | |
48 | { | |
49 | [ C(L1D ) ] = { | |
50 | [ C(OP_READ) ] = { | |
51 | [ C(RESULT_ACCESS) ] = 0x0, | |
52 | /* 1stL_cache_load_miss_retired */ | |
53 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | |
54 | | KEY_P4_L1D_OP_READ_RESULT_MISS, | |
55 | }, | |
56 | }, | |
57 | [ C(LL ) ] = { | |
58 | [ C(OP_READ) ] = { | |
59 | [ C(RESULT_ACCESS) ] = 0x0, | |
60 | /* 2ndL_cache_load_miss_retired */ | |
61 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | |
62 | | KEY_P4_LL_OP_READ_RESULT_MISS, | |
63 | }, | |
64 | }, | |
65 | [ C(DTLB) ] = { | |
66 | [ C(OP_READ) ] = { | |
67 | [ C(RESULT_ACCESS) ] = 0x0, | |
68 | /* DTLB_load_miss_retired */ | |
69 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | |
70 | | KEY_P4_DTLB_OP_READ_RESULT_MISS, | |
71 | }, | |
72 | [ C(OP_WRITE) ] = { | |
73 | [ C(RESULT_ACCESS) ] = 0x0, | |
74 | /* DTLB_store_miss_retired */ | |
75 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) | |
76 | | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, | |
77 | }, | |
78 | }, | |
79 | [ C(ITLB) ] = { | |
80 | [ C(OP_READ) ] = { | |
81 | /* ITLB_reference.HIT */ | |
82 | [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT) | |
83 | | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, | |
84 | ||
85 | /* ITLB_reference.MISS */ | |
86 | [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS) | |
87 | | KEY_P4_ITLB_OP_READ_RESULT_MISS, | |
88 | }, | |
89 | [ C(OP_WRITE) ] = { | |
90 | [ C(RESULT_ACCESS) ] = -1, | |
91 | [ C(RESULT_MISS) ] = -1, | |
92 | }, | |
93 | [ C(OP_PREFETCH) ] = { | |
94 | [ C(RESULT_ACCESS) ] = -1, | |
95 | [ C(RESULT_MISS) ] = -1, | |
96 | }, | |
97 | }, | |
98 | }; | |
99 | ||
a072738e CG |
100 | /* |
101 | * WARN: CCCR1 doesn't have a working enable bit so try to not | |
102 | * use it if possible | |
103 | * | |
104 | * Also as only we start to support raw events we will need to | |
105 | * append _all_ P4_EVENT_PACK'ed events here | |
106 | */ | |
107 | struct p4_event_template p4_templates[] = { | |
108 | [0] = { | |
a072738e CG |
109 | .opcode = P4_GLOBAL_POWER_EVENTS, |
110 | .config = 0, | |
111 | .dep = -1, | |
f34edbc1 | 112 | .key = 0, |
a072738e CG |
113 | .emask = |
114 | P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), | |
115 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
116 | .cntr = { 0, 2 }, | |
117 | }, | |
f34edbc1 | 118 | [1] = { |
a072738e CG |
119 | .opcode = P4_INSTR_RETIRED, |
120 | .config = 0, | |
e4495262 | 121 | .dep = -1, /* needs front-end tagging */ |
f34edbc1 | 122 | .key = 1, |
a072738e CG |
123 | .emask = |
124 | P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | | |
e4495262 CG |
125 | P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), |
126 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
a072738e CG |
127 | .cntr = { 12, 14 }, |
128 | }, | |
f34edbc1 | 129 | [2] = { |
a072738e CG |
130 | .opcode = P4_BSQ_CACHE_REFERENCE, |
131 | .config = 0, | |
132 | .dep = -1, | |
f34edbc1 | 133 | .key = 2, |
a072738e CG |
134 | .emask = |
135 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | |
136 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | |
137 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | |
138 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | |
139 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | |
140 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM), | |
141 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | |
142 | .cntr = { 0, 2 }, | |
143 | }, | |
f34edbc1 | 144 | [3] = { |
a072738e CG |
145 | .opcode = P4_BSQ_CACHE_REFERENCE, |
146 | .config = 0, | |
147 | .dep = -1, | |
f34edbc1 | 148 | .key = 3, |
a072738e CG |
149 | .emask = |
150 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | |
151 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | |
152 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), | |
153 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | |
154 | .cntr = { 0, 3 }, | |
155 | }, | |
f34edbc1 | 156 | [4] = { |
a072738e CG |
157 | .opcode = P4_RETIRED_BRANCH_TYPE, |
158 | .config = 0, | |
159 | .dep = -1, | |
f34edbc1 | 160 | .key = 4, |
a072738e CG |
161 | .emask = |
162 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | | |
163 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | | |
164 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) | | |
165 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT), | |
166 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, | |
167 | .cntr = { 4, 6 }, | |
168 | }, | |
f34edbc1 | 169 | [5] = { |
a072738e CG |
170 | .opcode = P4_MISPRED_BRANCH_RETIRED, |
171 | .config = 0, | |
172 | .dep = -1, | |
f34edbc1 | 173 | .key = 5, |
a072738e CG |
174 | .emask = |
175 | P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), | |
176 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | |
177 | .cntr = { 12, 14 }, | |
178 | }, | |
f34edbc1 | 179 | [6] = { |
a072738e CG |
180 | .opcode = P4_FSB_DATA_ACTIVITY, |
181 | .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | |
182 | .dep = -1, | |
f34edbc1 | 183 | .key = 6, |
a072738e CG |
184 | .emask = |
185 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | | |
186 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), | |
187 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | |
188 | .cntr = { 0, 2 }, | |
189 | }, | |
cb7d6b50 LM |
190 | [KEY_P4_L1D_OP_READ_RESULT_MISS] = { |
191 | .opcode = P4_REPLAY_EVENT, | |
192 | .config = 0, | |
193 | .dep = -1, | |
194 | .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0), | |
195 | .key = KEY_P4_L1D_OP_READ_RESULT_MISS, | |
196 | .emask = | |
197 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | |
198 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | |
199 | .cntr = { 16, 17 }, | |
200 | }, | |
201 | [KEY_P4_LL_OP_READ_RESULT_MISS] = { | |
202 | .opcode = P4_REPLAY_EVENT, | |
203 | .config = 0, | |
204 | .dep = -1, | |
205 | .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0), | |
206 | .key = KEY_P4_LL_OP_READ_RESULT_MISS, | |
207 | .emask = | |
208 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | |
209 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | |
210 | .cntr = { 16, 17 }, | |
211 | }, | |
212 | [KEY_P4_DTLB_OP_READ_RESULT_MISS] = { | |
213 | .opcode = P4_REPLAY_EVENT, | |
214 | .config = 0, | |
215 | .dep = -1, | |
216 | .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0), | |
217 | .key = KEY_P4_DTLB_OP_READ_RESULT_MISS, | |
218 | .emask = | |
219 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | |
220 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | |
221 | .cntr = { 16, 17 }, | |
222 | }, | |
223 | [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = { | |
224 | .opcode = P4_REPLAY_EVENT, | |
225 | .config = 0, | |
226 | .dep = -1, | |
227 | .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1), | |
228 | .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS, | |
229 | .emask = | |
230 | P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), | |
231 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, | |
232 | .cntr = { 16, 17 }, | |
233 | }, | |
234 | [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = { | |
235 | .opcode = P4_ITLB_REFERENCE, | |
236 | .config = 0, | |
237 | .dep = -1, | |
238 | .msr = 0, | |
239 | .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS, | |
240 | .emask = | |
241 | P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT), | |
242 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | |
243 | .cntr = { 0, 2 }, | |
244 | }, | |
245 | [KEY_P4_ITLB_OP_READ_RESULT_MISS] = { | |
246 | .opcode = P4_ITLB_REFERENCE, | |
247 | .config = 0, | |
248 | .dep = -1, | |
249 | .msr = 0, | |
250 | .key = KEY_P4_ITLB_OP_READ_RESULT_MISS, | |
251 | .emask = | |
252 | P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS), | |
253 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | |
254 | .cntr = { 0, 2 }, | |
255 | }, | |
256 | [KEY_P4_UOP_TYPE] = { | |
f34edbc1 LM |
257 | .opcode = P4_UOP_TYPE, |
258 | .config = 0, | |
259 | .dep = -1, | |
cb7d6b50 | 260 | .key = KEY_P4_UOP_TYPE, |
f34edbc1 LM |
261 | .emask = |
262 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | | |
263 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), | |
264 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | |
265 | .cntr = { 16, 17 }, | |
266 | }, | |
a072738e CG |
267 | }; |
268 | ||
269 | static u64 p4_pmu_event_map(int hw_event) | |
270 | { | |
271 | struct p4_event_template *tpl; | |
272 | u64 config; | |
273 | ||
f34edbc1 | 274 | if (hw_event > ARRAY_SIZE(p4_templates)) { |
a072738e CG |
275 | printk_once(KERN_ERR "PMU: Incorrect event index\n"); |
276 | return 0; | |
277 | } | |
f34edbc1 | 278 | tpl = &p4_templates[hw_event]; |
a072738e CG |
279 | |
280 | /* | |
281 | * fill config up according to | |
282 | * a predefined event template | |
283 | */ | |
284 | config = tpl->config; | |
285 | config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); | |
286 | config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); | |
287 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); | |
f34edbc1 | 288 | config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); |
a072738e | 289 | |
a072738e CG |
290 | return config; |
291 | } | |
292 | ||
/*
 * Note that we still have 5 events (from global events SDM list)
 * intersected in opcode+emask bits so we will need another
 * scheme there to distinguish templates.
 */
static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
{
        /* nonzero iff the two event masks share at least one bit */
        return src & dst;
}
302 | ||
303 | static struct p4_event_template *p4_pmu_template_lookup(u64 config) | |
304 | { | |
f34edbc1 | 305 | int key = p4_config_unpack_key(config); |
a072738e | 306 | |
f34edbc1 LM |
307 | if (key < ARRAY_SIZE(p4_templates)) |
308 | return &p4_templates[key]; | |
309 | else | |
310 | return NULL; | |
a072738e CG |
311 | } |
312 | ||
313 | /* | |
314 | * We don't control raw events so it's up to the caller | |
315 | * to pass sane values (and we don't count the thread number | |
316 | * on HT machine but allow HT-compatible specifics to be | |
317 | * passed on) | |
318 | */ | |
319 | static u64 p4_pmu_raw_event(u64 hw_event) | |
320 | { | |
321 | return hw_event & | |
322 | (p4_config_pack_escr(P4_EVNTSEL_MASK_HT) | | |
323 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | |
324 | } | |
325 | ||
326 | static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) | |
327 | { | |
328 | int cpu = raw_smp_processor_id(); | |
329 | ||
330 | /* | |
331 | * the reason we use cpu that early is that: if we get scheduled | |
332 | * first time on the same cpu -- we will not need swap thread | |
333 | * specific flags in config (and will save some cpu cycles) | |
334 | */ | |
335 | ||
336 | /* CCCR by default */ | |
337 | hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu)); | |
338 | ||
339 | /* Count user and OS events unless not requested to */ | |
340 | hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, | |
341 | attr->exclude_user)); | |
cb7d6b50 LM |
342 | /* on HT machine we need a special bit */ |
343 | if (p4_ht_active() && p4_ht_thread(cpu)) | |
344 | hwc->config = p4_set_ht_bit(hwc->config); | |
345 | ||
a072738e CG |
346 | return 0; |
347 | } | |
348 | ||
349 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |
350 | { | |
351 | unsigned long dummy; | |
352 | ||
353 | rdmsrl(hwc->config_base + hwc->idx, dummy); | |
354 | if (dummy & P4_CCCR_OVF) { | |
355 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | |
356 | ((u64)dummy) & ~P4_CCCR_OVF); | |
357 | } | |
358 | } | |
359 | ||
360 | static inline void p4_pmu_disable_event(struct perf_event *event) | |
361 | { | |
362 | struct hw_perf_event *hwc = &event->hw; | |
363 | ||
364 | /* | |
365 | * If event gets disabled while counter is in overflowed | |
366 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | |
367 | * asserted again and again | |
368 | */ | |
369 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | |
370 | (u64)(p4_config_unpack_cccr(hwc->config)) & | |
371 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF); | |
372 | } | |
373 | ||
374 | static void p4_pmu_disable_all(void) | |
375 | { | |
376 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | |
377 | int idx; | |
378 | ||
379 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | |
380 | struct perf_event *event = cpuc->events[idx]; | |
381 | if (!test_bit(idx, cpuc->active_mask)) | |
382 | continue; | |
383 | p4_pmu_disable_event(event); | |
384 | } | |
385 | } | |
386 | ||
387 | static void p4_pmu_enable_event(struct perf_event *event) | |
388 | { | |
389 | struct hw_perf_event *hwc = &event->hw; | |
390 | int thread = p4_ht_config_thread(hwc->config); | |
391 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | |
392 | u64 escr_base; | |
393 | struct p4_event_template *tpl; | |
394 | struct p4_pmu_res *c; | |
395 | ||
396 | /* | |
397 | * some preparation work from per-cpu private fields | |
398 | * since we need to find out which ESCR to use | |
399 | */ | |
400 | c = &__get_cpu_var(p4_pmu_config); | |
401 | tpl = c->tpl[hwc->idx]; | |
402 | if (!tpl) { | |
403 | pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); | |
404 | return; | |
405 | } | |
cb7d6b50 LM |
406 | |
407 | if (tpl->msr) { | |
408 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32); | |
409 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff); | |
410 | } | |
411 | ||
a072738e CG |
412 | escr_base = (u64)tpl->escr_msr[thread]; |
413 | ||
414 | /* | |
415 | * - we dont support cascaded counters yet | |
416 | * - and counter 1 is broken (erratum) | |
417 | */ | |
418 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | |
419 | WARN_ON_ONCE(hwc->idx == 1); | |
420 | ||
421 | (void)checking_wrmsrl(escr_base, escr_conf); | |
422 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | |
423 | (u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE); | |
424 | } | |
425 | ||
426 | static void p4_pmu_enable_all(void) | |
427 | { | |
428 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | |
429 | int idx; | |
430 | ||
431 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | |
432 | struct perf_event *event = cpuc->events[idx]; | |
433 | if (!test_bit(idx, cpuc->active_mask)) | |
434 | continue; | |
435 | p4_pmu_enable_event(event); | |
436 | } | |
437 | } | |
438 | ||
439 | static int p4_pmu_handle_irq(struct pt_regs *regs) | |
440 | { | |
441 | struct perf_sample_data data; | |
442 | struct cpu_hw_events *cpuc; | |
443 | struct perf_event *event; | |
444 | struct hw_perf_event *hwc; | |
445 | int idx, handled = 0; | |
446 | u64 val; | |
447 | ||
448 | data.addr = 0; | |
449 | data.raw = NULL; | |
450 | ||
451 | cpuc = &__get_cpu_var(cpu_hw_events); | |
452 | ||
453 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | |
454 | ||
455 | if (!test_bit(idx, cpuc->active_mask)) | |
456 | continue; | |
457 | ||
458 | event = cpuc->events[idx]; | |
459 | hwc = &event->hw; | |
460 | ||
461 | WARN_ON_ONCE(hwc->idx != idx); | |
462 | ||
463 | /* | |
464 | * FIXME: Redundant call, actually not needed | |
465 | * but just to check if we're screwed | |
466 | */ | |
467 | p4_pmu_clear_cccr_ovf(hwc); | |
468 | ||
469 | val = x86_perf_event_update(event); | |
470 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | |
471 | continue; | |
472 | ||
473 | /* | |
474 | * event overflow | |
475 | */ | |
476 | handled = 1; | |
477 | data.period = event->hw.last_period; | |
478 | ||
479 | if (!x86_perf_event_set_period(event)) | |
480 | continue; | |
481 | if (perf_event_overflow(event, 1, &data, regs)) | |
482 | p4_pmu_disable_event(event); | |
483 | } | |
484 | ||
485 | if (handled) { | |
486 | /* p4 quirk: unmask it again */ | |
487 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | |
488 | inc_irq_stat(apic_perf_irqs); | |
489 | } | |
490 | ||
491 | return handled; | |
492 | } | |
493 | ||
494 | /* | |
495 | * swap thread specific fields according to a thread | |
496 | * we are going to run on | |
497 | */ | |
498 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | |
499 | { | |
500 | u32 escr, cccr; | |
501 | ||
502 | /* | |
503 | * we either lucky and continue on same cpu or no HT support | |
504 | */ | |
505 | if (!p4_should_swap_ts(hwc->config, cpu)) | |
506 | return; | |
507 | ||
508 | /* | |
509 | * the event is migrated from an another logical | |
510 | * cpu, so we need to swap thread specific flags | |
511 | */ | |
512 | ||
513 | escr = p4_config_unpack_escr(hwc->config); | |
514 | cccr = p4_config_unpack_cccr(hwc->config); | |
515 | ||
516 | if (p4_ht_thread(cpu)) { | |
517 | cccr &= ~P4_CCCR_OVF_PMI_T0; | |
518 | cccr |= P4_CCCR_OVF_PMI_T1; | |
519 | if (escr & P4_EVNTSEL_T0_OS) { | |
520 | escr &= ~P4_EVNTSEL_T0_OS; | |
521 | escr |= P4_EVNTSEL_T1_OS; | |
522 | } | |
523 | if (escr & P4_EVNTSEL_T0_USR) { | |
524 | escr &= ~P4_EVNTSEL_T0_USR; | |
525 | escr |= P4_EVNTSEL_T1_USR; | |
526 | } | |
527 | hwc->config = p4_config_pack_escr(escr); | |
528 | hwc->config |= p4_config_pack_cccr(cccr); | |
529 | hwc->config |= P4_CONFIG_HT; | |
530 | } else { | |
531 | cccr &= ~P4_CCCR_OVF_PMI_T1; | |
532 | cccr |= P4_CCCR_OVF_PMI_T0; | |
533 | if (escr & P4_EVNTSEL_T1_OS) { | |
534 | escr &= ~P4_EVNTSEL_T1_OS; | |
535 | escr |= P4_EVNTSEL_T0_OS; | |
536 | } | |
537 | if (escr & P4_EVNTSEL_T1_USR) { | |
538 | escr &= ~P4_EVNTSEL_T1_USR; | |
539 | escr |= P4_EVNTSEL_T0_USR; | |
540 | } | |
541 | hwc->config = p4_config_pack_escr(escr); | |
542 | hwc->config |= p4_config_pack_cccr(cccr); | |
543 | hwc->config &= ~P4_CONFIG_HT; | |
544 | } | |
545 | } | |
546 | ||
547 | /* ESCRs are not sequential in memory so we need a map */ | |
548 | static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { | |
549 | MSR_P4_ALF_ESCR0, /* 0 */ | |
550 | MSR_P4_ALF_ESCR1, /* 1 */ | |
551 | MSR_P4_BPU_ESCR0, /* 2 */ | |
552 | MSR_P4_BPU_ESCR1, /* 3 */ | |
553 | MSR_P4_BSU_ESCR0, /* 4 */ | |
554 | MSR_P4_BSU_ESCR1, /* 5 */ | |
555 | MSR_P4_CRU_ESCR0, /* 6 */ | |
556 | MSR_P4_CRU_ESCR1, /* 7 */ | |
557 | MSR_P4_CRU_ESCR2, /* 8 */ | |
558 | MSR_P4_CRU_ESCR3, /* 9 */ | |
559 | MSR_P4_CRU_ESCR4, /* 10 */ | |
560 | MSR_P4_CRU_ESCR5, /* 11 */ | |
561 | MSR_P4_DAC_ESCR0, /* 12 */ | |
562 | MSR_P4_DAC_ESCR1, /* 13 */ | |
563 | MSR_P4_FIRM_ESCR0, /* 14 */ | |
564 | MSR_P4_FIRM_ESCR1, /* 15 */ | |
565 | MSR_P4_FLAME_ESCR0, /* 16 */ | |
566 | MSR_P4_FLAME_ESCR1, /* 17 */ | |
567 | MSR_P4_FSB_ESCR0, /* 18 */ | |
568 | MSR_P4_FSB_ESCR1, /* 19 */ | |
569 | MSR_P4_IQ_ESCR0, /* 20 */ | |
570 | MSR_P4_IQ_ESCR1, /* 21 */ | |
571 | MSR_P4_IS_ESCR0, /* 22 */ | |
572 | MSR_P4_IS_ESCR1, /* 23 */ | |
573 | MSR_P4_ITLB_ESCR0, /* 24 */ | |
574 | MSR_P4_ITLB_ESCR1, /* 25 */ | |
575 | MSR_P4_IX_ESCR0, /* 26 */ | |
576 | MSR_P4_IX_ESCR1, /* 27 */ | |
577 | MSR_P4_MOB_ESCR0, /* 28 */ | |
578 | MSR_P4_MOB_ESCR1, /* 29 */ | |
579 | MSR_P4_MS_ESCR0, /* 30 */ | |
580 | MSR_P4_MS_ESCR1, /* 31 */ | |
581 | MSR_P4_PMH_ESCR0, /* 32 */ | |
582 | MSR_P4_PMH_ESCR1, /* 33 */ | |
583 | MSR_P4_RAT_ESCR0, /* 34 */ | |
584 | MSR_P4_RAT_ESCR1, /* 35 */ | |
585 | MSR_P4_SAAT_ESCR0, /* 36 */ | |
586 | MSR_P4_SAAT_ESCR1, /* 37 */ | |
587 | MSR_P4_SSU_ESCR0, /* 38 */ | |
588 | MSR_P4_SSU_ESCR1, /* 39 */ | |
589 | MSR_P4_TBPU_ESCR0, /* 40 */ | |
590 | MSR_P4_TBPU_ESCR1, /* 41 */ | |
591 | MSR_P4_TC_ESCR0, /* 42 */ | |
592 | MSR_P4_TC_ESCR1, /* 43 */ | |
593 | MSR_P4_U2L_ESCR0, /* 44 */ | |
594 | MSR_P4_U2L_ESCR1, /* 45 */ | |
595 | }; | |
596 | ||
597 | static int p4_get_escr_idx(unsigned int addr) | |
598 | { | |
599 | unsigned int i; | |
600 | ||
601 | for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) { | |
602 | if (addr == p4_escr_map[i]) | |
603 | return i; | |
604 | } | |
605 | ||
606 | return -1; | |
607 | } | |
608 | ||
609 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |
610 | { | |
611 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | |
612 | unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)]; | |
613 | ||
614 | struct hw_perf_event *hwc; | |
615 | struct p4_event_template *tpl; | |
616 | struct p4_pmu_res *c; | |
617 | int cpu = raw_smp_processor_id(); | |
618 | int escr_idx, thread, i, num; | |
619 | ||
620 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | |
621 | bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR); | |
622 | ||
623 | c = &__get_cpu_var(p4_pmu_config); | |
624 | /* | |
625 | * Firstly find out which resource events are going | |
626 | * to use, if ESCR+CCCR tuple is already borrowed | |
627 | * then get out of here | |
628 | */ | |
629 | for (i = 0, num = n; i < n; i++, num--) { | |
630 | hwc = &cpuc->event_list[i]->hw; | |
631 | tpl = p4_pmu_template_lookup(hwc->config); | |
632 | if (!tpl) | |
633 | goto done; | |
634 | thread = p4_ht_thread(cpu); | |
635 | escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]); | |
636 | if (escr_idx == -1) | |
637 | goto done; | |
638 | ||
639 | /* already allocated and remains on the same cpu */ | |
640 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | |
641 | if (assign) | |
642 | assign[i] = hwc->idx; | |
643 | /* upstream dependent event */ | |
644 | if (unlikely(tpl->dep != -1)) | |
645 | printk_once(KERN_WARNING "PMU: Dep events are " | |
646 | "not implemented yet\n"); | |
647 | goto reserve; | |
648 | } | |
649 | ||
650 | /* it may be already borrowed */ | |
651 | if (test_bit(tpl->cntr[thread], used_mask) || | |
652 | test_bit(escr_idx, escr_mask)) | |
653 | goto done; | |
654 | ||
655 | /* | |
656 | * ESCR+CCCR+COUNTERs are available to use lets swap | |
657 | * thread specific bits, push assigned bits | |
658 | * back and save template into per-cpu | |
659 | * area (which will allow us to find out the ESCR | |
660 | * to be used at moment of "enable event via real MSR") | |
661 | */ | |
662 | p4_pmu_swap_config_ts(hwc, cpu); | |
663 | if (assign) { | |
664 | assign[i] = tpl->cntr[thread]; | |
665 | c->tpl[assign[i]] = tpl; | |
666 | } | |
667 | reserve: | |
668 | set_bit(tpl->cntr[thread], used_mask); | |
669 | set_bit(escr_idx, escr_mask); | |
670 | } | |
671 | ||
672 | done: | |
673 | return num ? -ENOSPC : 0; | |
674 | } | |
675 | ||
676 | static __initconst struct x86_pmu p4_pmu = { | |
677 | .name = "Netburst P4/Xeon", | |
678 | .handle_irq = p4_pmu_handle_irq, | |
679 | .disable_all = p4_pmu_disable_all, | |
680 | .enable_all = p4_pmu_enable_all, | |
681 | .enable = p4_pmu_enable_event, | |
682 | .disable = p4_pmu_disable_event, | |
683 | .eventsel = MSR_P4_BPU_CCCR0, | |
684 | .perfctr = MSR_P4_BPU_PERFCTR0, | |
685 | .event_map = p4_pmu_event_map, | |
686 | .raw_event = p4_pmu_raw_event, | |
f34edbc1 | 687 | .max_events = ARRAY_SIZE(p4_templates), |
a072738e CG |
688 | .get_event_constraints = x86_get_event_constraints, |
689 | /* | |
690 | * IF HT disabled we may need to use all | |
691 | * ARCH_P4_MAX_CCCR counters simulaneously | |
692 | * though leave it restricted at moment assuming | |
693 | * HT is on | |
694 | */ | |
695 | .num_events = ARCH_P4_MAX_CCCR, | |
696 | .apic = 1, | |
697 | .event_bits = 40, | |
698 | .event_mask = (1ULL << 40) - 1, | |
699 | .max_period = (1ULL << 39) - 1, | |
700 | .hw_config = p4_hw_config, | |
701 | .schedule_events = p4_pmu_schedule_events, | |
702 | }; | |
703 | ||
704 | static __init int p4_pmu_init(void) | |
705 | { | |
706 | unsigned int low, high; | |
707 | ||
708 | /* If we get stripped -- indexig fails */ | |
709 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | |
710 | ||
711 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | |
712 | if (!(low & (1 << 7))) { | |
713 | pr_cont("unsupported Netburst CPU model %d ", | |
714 | boot_cpu_data.x86_model); | |
715 | return -ENODEV; | |
716 | } | |
717 | ||
cb7d6b50 LM |
718 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, |
719 | sizeof(hw_cache_event_ids)); | |
720 | ||
a072738e CG |
721 | pr_cont("Netburst events, "); |
722 | ||
723 | x86_pmu = p4_pmu; | |
724 | ||
725 | return 0; | |
726 | } | |
727 | ||
728 | #endif /* CONFIG_CPU_SUP_INTEL */ |