]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/x86/kernel/cpu/perf_event_intel.c
perf: Add support for supplementary event registers
[mirror_ubuntu-artful-kernel.git] / arch / x86 / kernel / cpu / perf_event_intel.c
CommitLineData
f22f54f4
PZ
1#ifdef CONFIG_CPU_SUP_INTEL
2
a7e3ed1e
AK
3#define MAX_EXTRA_REGS 2
4
5/*
6 * Per register state.
7 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13
14/*
15 * Per core state
16 * This used to coordinate shared registers for HT threads.
17 */
18struct intel_percore {
19 raw_spinlock_t lock; /* protect structure */
20 struct er_account regs[MAX_EXTRA_REGS];
21 int refcnt; /* number of threads */
22 unsigned core_id;
23};
24
f22f54f4 25/*
b622d644 26 * Intel PerfMon, used on Core and later.
f22f54f4
PZ
27 */
28static const u64 intel_perfmon_event_map[] =
29{
30 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
31 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
32 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
33 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
34 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
35 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
36 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
37};
38
39static struct event_constraint intel_core_event_constraints[] =
40{
41 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
42 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
43 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
44 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
45 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
46 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
47 EVENT_CONSTRAINT_END
48};
49
50static struct event_constraint intel_core2_event_constraints[] =
51{
b622d644
PZ
52 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
53 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
54 /*
55 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
56 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
57 * ratio between these counters.
58 */
59 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
f22f54f4
PZ
60 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
61 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
62 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
63 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
64 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
65 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
66 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
67 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
b622d644 68 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
f22f54f4
PZ
69 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
70 EVENT_CONSTRAINT_END
71};
72
73static struct event_constraint intel_nehalem_event_constraints[] =
74{
b622d644
PZ
75 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
76 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
77 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
f22f54f4
PZ
78 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
79 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
80 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
81 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
82 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
83 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
84 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
85 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
86 EVENT_CONSTRAINT_END
87};
88
a7e3ed1e
AK
89static struct extra_reg intel_nehalem_extra_regs[] =
90{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92 EVENT_EXTRA_END
93};
94
95static struct event_constraint intel_nehalem_percore_constraints[] =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
f22f54f4
PZ
101static struct event_constraint intel_westmere_event_constraints[] =
102{
b622d644
PZ
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
f22f54f4
PZ
106 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
107 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
108 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
d1100770 109 INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
f22f54f4
PZ
110 EVENT_CONSTRAINT_END
111};
112
b06b3d49
LM
113static struct event_constraint intel_snb_event_constraints[] =
114{
115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END
124};
125
a7e3ed1e
AK
126static struct extra_reg intel_westmere_extra_regs[] =
127{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130 EVENT_EXTRA_END
131};
132
133static struct event_constraint intel_westmere_percore_constraints[] =
134{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END
138};
139
f22f54f4
PZ
140static struct event_constraint intel_gen_event_constraints[] =
141{
b622d644
PZ
142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
143 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
144 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
f22f54f4
PZ
145 EVENT_CONSTRAINT_END
146};
147
148static u64 intel_pmu_event_map(int hw_event)
149{
150 return intel_perfmon_event_map[hw_event];
151}
152
b06b3d49
LM
153static __initconst const u64 snb_hw_cache_event_ids
154 [PERF_COUNT_HW_CACHE_MAX]
155 [PERF_COUNT_HW_CACHE_OP_MAX]
156 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
157{
158 [ C(L1D) ] = {
159 [ C(OP_READ) ] = {
160 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
161 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
162 },
163 [ C(OP_WRITE) ] = {
164 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
165 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
166 },
167 [ C(OP_PREFETCH) ] = {
168 [ C(RESULT_ACCESS) ] = 0x0,
169 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
170 },
171 },
172 [ C(L1I ) ] = {
173 [ C(OP_READ) ] = {
174 [ C(RESULT_ACCESS) ] = 0x0,
175 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
176 },
177 [ C(OP_WRITE) ] = {
178 [ C(RESULT_ACCESS) ] = -1,
179 [ C(RESULT_MISS) ] = -1,
180 },
181 [ C(OP_PREFETCH) ] = {
182 [ C(RESULT_ACCESS) ] = 0x0,
183 [ C(RESULT_MISS) ] = 0x0,
184 },
185 },
186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb,
195 },
196 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb,
201 },
202 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb,
207 },
208 },
209 [ C(DTLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
212 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
213 },
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
216 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
217 },
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = 0x0,
220 [ C(RESULT_MISS) ] = 0x0,
221 },
222 },
223 [ C(ITLB) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
226 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
227 },
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
231 },
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
235 },
236 },
237 [ C(BPU ) ] = {
238 [ C(OP_READ) ] = {
239 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
240 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
241 },
242 [ C(OP_WRITE) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 [ C(OP_PREFETCH) ] = {
247 [ C(RESULT_ACCESS) ] = -1,
248 [ C(RESULT_MISS) ] = -1,
249 },
250 },
251};
252
caaa8be3 253static __initconst const u64 westmere_hw_cache_event_ids
f22f54f4
PZ
254 [PERF_COUNT_HW_CACHE_MAX]
255 [PERF_COUNT_HW_CACHE_OP_MAX]
256 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
257{
258 [ C(L1D) ] = {
259 [ C(OP_READ) ] = {
260 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
261 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
262 },
263 [ C(OP_WRITE) ] = {
264 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
265 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
266 },
267 [ C(OP_PREFETCH) ] = {
268 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
269 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
270 },
271 },
272 [ C(L1I ) ] = {
273 [ C(OP_READ) ] = {
274 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
275 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
276 },
277 [ C(OP_WRITE) ] = {
278 [ C(RESULT_ACCESS) ] = -1,
279 [ C(RESULT_MISS) ] = -1,
280 },
281 [ C(OP_PREFETCH) ] = {
282 [ C(RESULT_ACCESS) ] = 0x0,
283 [ C(RESULT_MISS) ] = 0x0,
284 },
285 },
286 [ C(LL ) ] = {
287 [ C(OP_READ) ] = {
288 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
289 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
290 },
291 [ C(OP_WRITE) ] = {
292 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
293 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
294 },
295 [ C(OP_PREFETCH) ] = {
296 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
297 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
298 },
299 },
300 [ C(DTLB) ] = {
301 [ C(OP_READ) ] = {
302 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
303 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
304 },
305 [ C(OP_WRITE) ] = {
306 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
307 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
308 },
309 [ C(OP_PREFETCH) ] = {
310 [ C(RESULT_ACCESS) ] = 0x0,
311 [ C(RESULT_MISS) ] = 0x0,
312 },
313 },
314 [ C(ITLB) ] = {
315 [ C(OP_READ) ] = {
316 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
317 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
318 },
319 [ C(OP_WRITE) ] = {
320 [ C(RESULT_ACCESS) ] = -1,
321 [ C(RESULT_MISS) ] = -1,
322 },
323 [ C(OP_PREFETCH) ] = {
324 [ C(RESULT_ACCESS) ] = -1,
325 [ C(RESULT_MISS) ] = -1,
326 },
327 },
328 [ C(BPU ) ] = {
329 [ C(OP_READ) ] = {
330 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
331 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
332 },
333 [ C(OP_WRITE) ] = {
334 [ C(RESULT_ACCESS) ] = -1,
335 [ C(RESULT_MISS) ] = -1,
336 },
337 [ C(OP_PREFETCH) ] = {
338 [ C(RESULT_ACCESS) ] = -1,
339 [ C(RESULT_MISS) ] = -1,
340 },
341 },
342};
343
caaa8be3 344static __initconst const u64 nehalem_hw_cache_event_ids
f22f54f4
PZ
345 [PERF_COUNT_HW_CACHE_MAX]
346 [PERF_COUNT_HW_CACHE_OP_MAX]
347 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
348{
349 [ C(L1D) ] = {
350 [ C(OP_READ) ] = {
351 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
352 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
353 },
354 [ C(OP_WRITE) ] = {
355 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
356 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
357 },
358 [ C(OP_PREFETCH) ] = {
359 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
360 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
361 },
362 },
363 [ C(L1I ) ] = {
364 [ C(OP_READ) ] = {
365 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
366 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
367 },
368 [ C(OP_WRITE) ] = {
369 [ C(RESULT_ACCESS) ] = -1,
370 [ C(RESULT_MISS) ] = -1,
371 },
372 [ C(OP_PREFETCH) ] = {
373 [ C(RESULT_ACCESS) ] = 0x0,
374 [ C(RESULT_MISS) ] = 0x0,
375 },
376 },
377 [ C(LL ) ] = {
378 [ C(OP_READ) ] = {
379 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
380 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
381 },
382 [ C(OP_WRITE) ] = {
383 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
384 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
385 },
386 [ C(OP_PREFETCH) ] = {
387 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
388 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
389 },
390 },
391 [ C(DTLB) ] = {
392 [ C(OP_READ) ] = {
393 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
394 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
395 },
396 [ C(OP_WRITE) ] = {
397 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
398 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
399 },
400 [ C(OP_PREFETCH) ] = {
401 [ C(RESULT_ACCESS) ] = 0x0,
402 [ C(RESULT_MISS) ] = 0x0,
403 },
404 },
405 [ C(ITLB) ] = {
406 [ C(OP_READ) ] = {
407 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
408 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
409 },
410 [ C(OP_WRITE) ] = {
411 [ C(RESULT_ACCESS) ] = -1,
412 [ C(RESULT_MISS) ] = -1,
413 },
414 [ C(OP_PREFETCH) ] = {
415 [ C(RESULT_ACCESS) ] = -1,
416 [ C(RESULT_MISS) ] = -1,
417 },
418 },
419 [ C(BPU ) ] = {
420 [ C(OP_READ) ] = {
421 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
422 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
423 },
424 [ C(OP_WRITE) ] = {
425 [ C(RESULT_ACCESS) ] = -1,
426 [ C(RESULT_MISS) ] = -1,
427 },
428 [ C(OP_PREFETCH) ] = {
429 [ C(RESULT_ACCESS) ] = -1,
430 [ C(RESULT_MISS) ] = -1,
431 },
432 },
433};
434
caaa8be3 435static __initconst const u64 core2_hw_cache_event_ids
f22f54f4
PZ
436 [PERF_COUNT_HW_CACHE_MAX]
437 [PERF_COUNT_HW_CACHE_OP_MAX]
438 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
439{
440 [ C(L1D) ] = {
441 [ C(OP_READ) ] = {
442 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
443 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
444 },
445 [ C(OP_WRITE) ] = {
446 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
447 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
448 },
449 [ C(OP_PREFETCH) ] = {
450 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
451 [ C(RESULT_MISS) ] = 0,
452 },
453 },
454 [ C(L1I ) ] = {
455 [ C(OP_READ) ] = {
456 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
457 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
458 },
459 [ C(OP_WRITE) ] = {
460 [ C(RESULT_ACCESS) ] = -1,
461 [ C(RESULT_MISS) ] = -1,
462 },
463 [ C(OP_PREFETCH) ] = {
464 [ C(RESULT_ACCESS) ] = 0,
465 [ C(RESULT_MISS) ] = 0,
466 },
467 },
468 [ C(LL ) ] = {
469 [ C(OP_READ) ] = {
470 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
471 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
472 },
473 [ C(OP_WRITE) ] = {
474 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
475 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
476 },
477 [ C(OP_PREFETCH) ] = {
478 [ C(RESULT_ACCESS) ] = 0,
479 [ C(RESULT_MISS) ] = 0,
480 },
481 },
482 [ C(DTLB) ] = {
483 [ C(OP_READ) ] = {
484 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
485 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
486 },
487 [ C(OP_WRITE) ] = {
488 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
489 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
490 },
491 [ C(OP_PREFETCH) ] = {
492 [ C(RESULT_ACCESS) ] = 0,
493 [ C(RESULT_MISS) ] = 0,
494 },
495 },
496 [ C(ITLB) ] = {
497 [ C(OP_READ) ] = {
498 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
499 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
500 },
501 [ C(OP_WRITE) ] = {
502 [ C(RESULT_ACCESS) ] = -1,
503 [ C(RESULT_MISS) ] = -1,
504 },
505 [ C(OP_PREFETCH) ] = {
506 [ C(RESULT_ACCESS) ] = -1,
507 [ C(RESULT_MISS) ] = -1,
508 },
509 },
510 [ C(BPU ) ] = {
511 [ C(OP_READ) ] = {
512 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
513 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
514 },
515 [ C(OP_WRITE) ] = {
516 [ C(RESULT_ACCESS) ] = -1,
517 [ C(RESULT_MISS) ] = -1,
518 },
519 [ C(OP_PREFETCH) ] = {
520 [ C(RESULT_ACCESS) ] = -1,
521 [ C(RESULT_MISS) ] = -1,
522 },
523 },
524};
525
caaa8be3 526static __initconst const u64 atom_hw_cache_event_ids
f22f54f4
PZ
527 [PERF_COUNT_HW_CACHE_MAX]
528 [PERF_COUNT_HW_CACHE_OP_MAX]
529 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
530{
531 [ C(L1D) ] = {
532 [ C(OP_READ) ] = {
533 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
534 [ C(RESULT_MISS) ] = 0,
535 },
536 [ C(OP_WRITE) ] = {
537 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
538 [ C(RESULT_MISS) ] = 0,
539 },
540 [ C(OP_PREFETCH) ] = {
541 [ C(RESULT_ACCESS) ] = 0x0,
542 [ C(RESULT_MISS) ] = 0,
543 },
544 },
545 [ C(L1I ) ] = {
546 [ C(OP_READ) ] = {
547 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
548 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
549 },
550 [ C(OP_WRITE) ] = {
551 [ C(RESULT_ACCESS) ] = -1,
552 [ C(RESULT_MISS) ] = -1,
553 },
554 [ C(OP_PREFETCH) ] = {
555 [ C(RESULT_ACCESS) ] = 0,
556 [ C(RESULT_MISS) ] = 0,
557 },
558 },
559 [ C(LL ) ] = {
560 [ C(OP_READ) ] = {
561 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
562 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
563 },
564 [ C(OP_WRITE) ] = {
565 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
566 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
567 },
568 [ C(OP_PREFETCH) ] = {
569 [ C(RESULT_ACCESS) ] = 0,
570 [ C(RESULT_MISS) ] = 0,
571 },
572 },
573 [ C(DTLB) ] = {
574 [ C(OP_READ) ] = {
575 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
576 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
577 },
578 [ C(OP_WRITE) ] = {
579 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
580 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
581 },
582 [ C(OP_PREFETCH) ] = {
583 [ C(RESULT_ACCESS) ] = 0,
584 [ C(RESULT_MISS) ] = 0,
585 },
586 },
587 [ C(ITLB) ] = {
588 [ C(OP_READ) ] = {
589 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
590 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
591 },
592 [ C(OP_WRITE) ] = {
593 [ C(RESULT_ACCESS) ] = -1,
594 [ C(RESULT_MISS) ] = -1,
595 },
596 [ C(OP_PREFETCH) ] = {
597 [ C(RESULT_ACCESS) ] = -1,
598 [ C(RESULT_MISS) ] = -1,
599 },
600 },
601 [ C(BPU ) ] = {
602 [ C(OP_READ) ] = {
603 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
604 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
605 },
606 [ C(OP_WRITE) ] = {
607 [ C(RESULT_ACCESS) ] = -1,
608 [ C(RESULT_MISS) ] = -1,
609 },
610 [ C(OP_PREFETCH) ] = {
611 [ C(RESULT_ACCESS) ] = -1,
612 [ C(RESULT_MISS) ] = -1,
613 },
614 },
615};
616
f22f54f4
PZ
617static void intel_pmu_disable_all(void)
618{
619 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
620
621 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
622
623 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
624 intel_pmu_disable_bts();
ca037701
PZ
625
626 intel_pmu_pebs_disable_all();
caff2bef 627 intel_pmu_lbr_disable_all();
f22f54f4
PZ
628}
629
11164cd4 630static void intel_pmu_enable_all(int added)
f22f54f4
PZ
631{
632 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
633
d329527e
PZ
634 intel_pmu_pebs_enable_all();
635 intel_pmu_lbr_enable_all();
f22f54f4
PZ
636 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
637
638 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
639 struct perf_event *event =
640 cpuc->events[X86_PMC_IDX_FIXED_BTS];
641
642 if (WARN_ON_ONCE(!event))
643 return;
644
645 intel_pmu_enable_bts(event->hw.config);
646 }
647}
648
11164cd4
PZ
649/*
650 * Workaround for:
651 * Intel Errata AAK100 (model 26)
652 * Intel Errata AAP53 (model 30)
40b91cd1 653 * Intel Errata BD53 (model 44)
11164cd4 654 *
351af072
ZY
655 * The official story:
656 * These chips need to be 'reset' when adding counters by programming the
657 * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
658 * in sequence on the same PMC or on different PMCs.
659 *
660 * In practise it appears some of these events do in fact count, and
661 * we need to programm all 4 events.
11164cd4 662 */
351af072 663static void intel_pmu_nhm_workaround(void)
11164cd4 664{
351af072
ZY
665 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
666 static const unsigned long nhm_magic[4] = {
667 0x4300B5,
668 0x4300D2,
669 0x4300B1,
670 0x4300B1
671 };
672 struct perf_event *event;
673 int i;
11164cd4 674
351af072
ZY
675 /*
676 * The Errata requires below steps:
677 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
678 * 2) Configure 4 PERFEVTSELx with the magic events and clear
679 * the corresponding PMCx;
680 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
681 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
682 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
683 */
11164cd4 684
351af072
ZY
685 /*
686 * The real steps we choose are a little different from above.
687 * A) To reduce MSR operations, we don't run step 1) as they
688 * are already cleared before this function is called;
689 * B) Call x86_perf_event_update to save PMCx before configuring
690 * PERFEVTSELx with magic number;
691 * C) With step 5), we do clear only when the PERFEVTSELx is
692 * not used currently.
693 * D) Call x86_perf_event_set_period to restore PMCx;
694 */
11164cd4 695
351af072
ZY
696 /* We always operate 4 pairs of PERF Counters */
697 for (i = 0; i < 4; i++) {
698 event = cpuc->events[i];
699 if (event)
700 x86_perf_event_update(event);
701 }
11164cd4 702
351af072
ZY
703 for (i = 0; i < 4; i++) {
704 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
705 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
706 }
707
708 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
709 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
11164cd4 710
351af072
ZY
711 for (i = 0; i < 4; i++) {
712 event = cpuc->events[i];
713
714 if (event) {
715 x86_perf_event_set_period(event);
31fa58af 716 __x86_pmu_enable_event(&event->hw,
351af072
ZY
717 ARCH_PERFMON_EVENTSEL_ENABLE);
718 } else
719 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
11164cd4 720 }
351af072
ZY
721}
722
723static void intel_pmu_nhm_enable_all(int added)
724{
725 if (added)
726 intel_pmu_nhm_workaround();
11164cd4
PZ
727 intel_pmu_enable_all(added);
728}
729
f22f54f4
PZ
730static inline u64 intel_pmu_get_status(void)
731{
732 u64 status;
733
734 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
735
736 return status;
737}
738
739static inline void intel_pmu_ack_status(u64 ack)
740{
741 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
742}
743
ca037701 744static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
f22f54f4 745{
aff3d91a 746 int idx = hwc->idx - X86_PMC_IDX_FIXED;
f22f54f4
PZ
747 u64 ctrl_val, mask;
748
749 mask = 0xfULL << (idx * 4);
750
751 rdmsrl(hwc->config_base, ctrl_val);
752 ctrl_val &= ~mask;
7645a24c 753 wrmsrl(hwc->config_base, ctrl_val);
f22f54f4
PZ
754}
755
ca037701 756static void intel_pmu_disable_event(struct perf_event *event)
f22f54f4 757{
aff3d91a
PZ
758 struct hw_perf_event *hwc = &event->hw;
759
760 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
f22f54f4
PZ
761 intel_pmu_disable_bts();
762 intel_pmu_drain_bts_buffer();
763 return;
764 }
765
766 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
aff3d91a 767 intel_pmu_disable_fixed(hwc);
f22f54f4
PZ
768 return;
769 }
770
aff3d91a 771 x86_pmu_disable_event(event);
ca037701 772
ab608344 773 if (unlikely(event->attr.precise_ip))
ef21f683 774 intel_pmu_pebs_disable(event);
f22f54f4
PZ
775}
776
ca037701 777static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
f22f54f4 778{
aff3d91a 779 int idx = hwc->idx - X86_PMC_IDX_FIXED;
f22f54f4 780 u64 ctrl_val, bits, mask;
f22f54f4
PZ
781
782 /*
783 * Enable IRQ generation (0x8),
784 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
785 * if requested:
786 */
787 bits = 0x8ULL;
788 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
789 bits |= 0x2;
790 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
791 bits |= 0x1;
792
793 /*
794 * ANY bit is supported in v3 and up
795 */
796 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
797 bits |= 0x4;
798
799 bits <<= (idx * 4);
800 mask = 0xfULL << (idx * 4);
801
802 rdmsrl(hwc->config_base, ctrl_val);
803 ctrl_val &= ~mask;
804 ctrl_val |= bits;
7645a24c 805 wrmsrl(hwc->config_base, ctrl_val);
f22f54f4
PZ
806}
807
aff3d91a 808static void intel_pmu_enable_event(struct perf_event *event)
f22f54f4 809{
aff3d91a
PZ
810 struct hw_perf_event *hwc = &event->hw;
811
812 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
0a3aee0d 813 if (!__this_cpu_read(cpu_hw_events.enabled))
f22f54f4
PZ
814 return;
815
816 intel_pmu_enable_bts(hwc->config);
817 return;
818 }
819
820 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
aff3d91a 821 intel_pmu_enable_fixed(hwc);
f22f54f4
PZ
822 return;
823 }
824
ab608344 825 if (unlikely(event->attr.precise_ip))
ef21f683 826 intel_pmu_pebs_enable(event);
ca037701 827
31fa58af 828 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
f22f54f4
PZ
829}
830
831/*
832 * Save and restart an expired event. Called by NMI contexts,
833 * so it has to be careful about preempting normal event ops:
834 */
835static int intel_pmu_save_and_restart(struct perf_event *event)
836{
cc2ad4ba
PZ
837 x86_perf_event_update(event);
838 return x86_perf_event_set_period(event);
f22f54f4
PZ
839}
840
841static void intel_pmu_reset(void)
842{
0a3aee0d 843 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
f22f54f4
PZ
844 unsigned long flags;
845 int idx;
846
948b1bb8 847 if (!x86_pmu.num_counters)
f22f54f4
PZ
848 return;
849
850 local_irq_save(flags);
851
852 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
853
948b1bb8 854 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
41bf4989
RR
855 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
856 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
f22f54f4 857 }
948b1bb8 858 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
f22f54f4 859 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
948b1bb8 860
f22f54f4
PZ
861 if (ds)
862 ds->bts_index = ds->bts_buffer_base;
863
864 local_irq_restore(flags);
865}
866
867/*
868 * This handler is triggered by the local APIC, so the APIC IRQ handling
869 * rules apply:
870 */
871static int intel_pmu_handle_irq(struct pt_regs *regs)
872{
873 struct perf_sample_data data;
874 struct cpu_hw_events *cpuc;
875 int bit, loops;
2e556b5b 876 u64 status;
b0b2072d 877 int handled;
f22f54f4 878
dc1d628a 879 perf_sample_data_init(&data, 0);
f22f54f4
PZ
880
881 cpuc = &__get_cpu_var(cpu_hw_events);
882
3fb2b8dd 883 intel_pmu_disable_all();
b0b2072d 884 handled = intel_pmu_drain_bts_buffer();
f22f54f4
PZ
885 status = intel_pmu_get_status();
886 if (!status) {
11164cd4 887 intel_pmu_enable_all(0);
b0b2072d 888 return handled;
f22f54f4
PZ
889 }
890
891 loops = 0;
892again:
2e556b5b 893 intel_pmu_ack_status(status);
f22f54f4
PZ
894 if (++loops > 100) {
895 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
896 perf_event_print_debug();
897 intel_pmu_reset();
3fb2b8dd 898 goto done;
f22f54f4
PZ
899 }
900
901 inc_irq_stat(apic_perf_irqs);
ca037701 902
caff2bef
PZ
903 intel_pmu_lbr_read();
904
ca037701
PZ
905 /*
906 * PEBS overflow sets bit 62 in the global status register
907 */
de725dec
PZ
908 if (__test_and_clear_bit(62, (unsigned long *)&status)) {
909 handled++;
ca037701 910 x86_pmu.drain_pebs(regs);
de725dec 911 }
ca037701 912
984b3f57 913 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
f22f54f4
PZ
914 struct perf_event *event = cpuc->events[bit];
915
de725dec
PZ
916 handled++;
917
f22f54f4
PZ
918 if (!test_bit(bit, cpuc->active_mask))
919 continue;
920
921 if (!intel_pmu_save_and_restart(event))
922 continue;
923
924 data.period = event->hw.last_period;
925
926 if (perf_event_overflow(event, 1, &data, regs))
a4eaf7f1 927 x86_pmu_stop(event, 0);
f22f54f4
PZ
928 }
929
f22f54f4
PZ
930 /*
931 * Repeat if there is more work to be done:
932 */
933 status = intel_pmu_get_status();
934 if (status)
935 goto again;
936
3fb2b8dd 937done:
11164cd4 938 intel_pmu_enable_all(0);
de725dec 939 return handled;
f22f54f4
PZ
940}
941
f22f54f4 942static struct event_constraint *
ca037701 943intel_bts_constraints(struct perf_event *event)
f22f54f4 944{
ca037701
PZ
945 struct hw_perf_event *hwc = &event->hw;
946 unsigned int hw_event, bts_event;
f22f54f4 947
ca037701
PZ
948 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
949 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
f22f54f4 950
ca037701 951 if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
f22f54f4 952 return &bts_constraint;
ca037701 953
f22f54f4
PZ
954 return NULL;
955}
956
a7e3ed1e
AK
957static struct event_constraint *
958intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
959{
960 struct hw_perf_event *hwc = &event->hw;
961 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
962 struct event_constraint *c;
963 struct intel_percore *pc;
964 struct er_account *era;
965 int i;
966 int free_slot;
967 int found;
968
969 if (!x86_pmu.percore_constraints || hwc->extra_alloc)
970 return NULL;
971
972 for (c = x86_pmu.percore_constraints; c->cmask; c++) {
973 if (e != c->code)
974 continue;
975
976 /*
977 * Allocate resource per core.
978 */
979 pc = cpuc->per_core;
980 if (!pc)
981 break;
982 c = &emptyconstraint;
983 raw_spin_lock(&pc->lock);
984 free_slot = -1;
985 found = 0;
986 for (i = 0; i < MAX_EXTRA_REGS; i++) {
987 era = &pc->regs[i];
988 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
989 /* Allow sharing same config */
990 if (hwc->extra_config == era->extra_config) {
991 era->ref++;
992 cpuc->percore_used = 1;
993 hwc->extra_alloc = 1;
994 c = NULL;
995 }
996 /* else conflict */
997 found = 1;
998 break;
999 } else if (era->ref == 0 && free_slot == -1)
1000 free_slot = i;
1001 }
1002 if (!found && free_slot != -1) {
1003 era = &pc->regs[free_slot];
1004 era->ref = 1;
1005 era->extra_reg = hwc->extra_reg;
1006 era->extra_config = hwc->extra_config;
1007 cpuc->percore_used = 1;
1008 hwc->extra_alloc = 1;
1009 c = NULL;
1010 }
1011 raw_spin_unlock(&pc->lock);
1012 return c;
1013 }
1014
1015 return NULL;
1016}
1017
f22f54f4
PZ
1018static struct event_constraint *
1019intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1020{
1021 struct event_constraint *c;
1022
ca037701
PZ
1023 c = intel_bts_constraints(event);
1024 if (c)
1025 return c;
1026
1027 c = intel_pebs_constraints(event);
f22f54f4
PZ
1028 if (c)
1029 return c;
1030
a7e3ed1e
AK
1031 c = intel_percore_constraints(cpuc, event);
1032 if (c)
1033 return c;
1034
f22f54f4
PZ
1035 return x86_get_event_constraints(cpuc, event);
1036}
1037
a7e3ed1e
AK
1038static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1039 struct perf_event *event)
1040{
1041 struct extra_reg *er;
1042 struct intel_percore *pc;
1043 struct er_account *era;
1044 struct hw_perf_event *hwc = &event->hw;
1045 int i, allref;
1046
1047 if (!cpuc->percore_used)
1048 return;
1049
1050 for (er = x86_pmu.extra_regs; er->msr; er++) {
1051 if (er->event != (hwc->config & er->config_mask))
1052 continue;
1053
1054 pc = cpuc->per_core;
1055 raw_spin_lock(&pc->lock);
1056 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1057 era = &pc->regs[i];
1058 if (era->ref > 0 &&
1059 era->extra_config == hwc->extra_config &&
1060 era->extra_reg == er->msr) {
1061 era->ref--;
1062 hwc->extra_alloc = 0;
1063 break;
1064 }
1065 }
1066 allref = 0;
1067 for (i = 0; i < MAX_EXTRA_REGS; i++)
1068 allref += pc->regs[i].ref;
1069 if (allref == 0)
1070 cpuc->percore_used = 0;
1071 raw_spin_unlock(&pc->lock);
1072 break;
1073 }
1074}
1075
b4cdc5c2
PZ
1076static int intel_pmu_hw_config(struct perf_event *event)
1077{
1078 int ret = x86_pmu_hw_config(event);
1079
1080 if (ret)
1081 return ret;
1082
7639dae0
PZ
1083 if (event->attr.precise_ip &&
1084 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1085 /*
1086 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1087 * (0x003c) so that we can use it with PEBS.
1088 *
1089 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1090 * PEBS capable. However we can use INST_RETIRED.ANY_P
1091 * (0x00c0), which is a PEBS capable event, to get the same
1092 * count.
1093 *
1094 * INST_RETIRED.ANY_P counts the number of cycles that retires
1095 * CNTMASK instructions. By setting CNTMASK to a value (16)
1096 * larger than the maximum number of instructions that can be
1097 * retired per cycle (4) and then inverting the condition, we
1098 * count all cycles that retire 16 or less instructions, which
1099 * is every cycle.
1100 *
1101 * Thereby we gain a PEBS capable cycle counter.
1102 */
1103 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
1104
1105 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1106 event->hw.config = alt_config;
1107 }
1108
b4cdc5c2
PZ
1109 if (event->attr.type != PERF_TYPE_RAW)
1110 return 0;
1111
1112 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
1113 return 0;
1114
1115 if (x86_pmu.version < 3)
1116 return -EINVAL;
1117
1118 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1119 return -EACCES;
1120
1121 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
1122
1123 return 0;
1124}
1125
caaa8be3 1126static __initconst const struct x86_pmu core_pmu = {
f22f54f4
PZ
1127 .name = "core",
1128 .handle_irq = x86_pmu_handle_irq,
1129 .disable_all = x86_pmu_disable_all,
1130 .enable_all = x86_pmu_enable_all,
1131 .enable = x86_pmu_enable_event,
1132 .disable = x86_pmu_disable_event,
b4cdc5c2 1133 .hw_config = x86_pmu_hw_config,
a072738e 1134 .schedule_events = x86_schedule_events,
f22f54f4
PZ
1135 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1136 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1137 .event_map = intel_pmu_event_map,
f22f54f4
PZ
1138 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1139 .apic = 1,
1140 /*
1141 * Intel PMCs cannot be accessed sanely above 32 bit width,
1142 * so we install an artificial 1<<31 period regardless of
1143 * the generic event period:
1144 */
1145 .max_period = (1ULL << 31) - 1,
1146 .get_event_constraints = intel_get_event_constraints,
a7e3ed1e 1147 .put_event_constraints = intel_put_event_constraints,
f22f54f4
PZ
1148 .event_constraints = intel_core_event_constraints,
1149};
1150
a7e3ed1e
AK
1151static int intel_pmu_cpu_prepare(int cpu)
1152{
1153 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1154
1155 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
1156 GFP_KERNEL, cpu_to_node(cpu));
1157 if (!cpuc->per_core)
1158 return NOTIFY_BAD;
1159
1160 raw_spin_lock_init(&cpuc->per_core->lock);
1161 cpuc->per_core->core_id = -1;
1162 return NOTIFY_OK;
1163}
1164
74846d35
PZ
1165static void intel_pmu_cpu_starting(int cpu)
1166{
a7e3ed1e
AK
1167 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1168 int core_id = topology_core_id(cpu);
1169 int i;
1170
1171 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1172 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
1173
1174 if (pc && pc->core_id == core_id) {
1175 kfree(cpuc->per_core);
1176 cpuc->per_core = pc;
1177 break;
1178 }
1179 }
1180
1181 cpuc->per_core->core_id = core_id;
1182 cpuc->per_core->refcnt++;
1183
74846d35
PZ
1184 init_debug_store_on_cpu(cpu);
1185 /*
1186 * Deal with CPUs that don't clear their LBRs on power-up.
1187 */
1188 intel_pmu_lbr_reset();
1189}
1190
1191static void intel_pmu_cpu_dying(int cpu)
1192{
a7e3ed1e
AK
1193 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1194 struct intel_percore *pc = cpuc->per_core;
1195
1196 if (pc) {
1197 if (pc->core_id == -1 || --pc->refcnt == 0)
1198 kfree(pc);
1199 cpuc->per_core = NULL;
1200 }
1201
74846d35
PZ
1202 fini_debug_store_on_cpu(cpu);
1203}
1204
caaa8be3 1205static __initconst const struct x86_pmu intel_pmu = {
f22f54f4
PZ
1206 .name = "Intel",
1207 .handle_irq = intel_pmu_handle_irq,
1208 .disable_all = intel_pmu_disable_all,
1209 .enable_all = intel_pmu_enable_all,
1210 .enable = intel_pmu_enable_event,
1211 .disable = intel_pmu_disable_event,
b4cdc5c2 1212 .hw_config = intel_pmu_hw_config,
a072738e 1213 .schedule_events = x86_schedule_events,
f22f54f4
PZ
1214 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1215 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1216 .event_map = intel_pmu_event_map,
f22f54f4
PZ
1217 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1218 .apic = 1,
1219 /*
1220 * Intel PMCs cannot be accessed sanely above 32 bit width,
1221 * so we install an artificial 1<<31 period regardless of
1222 * the generic event period:
1223 */
1224 .max_period = (1ULL << 31) - 1,
3f6da390 1225 .get_event_constraints = intel_get_event_constraints,
a7e3ed1e 1226 .put_event_constraints = intel_put_event_constraints,
3f6da390 1227
a7e3ed1e 1228 .cpu_prepare = intel_pmu_cpu_prepare,
74846d35
PZ
1229 .cpu_starting = intel_pmu_cpu_starting,
1230 .cpu_dying = intel_pmu_cpu_dying,
f22f54f4
PZ
1231};
1232
3c44780b
PZ
1233static void intel_clovertown_quirks(void)
1234{
1235 /*
1236 * PEBS is unreliable due to:
1237 *
1238 * AJ67 - PEBS may experience CPL leaks
1239 * AJ68 - PEBS PMI may be delayed by one event
1240 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
1241 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
1242 *
1243 * AJ67 could be worked around by restricting the OS/USR flags.
1244 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
1245 *
1246 * AJ106 could possibly be worked around by not allowing LBR
1247 * usage from PEBS, including the fixup.
1248 * AJ68 could possibly be worked around by always programming
1249 * a pebs_event_reset[0] value and coping with the lost events.
1250 *
1251 * But taken together it might just make sense to not enable PEBS on
1252 * these chips.
1253 */
1254 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1255 x86_pmu.pebs = 0;
1256 x86_pmu.pebs_constraints = NULL;
1257}
1258
f22f54f4
PZ
1259static __init int intel_pmu_init(void)
1260{
1261 union cpuid10_edx edx;
1262 union cpuid10_eax eax;
1263 unsigned int unused;
1264 unsigned int ebx;
1265 int version;
1266
1267 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
a072738e
CG
1268 switch (boot_cpu_data.x86) {
1269 case 0x6:
1270 return p6_pmu_init();
1271 case 0xf:
1272 return p4_pmu_init();
1273 }
f22f54f4 1274 return -ENODEV;
f22f54f4
PZ
1275 }
1276
1277 /*
1278 * Check whether the Architectural PerfMon supports
1279 * Branch Misses Retired hw_event or not.
1280 */
1281 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1282 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1283 return -ENODEV;
1284
1285 version = eax.split.version_id;
1286 if (version < 2)
1287 x86_pmu = core_pmu;
1288 else
1289 x86_pmu = intel_pmu;
1290
1291 x86_pmu.version = version;
948b1bb8
RR
1292 x86_pmu.num_counters = eax.split.num_counters;
1293 x86_pmu.cntval_bits = eax.split.bit_width;
1294 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
f22f54f4
PZ
1295
1296 /*
1297 * Quirk: v2 perfmon does not report fixed-purpose events, so
1298 * assume at least 3 events:
1299 */
1300 if (version > 1)
948b1bb8 1301 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
f22f54f4 1302
8db909a7
PZ
1303 /*
1304 * v2 and above have a perf capabilities MSR
1305 */
1306 if (version > 1) {
1307 u64 capabilities;
1308
1309 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
1310 x86_pmu.intel_cap.capabilities = capabilities;
1311 }
1312
ca037701
PZ
1313 intel_ds_init();
1314
f22f54f4
PZ
1315 /*
1316 * Install the hw-cache-events table:
1317 */
1318 switch (boot_cpu_data.x86_model) {
1319 case 14: /* 65 nm core solo/duo, "Yonah" */
1320 pr_cont("Core events, ");
1321 break;
1322
1323 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
3c44780b 1324 x86_pmu.quirks = intel_clovertown_quirks;
f22f54f4
PZ
1325 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1326 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1327 case 29: /* six-core 45 nm xeon "Dunnington" */
1328 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1329 sizeof(hw_cache_event_ids));
1330
caff2bef
PZ
1331 intel_pmu_lbr_init_core();
1332
f22f54f4 1333 x86_pmu.event_constraints = intel_core2_event_constraints;
17e31629 1334 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
f22f54f4
PZ
1335 pr_cont("Core2 events, ");
1336 break;
1337
1338 case 26: /* 45 nm nehalem, "Bloomfield" */
1339 case 30: /* 45 nm nehalem, "Lynnfield" */
134fbadf 1340 case 46: /* 45 nm nehalem-ex, "Beckton" */
f22f54f4
PZ
1341 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1342 sizeof(hw_cache_event_ids));
1343
caff2bef
PZ
1344 intel_pmu_lbr_init_nhm();
1345
f22f54f4 1346 x86_pmu.event_constraints = intel_nehalem_event_constraints;
17e31629 1347 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
a7e3ed1e 1348 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
11164cd4 1349 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
a7e3ed1e 1350 x86_pmu.extra_regs = intel_nehalem_extra_regs;
11164cd4 1351 pr_cont("Nehalem events, ");
f22f54f4 1352 break;
caff2bef 1353
b622d644 1354 case 28: /* Atom */
f22f54f4
PZ
1355 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1356 sizeof(hw_cache_event_ids));
1357
caff2bef
PZ
1358 intel_pmu_lbr_init_atom();
1359
f22f54f4 1360 x86_pmu.event_constraints = intel_gen_event_constraints;
17e31629 1361 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
f22f54f4
PZ
1362 pr_cont("Atom events, ");
1363 break;
1364
1365 case 37: /* 32 nm nehalem, "Clarkdale" */
1366 case 44: /* 32 nm nehalem, "Gulftown" */
1367 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1368 sizeof(hw_cache_event_ids));
1369
caff2bef
PZ
1370 intel_pmu_lbr_init_nhm();
1371
f22f54f4 1372 x86_pmu.event_constraints = intel_westmere_event_constraints;
a7e3ed1e 1373 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
40b91cd1 1374 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
17e31629 1375 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
a7e3ed1e 1376 x86_pmu.extra_regs = intel_westmere_extra_regs;
f22f54f4
PZ
1377 pr_cont("Westmere events, ");
1378 break;
b622d644 1379
b06b3d49
LM
1380 case 42: /* SandyBridge */
1381 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1382 sizeof(hw_cache_event_ids));
1383
1384 intel_pmu_lbr_init_nhm();
1385
1386 x86_pmu.event_constraints = intel_snb_event_constraints;
1387 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1388 pr_cont("SandyBridge events, ");
1389 break;
1390
f22f54f4
PZ
1391 default:
1392 /*
1393 * default constraints for v2 and up
1394 */
1395 x86_pmu.event_constraints = intel_gen_event_constraints;
1396 pr_cont("generic architected perfmon, ");
1397 }
1398 return 0;
1399}
1400
1401#else /* CONFIG_CPU_SUP_INTEL */
1402
/* No Intel CPU support configured: nothing to initialize. */
static int intel_pmu_init(void)
{
	return 0;
}
1407
1408#endif /* CONFIG_CPU_SUP_INTEL */