/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licensing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>
/*
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_template {
	u32 opcode;			/* ESCR event + CCCR selector */
	u64 config;			/* packed predefined bits */
	int dep;			/* upstream dependency event index */
	int key;			/* index into p4_templates */
	u64 msr;			/*
					 * the high 32 bits are set into MSR_IA32_PEBS_ENABLE,
					 * the low 32 bits into MSR_P4_PEBS_MATRIX_VERT
					 */
	unsigned int emask;		/* ESCR EventMask */
	unsigned int escr_msr[2];	/* ESCR MSR for this event */
	unsigned int cntr[2];		/* counter index (offset) */
};
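/*
 * Example of the msr encoding (taken from the replay templates below):
 * the DTLB load-miss template uses
 *
 *	.msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0)
 *
 * i.e. bits 2 and 24 get set in MSR_IA32_PEBS_ENABLE and bit 0 in
 * MSR_P4_PEBS_MATRIX_VERT when the event is enabled.
 */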
struct p4_pmu_res {
	/* maps hw_conf::idx into template for ESCR sake */
	struct p4_event_template *tpl[ARCH_P4_MAX_CCCR];
};

static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
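/*
 * The per-cpu p4_pmu_config above is filled by p4_pmu_schedule_events()
 * and consumed by p4_pmu_enable_event(): it maps an assigned counter
 * index back to its template so the ESCR (and PEBS bits) can be found
 * at enable time.
 */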
#define P4_CACHE_EVENT_CONFIG(event, bit) \
	p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \
	p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \
	p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT)
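/*
 * P4_CACHE_EVENT_CONFIG() packs the ESCR event code, the requested
 * EventMask bit and the CCCR ESCR-select field into a single config
 * value; the table below then ORs a KEY_P4_* index on top so that
 * p4_pmu_template_lookup() can map a config back to its template.
 */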
static __initconst u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		/* 1stL_cache_load_miss_retired */
		[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
						| KEY_P4_L1D_OP_READ_RESULT_MISS,
	},
 },
 [ C(LL) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		/* 2ndL_cache_load_miss_retired */
		[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
						| KEY_P4_LL_OP_READ_RESULT_MISS,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		/* DTLB_load_miss_retired */
		[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
						| KEY_P4_DTLB_OP_READ_RESULT_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		/* DTLB_store_miss_retired */
		[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
						| KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		/* ITLB_reference.HIT */
		[ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT)
						| KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
		/* ITLB_reference.MISS */
		[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS)
						| KEY_P4_ITLB_OP_READ_RESULT_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
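/*
 * In the table above a 0x0 entry means the combination is not backed by a
 * dedicated Netburst event (the generic x86 code treats it as not
 * supported), while -1 marks combinations that are invalid for this PMU.
 */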
/*
 * WARN: CCCR1 doesn't have a working enable bit so try to not
 * use it if possible
 *
 * Also, once we start to support raw events we will need to
 * append _all_ P4_EVENT_PACK'ed events here
 */
struct p4_event_template p4_templates[] = {
	{
		.opcode	= P4_GLOBAL_POWER_EVENTS,
		.emask	=
			P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
	},
	{
		.opcode	= P4_INSTR_RETIRED,
		.dep	= -1, /* needs front-end tagging */
		.emask	=
			P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG)	|
			P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
	},
	{
		.opcode	= P4_BSQ_CACHE_REFERENCE,
		.emask	=
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
	},
	{
		.opcode	= P4_BSQ_CACHE_REFERENCE,
		.emask	=
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
	},
	{
		.opcode	= P4_RETIRED_BRANCH_TYPE,
		.emask	=
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL)		|
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN)		|
			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT),
		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
	},
	{
		.opcode	= P4_MISPRED_BRANCH_RETIRED,
		.emask	=
			P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
	},
	{
		.opcode	= P4_FSB_DATA_ACTIVITY,
		.config	= p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
		.emask	=
			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV)	|
			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
	},
	[KEY_P4_L1D_OP_READ_RESULT_MISS] = {
		.opcode	= P4_REPLAY_EVENT,
		.msr	= (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0),
		.key	= KEY_P4_L1D_OP_READ_RESULT_MISS,
		.emask	=
			P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	},
	[KEY_P4_LL_OP_READ_RESULT_MISS] = {
		.opcode	= P4_REPLAY_EVENT,
		.msr	= (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0),
		.key	= KEY_P4_LL_OP_READ_RESULT_MISS,
		.emask	=
			P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	},
	[KEY_P4_DTLB_OP_READ_RESULT_MISS] = {
		.opcode	= P4_REPLAY_EVENT,
		.msr	= (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0),
		.key	= KEY_P4_DTLB_OP_READ_RESULT_MISS,
		.emask	=
			P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	},
	[KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = {
		.opcode	= P4_REPLAY_EVENT,
		.msr	= (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1),
		.key	= KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
		.emask	=
			P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	},
	[KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = {
		.opcode	= P4_ITLB_REFERENCE,
		.key	= KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
		.emask	=
			P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT),
		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
	},
	[KEY_P4_ITLB_OP_READ_RESULT_MISS] = {
		.opcode	= P4_ITLB_REFERENCE,
		.key	= KEY_P4_ITLB_OP_READ_RESULT_MISS,
		.emask	=
			P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS),
		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
	},
	[KEY_P4_UOP_TYPE] = {
		.opcode	= P4_UOP_TYPE,
		.key	= KEY_P4_UOP_TYPE,
		.emask	=
			P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS)	|
			P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
	},
};
static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_template *tpl;
	u64 config;

	if (hw_event >= ARRAY_SIZE(p4_templates)) {
		printk_once(KERN_ERR "PMU: Incorrect event index\n");
		return 0;
	}
	tpl = &p4_templates[hw_event];

	/*
	 * fill config up according to
	 * a predefined event template
	 */
	config  = tpl->config;
	config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
	config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
	config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
	config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);

	return config;
}
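/*
 * The config built above thus carries the ESCR half (event code and
 * EventMask) and the CCCR half (ESCR select plus whatever the template
 * pre-packed, e.g. P4_CCCR_EDGE | P4_CCCR_COMPARE for the FSB template)
 * of the final MSR programming done in p4_pmu_enable_event().
 */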
/*
 * Note that we still have 5 events (from the global events SDM list)
 * intersected in opcode+emask bits, so we will need another
 * scheme there to distinguish templates.
 */
static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
{
	return dst & src;
}

static struct p4_event_template *p4_pmu_template_lookup(u64 config)
{
	int key = p4_config_unpack_key(config);

	if (key < ARRAY_SIZE(p4_templates))
		return &p4_templates[key];
	else
		return NULL;
}
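/*
 * The key unpacked above is the KEY_P4_* value that the cache table and
 * the templates OR into the low config bits, so a cache/raw config can
 * be traced back to its p4_event_template.
 */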
/*
 * We don't control raw events so it's up to the caller
 * to pass sane values (and we don't count the thread number
 * on HT machine but allow HT-compatible specifics to be
 * passed on)
 */
static u64 p4_pmu_raw_event(u64 hw_event)
{
	return hw_event &
		(p4_config_pack_escr(P4_EVNTSEL_MASK_HT) |
		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
}
static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
{
	int cpu = raw_smp_processor_id();

	/*
	 * The reason we use the cpu this early is that if we get scheduled
	 * for the first time on the same cpu, we will not need to swap the
	 * thread-specific flags in the config (and will save some cpu cycles).
	 */

	/* CCCR by default */
	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));

	/* Count user and OS events unless not requested to */
	hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
								attr->exclude_user));
	/* on HT machine we need a special bit */
	if (p4_ht_active() && p4_ht_thread(cpu))
		hwc->config = p4_set_ht_bit(hwc->config);

	return 0;
}
static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	unsigned long dummy;

	rdmsrl(hwc->config_base + hwc->idx, dummy);
	if (dummy & P4_CCCR_OVF) {
		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			((u64)dummy) & ~P4_CCCR_OVF);
	}
}
static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in overflowed
	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
	 * asserted again and again.
	 */
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF);
}
static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}
}
static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	u64 escr_base;
	struct p4_event_template *tpl;
	struct p4_pmu_res *c;

	/*
	 * some preparation work from per-cpu private fields
	 * since we need to find out which ESCR to use
	 */
	c = &__get_cpu_var(p4_pmu_config);
	tpl = c->tpl[hwc->idx];
	if (!tpl) {
		pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
		return;
	}

	if (tpl->msr) {
		(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32);
		(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff);
	}

	escr_base = (u64)tpl->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	(void)checking_wrmsrl(escr_base, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE);
}
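/*
 * So the enable sequence above is: program the PEBS helper MSRs when the
 * template carries them (the replay events), then the per-thread ESCR,
 * and finally the CCCR with P4_CCCR_ENABLE set.
 */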
static void p4_pmu_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_events; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}
static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_events; idx++) {

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/*
		 * FIXME: Redundant call, actually not needed
		 * but just to check if we're screwed
		 */
		p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (val & (1ULL << (x86_pmu.event_bits - 1)))
			continue;

		handled = 1;
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}
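/*
 * Note the "p4 quirk" above: the LVT performance counter entry gets
 * masked when the PMI is delivered, so it has to be unmasked by hand
 * before the next interrupt can come in.
 */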
/*
 * swap thread specific fields according to a thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu, or there
	 * is no HT support at all
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event is migrated from another logical
	 * cpu, so we need to swap the thread-specific flags
	 */
	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_EVNTSEL_T0_OS) {
			escr &= ~P4_EVNTSEL_T0_OS;
			escr |= P4_EVNTSEL_T1_OS;
		}
		if (escr & P4_EVNTSEL_T0_USR) {
			escr &= ~P4_EVNTSEL_T0_USR;
			escr |= P4_EVNTSEL_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_EVNTSEL_T1_OS) {
			escr &= ~P4_EVNTSEL_T1_OS;
			escr |= P4_EVNTSEL_T0_OS;
		}
		if (escr & P4_EVNTSEL_T1_USR) {
			escr &= ~P4_EVNTSEL_T1_USR;
			escr |= P4_EVNTSEL_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}
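/*
 * In other words: the T0 OS/USR and OVF_PMI bits are moved over to their
 * T1 counterparts (or back), and P4_CONFIG_HT records which thread the
 * config is currently packed for, so that p4_should_swap_ts() can tell
 * whether a swap is needed.
 */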
/* ESCRs are not sequential in memory so we need a map */
static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
	MSR_P4_ALF_ESCR0,	/*  0 */
	MSR_P4_ALF_ESCR1,	/*  1 */
	MSR_P4_BPU_ESCR0,	/*  2 */
	MSR_P4_BPU_ESCR1,	/*  3 */
	MSR_P4_BSU_ESCR0,	/*  4 */
	MSR_P4_BSU_ESCR1,	/*  5 */
	MSR_P4_CRU_ESCR0,	/*  6 */
	MSR_P4_CRU_ESCR1,	/*  7 */
	MSR_P4_CRU_ESCR2,	/*  8 */
	MSR_P4_CRU_ESCR3,	/*  9 */
	MSR_P4_CRU_ESCR4,	/* 10 */
	MSR_P4_CRU_ESCR5,	/* 11 */
	MSR_P4_DAC_ESCR0,	/* 12 */
	MSR_P4_DAC_ESCR1,	/* 13 */
	MSR_P4_FIRM_ESCR0,	/* 14 */
	MSR_P4_FIRM_ESCR1,	/* 15 */
	MSR_P4_FLAME_ESCR0,	/* 16 */
	MSR_P4_FLAME_ESCR1,	/* 17 */
	MSR_P4_FSB_ESCR0,	/* 18 */
	MSR_P4_FSB_ESCR1,	/* 19 */
	MSR_P4_IQ_ESCR0,	/* 20 */
	MSR_P4_IQ_ESCR1,	/* 21 */
	MSR_P4_IS_ESCR0,	/* 22 */
	MSR_P4_IS_ESCR1,	/* 23 */
	MSR_P4_ITLB_ESCR0,	/* 24 */
	MSR_P4_ITLB_ESCR1,	/* 25 */
	MSR_P4_IX_ESCR0,	/* 26 */
	MSR_P4_IX_ESCR1,	/* 27 */
	MSR_P4_MOB_ESCR0,	/* 28 */
	MSR_P4_MOB_ESCR1,	/* 29 */
	MSR_P4_MS_ESCR0,	/* 30 */
	MSR_P4_MS_ESCR1,	/* 31 */
	MSR_P4_PMH_ESCR0,	/* 32 */
	MSR_P4_PMH_ESCR1,	/* 33 */
	MSR_P4_RAT_ESCR0,	/* 34 */
	MSR_P4_RAT_ESCR1,	/* 35 */
	MSR_P4_SAAT_ESCR0,	/* 36 */
	MSR_P4_SAAT_ESCR1,	/* 37 */
	MSR_P4_SSU_ESCR0,	/* 38 */
	MSR_P4_SSU_ESCR1,	/* 39 */
	MSR_P4_TBPU_ESCR0,	/* 40 */
	MSR_P4_TBPU_ESCR1,	/* 41 */
	MSR_P4_TC_ESCR0,	/* 42 */
	MSR_P4_TC_ESCR1,	/* 43 */
	MSR_P4_U2L_ESCR0,	/* 44 */
	MSR_P4_U2L_ESCR1,	/* 45 */
};
static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
		if (addr == p4_escr_map[i])
			return i;
	}

	return -1;
}
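/*
 * A return of -1 above means the MSR is not in the map; the scheduler
 * below uses the returned index to mark the ESCR busy in escr_mask.
 */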
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];

	struct hw_perf_event *hwc;
	struct p4_event_template *tpl;
	struct p4_pmu_res *c;
	int cpu = raw_smp_processor_id();
	int escr_idx, thread, i, num;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);

	c = &__get_cpu_var(p4_pmu_config);
	/*
	 * First find out which resources the events are going
	 * to use; if an ESCR+CCCR tuple is already borrowed
	 * then get out of here
	 */
	for (i = 0, num = n; i < n; i++, num--) {
		hwc = &cpuc->event_list[i]->hw;
		tpl = p4_pmu_template_lookup(hwc->config);
		if (!tpl)
			goto done;

		thread = p4_ht_thread(cpu);
		escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]);

		/* already allocated and remains on the same cpu */
		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			assign[i] = hwc->idx;
			/* upstream dependent event */
			if (unlikely(tpl->dep != -1))
				printk_once(KERN_WARNING "PMU: Dep events are "
					"not implemented yet\n");
			goto reserve;
		}

		/* it may be already borrowed */
		if (test_bit(tpl->cntr[thread], used_mask) ||
			test_bit(escr_idx, escr_mask))
			goto done;

		/*
		 * ESCR+CCCR+COUNTER are available to use, so swap the
		 * thread-specific bits, push the assigned bits
		 * back and save the template into the per-cpu
		 * area (which will allow us to find out which ESCR
		 * to use at the moment of "enable event via real MSR")
		 */
		p4_pmu_swap_config_ts(hwc, cpu);
		assign[i] = tpl->cntr[thread];
		c->tpl[assign[i]] = tpl;

reserve:
		set_bit(tpl->cntr[thread], used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}
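/*
 * num above counts the events that did not make it through the loop:
 * every fully processed event decrements it, so any leftover means the
 * group cannot be scheduled and -ENOSPC is returned.
 */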
static __initconst struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.raw_event		= p4_pmu_raw_event,
	.max_events		= ARRAY_SIZE(p4_templates),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though leave it restricted for now assuming
	 * HT is on
	 */
	.num_events		= ARCH_P4_MAX_CCCR,
	.event_mask		= (1ULL << 40) - 1,
	.max_period		= (1ULL << 39) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
};
static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
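	/*
	 * Bit 7 of MSR_IA32_MISC_ENABLE is the "Performance Monitoring
	 * Available" flag; without it there is nothing we can count on
	 * this CPU model.
	 */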
	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */